Files
rocm-systems/bin/hipify
T
2016-09-05 11:18:48 +03:00

732 řádky
27 KiB
Perl
Spustitelný soubor

#!/usr/bin/perl -w
##
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
##
#usage hipify [OPTIONS] INPUT_FILE
use Getopt::Long;
my $warn_whitelist ="";
GetOptions(
"print-stats" => \$print_stats # print the command-line, like a header.
, "count-conversions" => \$count_conversions # count conversions.
, "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions.
, "warn-whitelist=s"=> \$warn_whitelist
, "no-translate-builtins" => \$no_translate_builtins # don't translate math functions.
, "no-translate-textures" => \$no_translate_textures
, "no-output" => \$no_output # don't write any translated output to stdout.
, "inplace" => \$inplace # modify input file inplace, replacing input with hipified output, save backup in ".prehip" file.
# If .prehip file exists, use that as input to hip.
, "n" => \$n # combination of print_stats + no-output.
);
$print_stats = 1 if $n;
$no_output = 1 if $n;
# These uses of cuda[A-Z] are commonly used in CUDA code but don't actually map to any CUDA API:
# TODO - use a hash lookup for these.
@warn_whitelist = (
"cudaError"
,"cudaStatus"
,"cudaDevice"
,"cudaDevice_t"
,"cudaIDs"
,"cudaGridDim"
,"cudaDimGrid"
,"cudaDimBlock"
,"cudaDeviceId"
,"cudaDevices",
,"cudaGradOutput",
,"cudaInput",
,"cudaOutput",
,"cudaGradInput",
,"cudaIndices",
,"cudaColorSpinorField"
,"cudaGaugeField"
,"cudaMom"
,"cudaGauge"
,"cudaInGauge"
,"cudaGaugeField"
,"cudaColorSpinorField"
,"cudaSiteLink"
,"cudaFatLink"
,"cudaStaple"
,"cudaCloverField"
,"cudaFatLink"
,"cudaParam"
);
#print "WW=@warn_whitelist\n";
# Allow users to add their own functions.
push (@warn_whitelist, split(',',$warn_whitelist));
#---
#Stats tracking code:
@statNames = ("dev", "mem", "kern", 'coord_func', "math_func", "special_func", "stream", "event", "err", "def", "tex", "extern_shared", "other");
#---
#Compute total of all individual counts:
sub totalStats
{
my %count = %{ shift() };
my $total = 0;
foreach $key (keys %count) {
$total += $count{$key};
}
return $total;
};
#---
sub printStats
{
my $label = shift();
my @statNames = @{ shift() };
my %counts = %{ shift() };
my $warnings = shift();
my $loc = shift();
my $total = totalStats(\%counts);
printf STDERR "%s %d CUDA->HIP refs( ", $label, $total;
foreach $stat (@statNames) {
printf STDERR "%s:%d ", $stat, $counts{$stat};
}
printf STDERR ") warn:%d LOC:%d", $warnings, $loc;
}
#---
# Add adder stats to dest. Used to add stats for current file to a running total for all files:
sub addStats
{
my $dest_ref = shift() ;
my %adder = %{ shift() };
foreach $key (keys %adder) {
$dest_ref->{$key} += $adder{$key};
#printf ("D{$key} += %d => %d\n", $adder{$key}, $dest{$key});
}
}
#---
sub clearStats
{
my $dest_ref = shift() ;
my @statNames = @{ shift() };
foreach $stat (@statNames) {
$dest_ref->{$stat} = 0;
}
}
# count of transforms in all files:
my %tt;
clearStats(\%tt, \@statNames);
$Twarnings = 0;
$TlineCount = 0;
my %TwarningTags ;
my %Tkernels ;
my $fileCount = @ARGV;
my $fileName = "";
while (@ARGV) {
$fileName=shift (@ARGV);
if ($inplace) {
my $file_prehip = "$fileName" . ".prehip";
my $infile;
my $outfile;
if (-e $file_prehip) {
$infile = $file_prehip;
$outfile = $fileName;
} else {
system ("cp $fileName $file_prehip");
$infile = $file_prehip;
$outfile = $fileName;
}
open(INFILE,"<", $infile) or die "error: could not open $infile";
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
$OUTFILE = OUTFILE;
} else {
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
$OUTFILE = STDOUT;
}
# Note : \b is used in perl to indicate the start of a word - typically that is what we want in this case:
#
# count of transforms in this file, init to 0 here:
my %ft;
clearStats(\%ft, \@statNames);
my $countIncludes = 0;
my $countKeywords = 0; # keywords like __global__, __shared__ - not converted by hipify but counted here.
my $warnings = 0;
my $warningsCublas = 0;
my $warningsCurand = 0;
my %warningTags; # hash with counts of particular unknown keywords.
my $lineCount = 0;
undef $/; # Read whole file at once, so we can match newlines.
while (<INFILE>)
{
#--------
# Compiler Defines
# __CUDACC__ is set by NVCC to indicate it is treating the input file as CUDA code (as opposed to host)
# Typically we want any code treated as CUDA code to be treated as accelerator code by Kalmar too
# __HIPCC__ will set KALMARCC
$ft{'def'} += s/\b__CUDACC__\b/__HIPCC__/g;
# __CUDA_ARCH is often used to detect when a function or kernel is being compiled for the device.
# Don't automaticall convert this - likely these will need special attention with HIP_ARCH_HAS_* macros
#$ft{'def'} += s/\b__CUDA_ARCH__\b/__HIP_ARCH__/g;
#--------
#Includes:
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime\.h[>"]/$1<hip\/hip_runtime.h>/;
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime_api\.h[>"]/$1<hip\/hip_runtime_api.h>/;
#--------
# Error codes and return types:
$ft{'err'} += s/\bcudaError_t\b/hipError_t/g;
$ft{'err'} += s/\bcudaError\b/hipError_t/g;
$ft{'err'} += s/\bcudaSuccess\b/hipSuccess/g;
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
$ft{'err'} += s/\bcudaErrorMemoryAllocation\b/hipErrorMemoryAllocation/g;
$ft{'err'} += s/\bcudaErrorMemoryFree\b/hipErrorMemoryFree/g;
$ft{'err'} += s/\bcudaErrorUnknownSymbol\b/hipErrorUnknownSymbol/g;
$ft{'err'} += s/\bcudaErrorOutOfResources\b/hipErrorOutOfResources/g;
$ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g;
$ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g;
$ft{'err'} += s/\bcudaErrorInvalidDevice\b/hipErrorInvalidDevice/g;
$ft{'err'} += s/\bcudaErrorNoDevice\b/hipErrorNoDevice/g;
$ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g;
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
# error APIs:
$ft{'err'} += s/\bcudaGetLastError\b/hipGetLastError/g;
$ft{'err'} += s/\bcudaPeekAtLastError\b/hipPeekAtLastError/g;
$ft{'err'} += s/\bcudaGetErrorName\b/hipGetErrorName/g;
$ft{'err'} += s/\bcudaGetErrorString\b/hipGetErrorString/g;
#--------
# Memcpy
$ft{'mem'} += s/\bcudaMemcpy\b/hipMemcpy/g;
$ft{'mem'} += s/\bcudaMemcpyHostToHost\b/hipMemcpyHostToHost/g;
$ft{'mem'} += s/\bcudaMemcpyHostToDevice\b/hipMemcpyHostToDevice/g;
$ft{'mem'} += s/\bcudaMemcpyDeviceToHost\b/hipMemcpyDeviceToHost/g;
$ft{'mem'} += s/\bcudaMemcpyDeviceToDevice\b/hipMemcpyDeviceToDevice/g;
$ft{'mem'} += s/\bcudaMemcpyDefault\b/hipMemcpyDefault/g;
$ft{'mem'} += s/\bcudaMemcpyToSymbol\b/hipMemcpyToSymbol/g;
$ft{'mem'} += s/\bcudaMemset\b/hipMemset/g;
$ft{'mem'} += s/\bcudaMemsetAsync\b/hipMemsetAsync/g;
$ft{'mem'} += s/\bcudaMemcpyAsync\b/hipMemcpyAsync/g;
$ft{'mem'} += s/\bcudaMemGetInfo\b/hipMemGetInfo/g;
$ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g;
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
#--------
# Memory management:
$ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g;
$ft{'mem'} += s/\bcudaMallocHost\b/hipHostMalloc/g; # note conversion to standard hipHost* naming convention
$ft{'mem'} += s/\bcudaFree\b/hipFree/g;
$ft{'mem'} += s/\bcudaFreeHost\b/hipHostFree/g; # note conversion to standard hipHost* naming convention
$ft{'mem'} += s/\bcudaHostAlloc\b/hipHostAlloc/g;
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
$ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostMallocDefault/g;
$ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostMallocPortable/g;
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
#--------
# Coordinate Indexing and Dimensions:
$ft{'coord_func'} += s/\bthreadIdx\.x\b/hipThreadIdx_x/g;
$ft{'coord_func'} += s/\bthreadIdx\.y\b/hipThreadIdx_y/g;
$ft{'coord_func'} += s/\bthreadIdx\.z\b/hipThreadIdx_z/g;
$ft{'coord_func'} += s/\bblockIdx\.x\b/hipBlockIdx_x/g;
$ft{'coord_func'} += s/\bblockIdx\.y\b/hipBlockIdx_y/g;
$ft{'coord_func'} += s/\bblockIdx\.z\b/hipBlockIdx_z/g;
$ft{'coord_func'} += s/\bblockDim\.x\b/hipBlockDim_x/g;
$ft{'coord_func'} += s/\bblockDim\.y\b/hipBlockDim_y/g;
$ft{'coord_func'} += s/\bblockDim\.z\b/hipBlockDim_z/g;
$ft{'coord_func'} += s/\bgridDim\.x\b/hipGridDim_x/g;
$ft{'coord_func'} += s/\bgridDim\.y\b/hipGridDim_y/g;
$ft{'coord_func'} += s/\bgridDim\.z\b/hipGridDim_z/g;
# hack to avoid replacing hipDeviceProp.warpSize call
$ft{'special_func'} += s/([^.])\bwarpSize\b/$1hipWarpSize/g;
#--------
# Events
$ft{'event'} += s/\bcudaEvent_t\b/hipEvent_t/g;
$ft{'event'} += s/\bcudaEventCreate\b/hipEventCreate/g;
$ft{'event'} += s/\bcudaEventCreateWithFlags\b/hipEventCreateWithFlags/g;
$ft{'event'} += s/\bcudaEventDestroy\b/hipEventDestroy/g;
$ft{'event'} += s/\bcudaEventRecord\b/hipEventRecord/g;
$ft{'event'} += s/\bcudaEventElapsedTime\b/hipEventElapsedTime/g;
$ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g;
#--------
# Streams
$ft{'stream'} += s/\bcudaStream_t\b/hipStream_t/g;
$ft{'stream'} += s/\bcudaStreamCreate\b/hipStreamCreate/g;
$ft{'stream'} += s/\bcudaStreamCreateWithFlags\b/hipStreamCreateWithFlags/g;
$ft{'stream'} += s/\bcudaStreamDestroy\b/hipStreamDestroy/g;
$ft{'stream'} += s/\bcudaStreamWaitEvent\b/hipStreamWaitEvent/g;
$ft{'stream'} += s/\bcudaStreamSynchronize\b/hipStreamSynchronize/g;
$ft{'stream'} += s/\bcudaStreamDefault\b/hipStreamDefault/g;
$ft{'stream'} += s/\bcudaStreamNonBlocking\b/hipStreamNonBlocking/g;
#--------
# Other synchronization
$ft{'dev'} += s/\bcudaDeviceSynchronize\b/hipDeviceSynchronize/g;
$ft{'dev'} += s/\bcudaThreadSynchronize\b/hipDeviceSynchronize/g; # translate deprecated cudaThreadSynchronize
$ft{'dev'} += s/\bcudaDeviceReset\b/hipDeviceReset/g;
$ft{'dev'} += s/\bcudaThreadExit\b/hipDeviceReset/g; # translate deprecated cudaThreadExit
$ft{'dev'} += s/\bcudaSetDevice\b/hipSetDevice/g;
$ft{'dev'} += s/\bcudaGetDevice\b/hipGetDevice/g;
#--------
# Device
$ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g;
$ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipGetDeviceProperties/g;
# Attribute
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g;
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g;
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimY\b/hipDeviceAttributeMaxBlockDimY/g;
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimZ\b/hipDeviceAttributeMaxBlockDimZ/g;
$ft{'err'} += s/\bcudaDevAttrMaxGridDimX\b/hipDeviceAttributeMaxGridDimX/g;
$ft{'err'} += s/\bcudaDevAttrMaxGridDimY\b/hipDeviceAttributeMaxGridDimY/g;
$ft{'err'} += s/\bcudaDevAttrMaxGridDimZ\b/hipDeviceAttributeMaxGridDimZ/g;
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerBlock\b/hipDeviceAttributeMaxSharedMemoryPerBlock/g;
$ft{'err'} += s/\bcudaDevAttrTotalConstantMemory\b/hipDeviceAttributeTotalConstantMemory/g;
$ft{'err'} += s/\bcudaDevAttrWarpSize\b/hipDeviceAttributeWarpSize/g;
$ft{'err'} += s/\bcudaDevAttrMaxRegistersPerBlock\b/hipDeviceAttributeMaxRegistersPerBlock/g;
$ft{'err'} += s/\bcudaDevAttrClockRate\b/hipDeviceAttributeClockRate/g;
$ft{'err'} += s/\bcudaDevAttrMultiProcessorCount\b/hipDeviceAttributeMultiprocessorCount/g;
$ft{'err'} += s/\bcudaDevAttrComputeMode\b/hipDeviceAttributeComputeMode/g;
$ft{'err'} += s/\bcudaDevAttrL2CacheSize\b/hipDeviceAttributeL2CacheSize/g;
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g;
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g;
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g;
$ft{'err'} += s/\bcudaDevAttrConcurrentKernels\b/hipDeviceAttributeConcurrentKernels/g;
$ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g;
$ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g;
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g;
$ft{'err'} += s/\bcudaDevAttrMemoryClockRate\b/hipDeviceAttributeMemoryClockRate/g;
$ft{'err'} += s/\bcudaDevAttrGlobalMemoryBusWidth\b/hipDeviceAttributeMemoryBusWidth/g;
$ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g;
$ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g;
# Cache config
$ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g;
$ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated
$ft{'dev'} += s/\bcudaDeviceGetCacheConfig\b/hipDeviceGetCacheConfig/g;
$ft{'dev'} += s/\bcudaThreadGetCacheConfig\b/hipDeviceGetCacheConfig/g; # translate deprecated
$ft{'dev'} += s/\bcudaFuncCache\b/hipFuncCache/g;
$ft{'dev'} += s/\bcudaFuncCachePreferNone\b/hipFuncCachePreferNone/g;
$ft{'dev'} += s/\bcudaFuncCachePreferShared\b/hipFuncCachePreferShared/g;
$ft{'dev'} += s/\bcudaFuncCachePreferL1\b/hipFuncCachePreferL1/g;
$ft{'dev'} += s/\bcudaFuncCachePreferEqual\b/hipFuncCachePreferEqual/g;
# function
$ft{'dev'} += s/\bcudaFuncSetCacheConfig\b/hipFuncSetCacheConfig/g;
$ft{'dev'} += s/\bcudaDriverGetVersion\b/hipDriverGetVersion/g;
#--------
# Peer2Peer
$ft{'dev'} += s/\bcudaDeviceCanAccessPeer\b/hipDeviceCanAccessPeer/g;
$ft{'dev'} += s/\bcudaDeviceDisablePeerAccess\b/hipDeviceDisablePeerAccess/g;
$ft{'dev'} += s/\bcudaDeviceEnablePeerAccess\b/hipDeviceEnablePeerAccess/g;
$ft{'mem'} += s/\bcudaMemcpyPeerAsync\b/hipMemcpyPeerAsync/g;
$ft{'mem'} += s/\bcudaMemcpyPeer\b/hipMemcpyPeer/g;
# Shared mem:
$ft{'dev'} += s/\bcudaDeviceSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g;
$ft{'dev'} += s/\bcudaThreadSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g; # translate deprecated
$ft{'dev'} += s/\bcudaDeviceGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g;
$ft{'dev'} += s/\bcudaThreadGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g; # translate deprecated
$ft{'dev'} += s/\bcudaSharedMemConfig\b/hipSharedMemConfig/g;
$ft{'dev'} += s/\bcudaSharedMemBankSizeDefault\b/hipSharedMemBankSizeDefault/g;
$ft{'dev'} += s/\bcudaSharedMemBankSizeFourByte\b/hipSharedMemBankSizeFourByte/g;
$ft{'dev'} += s/\bcudaSharedMemBankSizeEightByte\b/hipSharedMemBankSizeEightByte/g;
$ft{'dev'} += s/\bcudaGetDeviceCount\b/hipGetDeviceCount/g;
#--------
# Profiler
#$aOt += s/\bcudaProfilerInitialize\b/hipProfilerInitialize/g; // see if these are called anywhere.
$ft{'other'} += s/\bcudaProfilerStart\b/hipProfilerStart/g;
$ft{'other'} += s/\bcudaProfilerStop\b/hipProfilerStop/g;
$countKeywords += m/__global__/;
$countKeywords += m/__shared__/;
#--------
# CUDA extern __shared__ syntax
# Note these only work if declaration is on a single line.
{
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
no warnings qw/uninitialized/;
my $k = 0;
# Match extern __shared__ type foo[]; syntax
# Replace as HIP_DYNAMIC_SHARED() macro
$k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;
# test patterns for the regular expression above:
#'extern __shared__ double foo[];'
#'extern __shared__ unsigned int foo[];'
#'extern volatile __shared__ double foo[];'
#'extern volatile __shared__ unsigned int sdata[];'
#'extern __shared__ volatile unsigned int sdata[];'
#'extern __shared__ T s[];'
#'extern __shared__ T::type s[];'
#'extern __shared__ blah<T>::type s[];'
#'extern __shared__ typename mapper<Float>::type s_data[];'
#'extern __attribute__((used)) __shared__ typename mapper<Float>::type s_data[];'
$ft{'extern_shared'} += $k;
}
#--------
# CUDA Launch Syntax
# Note these only work if launch is on a single line.
# Handle the <<numBlocks, blockDim>>> syntax:
{
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
no warnings qw/uninitialized/;
my $k = 0;
my $kernelName;
# Handle the <<numBlocks, blockDim, sharedSize, stream>>> syntax:
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, $6, /g;
$kernelName = $1 if $k;
# Handle the <<numBlocks, blockDim, sharedSize>>> syntax:
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, 0, /g;
$kernelName = $1 if $k;
# Handle the <<numBlocks, blockDim>>> syntax:
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), 0, 0, /g;
$kernelName = $1 if $k;
$ft{'kern'} += $k;
if ($k) {
$Tkernels{$kernelName} ++;
}
}
unless ($no_translate_textures) {
$ft{'tex'} += s/\bcudaChannelFormatDesc\b/hipChannelFormatDesc/g;
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
$ft{'tex'} += s/\bcudaArray\b/hipArrary/g;
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
$ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g;
$ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g;
}
if ($count_conversions) {
while (/(\bhip[A-Z]\w+\b)/g) {
$convertedTags{$1}++;
#print STDERR "HIP: $1 : ", $translateTags{$1}, "\n";
}
}
# guess that we are in device code , or at least in a file that calls device code.
# will almost certainly call one of the coordiante functions - could be fooled by clever macros but usually works:
my $hasDeviceCode = $countKeywords + $ft{'coord_func'} + $ft{'math_func'} + $ft{'special_func'};
unless ($quiet_warnings) {
#print STDERR "Check WARNINGs\n";
# copy into array of lines, process line-by-line to show warnings:
if ($hasDeviceCode or (/\bcuda/) or (/<<<.*>>>/) or (/(\bcublas[A-Z]\w+)/) or (/(\bcurand[A-Z]\w+)/) ) {
my @lines = split /\n/, $_;
my $tmp = $_; # copies the whole file, could be a little smarter here...
my $line_num = 0;
foreach (@lines) {
$line_num ++;
# remove any whitelisted words:
foreach $w (@warn_whitelist) {
s/\b$w\b/ZAP/
}
my $tag ;
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
# flag any remaining code that look like cuda API calls, may want to add these to hipify
$tag = (defined $1) ? $1 : "Launch";
} elsif (/(\bcublas[A-Z]\w+)/) {
$warningsCublas++;
$tag = $1;
} elsif (/(\bcurand[A-Z]\w+)/) {
$warningsCurand++;
$tag = $1;
}
if (defined $tag) {
$warnings++;
$warningTags{$tag}++;
print STDERR " warning: $fileName:#$line_num : $_";
print STDERR "\n";
}
$s = warnUnsupportedSpecialFunctions($line_num);
$warnings += $s;
}
$_ = $tmp;
}
}
#--------
# Math libraries
# To limit bogus translations, try to make sure we are in a kernel (ft{'builtin'} != 0):
if (not $no_translate_builtins and ($hasDeviceCode > 0)) {
$ft{'special_func'} += countSupportedSpecialFunctions();
}
#--------
# Print it!
# TODO - would like to move this code outside loop but it uses $_ which contains the whole file.
unless ($no_output) {
my $apiCalls = $ft{'err'} + $ft{'event'} + $ft{'mem'} + $ft{'stream'} + $ft{'dev'} + $ft{'def'} + $ft{'tex'} + $ft{'other'} + $ft{'math_func'};
my $kernStuff = $hasDeviceCode + $ft{'kern'};
my $totalCalls = $apiCalls + $kernStuff;
$is_dos = m/\r\n$/;
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
# If this file makes kernel builtin calls, and does not include the cuda_runtime.h,
# then add an #include to match "magic" includes provided by NVCC.
# This logic can miss cases where cuda_runtime.h is included by another include file.
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
}
print $OUTFILE "$_";
}
$lineCount = $_ =~ tr/\n//;
}
my $totalConverted = totalStats(\%ft);
#printf "TOTAL-CONV=%d\n", $totalConverted;
if (($totalConverted+$warnings) and $print_stats) {
printStats("info: converted", \@statNames, \%ft, $warnings, $lineCount);
print STDERR " in '$fileName'\n";
}
# Update totals for all files:
addStats(\%tt, \%ft);
$Twarnings += $warnings;
$TlineCount += $lineCount;
foreach $key (keys %warningTags) {
$TwarningTags{$key} += $warningTags{$key};
}
}
#-- Print total stats for all files processed:
if ($print_stats and ($fileCount > 1)) {
print STDERR "\n";
printStats("info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
print STDERR "\n";
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
}
my $kernelCnt = keys %Tkernels;
printf STDERR " kernels (%d total) : ", $kernelCnt;
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
printf STDERR " %s(%d)", $key, $Tkernels{$key};
}
print STDERR "\n";
print STDERR "\n";
}
if ($count_conversions) {
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
printf STDERR " %s %d\n", $key, $convertedTags{$key};
}
}
sub countSupportedSpecialFunctions
{
my $m = 0;
#supported special functions:
foreach $func (
# Synchronization:
"__syncthreads",
)
{
# match math at the beginning of a word, but not if it already has a namespace qualifier ('::') :
$m += m/[:]?[:]?\b($func)\b(\w*\()/g;
}
return $m;
}
sub warnUnsupportedSpecialFunctions
{
my $line_num = shift;
my $m = 0;
foreach $func (
# memory fence:
"__threadfence_block",
"__threadfence",
"__threadfence_system",
# Synchronization:
"__syncthreads_count",
"__syncthreads_and",
"__syncthreads_or",
# Read-only cache function:
"__ldg",
# Cross-lane and warp-vote instructions:
#"__all",
#"__any",
#"__ballot",
#"__popc",
#"__clz",
#"__shfl",
#"__shfl_up",
#"__shfl_down",
#"__shfl_xor",
"__prof_trigger",
# too popular, and we can't tell if we are in device or host code.
#"assert",
#"printf",
#"malloc",
#"free",
#"memset",
#"memcpy"
)
{
# match math at the beginning of a word, but not if it already has a namespace qualifier ('::') :
my $mt = m/[:]?[:]?\b($func)\b(\w*\()/g;
if ($mt) {
$m += $mt;
print STDERR " warning: $fileName:#$line_num : unsupported device function : $_\n";
}
}
return $m;
}