732 řádky
27 KiB
Perl
Spustitelný soubor
732 řádky
27 KiB
Perl
Spustitelný soubor
#!/usr/bin/perl -w
|
|
##
|
|
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
##
|
|
#usage hipify [OPTIONS] INPUT_FILE
|
|
use Getopt::Long;
|
|
|
|
my $warn_whitelist ="";
|
|
GetOptions(
|
|
"print-stats" => \$print_stats # print the command-line, like a header.
|
|
, "count-conversions" => \$count_conversions # count conversions.
|
|
, "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions.
|
|
, "warn-whitelist=s"=> \$warn_whitelist
|
|
, "no-translate-builtins" => \$no_translate_builtins # don't translate math functions.
|
|
, "no-translate-textures" => \$no_translate_textures
|
|
, "no-output" => \$no_output # don't write any translated output to stdout.
|
|
, "inplace" => \$inplace # modify input file inplace, replacing input with hipified output, save backup in ".prehip" file.
|
|
# If .prehip file exists, use that as input to hip.
|
|
, "n" => \$n # combination of print_stats + no-output.
|
|
);
|
|
|
|
$print_stats = 1 if $n;
|
|
$no_output = 1 if $n;
|
|
|
|
# These uses of cuda[A-Z] are commonly used in CUDA code but don't actually map to any CUDA API:
|
|
# TODO - use a hash lookup for these.
|
|
@warn_whitelist = (
|
|
"cudaError"
|
|
,"cudaStatus"
|
|
,"cudaDevice"
|
|
,"cudaDevice_t"
|
|
|
|
,"cudaIDs"
|
|
,"cudaGridDim"
|
|
,"cudaDimGrid"
|
|
,"cudaDimBlock"
|
|
,"cudaDeviceId"
|
|
,"cudaDevices",
|
|
|
|
,"cudaGradOutput",
|
|
,"cudaInput",
|
|
,"cudaOutput",
|
|
,"cudaGradInput",
|
|
,"cudaIndices",
|
|
|
|
,"cudaColorSpinorField"
|
|
,"cudaGaugeField"
|
|
,"cudaMom"
|
|
,"cudaGauge"
|
|
,"cudaInGauge"
|
|
,"cudaGaugeField"
|
|
,"cudaColorSpinorField"
|
|
,"cudaSiteLink"
|
|
,"cudaFatLink"
|
|
,"cudaStaple"
|
|
,"cudaCloverField"
|
|
,"cudaFatLink"
|
|
,"cudaParam"
|
|
);
|
|
#print "WW=@warn_whitelist\n";
|
|
|
|
|
|
# Allow users to add their own functions.
|
|
push (@warn_whitelist, split(',',$warn_whitelist));
|
|
|
|
|
|
#---
|
|
#Stats tracking code:
|
|
@statNames = ("dev", "mem", "kern", 'coord_func', "math_func", "special_func", "stream", "event", "err", "def", "tex", "extern_shared", "other");
|
|
|
|
|
|
#---
|
|
#Compute total of all individual counts:
|
|
sub totalStats
|
|
{
|
|
my %count = %{ shift() };
|
|
|
|
my $total = 0;
|
|
foreach $key (keys %count) {
|
|
$total += $count{$key};
|
|
}
|
|
|
|
return $total;
|
|
};
|
|
|
|
#---
|
|
sub printStats
|
|
{
|
|
my $label = shift();
|
|
my @statNames = @{ shift() };
|
|
my %counts = %{ shift() };
|
|
my $warnings = shift();
|
|
my $loc = shift();
|
|
|
|
my $total = totalStats(\%counts);
|
|
|
|
printf STDERR "%s %d CUDA->HIP refs( ", $label, $total;
|
|
|
|
foreach $stat (@statNames) {
|
|
printf STDERR "%s:%d ", $stat, $counts{$stat};
|
|
}
|
|
|
|
printf STDERR ") warn:%d LOC:%d", $warnings, $loc;
|
|
}
|
|
|
|
|
|
#---
|
|
# Add adder stats to dest. Used to add stats for current file to a running total for all files:
|
|
sub addStats
|
|
{
|
|
my $dest_ref = shift() ;
|
|
my %adder = %{ shift() };
|
|
|
|
foreach $key (keys %adder) {
|
|
$dest_ref->{$key} += $adder{$key};
|
|
#printf ("D{$key} += %d => %d\n", $adder{$key}, $dest{$key});
|
|
}
|
|
}
|
|
|
|
|
|
#---
|
|
sub clearStats
|
|
{
|
|
my $dest_ref = shift() ;
|
|
my @statNames = @{ shift() };
|
|
|
|
foreach $stat (@statNames) {
|
|
$dest_ref->{$stat} = 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
# count of transforms in all files:
|
|
my %tt;
|
|
clearStats(\%tt, \@statNames);
|
|
|
|
|
|
|
|
$Twarnings = 0;
|
|
$TlineCount = 0;
|
|
my %TwarningTags ;
|
|
my %Tkernels ;
|
|
|
|
my $fileCount = @ARGV;
|
|
my $fileName = "";
|
|
|
|
while (@ARGV) {
|
|
$fileName=shift (@ARGV);
|
|
|
|
|
|
if ($inplace) {
|
|
my $file_prehip = "$fileName" . ".prehip";
|
|
my $infile;
|
|
my $outfile;
|
|
if (-e $file_prehip) {
|
|
$infile = $file_prehip;
|
|
$outfile = $fileName;
|
|
} else {
|
|
system ("cp $fileName $file_prehip");
|
|
$infile = $file_prehip;
|
|
$outfile = $fileName;
|
|
}
|
|
|
|
open(INFILE,"<", $infile) or die "error: could not open $infile";
|
|
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
|
|
$OUTFILE = OUTFILE;
|
|
|
|
} else {
|
|
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
|
|
$OUTFILE = STDOUT;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Note : \b is used in perl to indicate the start of a word - typically that is what we want in this case:
|
|
#
|
|
|
|
# count of transforms in this file, init to 0 here:
|
|
my %ft;
|
|
clearStats(\%ft, \@statNames);
|
|
my $countIncludes = 0;
|
|
my $countKeywords = 0; # keywords like __global__, __shared__ - not converted by hipify but counted here.
|
|
my $warnings = 0;
|
|
my $warningsCublas = 0;
|
|
my $warningsCurand = 0;
|
|
my %warningTags; # hash with counts of particular unknown keywords.
|
|
|
|
my $lineCount = 0;
|
|
|
|
undef $/; # Read whole file at once, so we can match newlines.
|
|
while (<INFILE>)
|
|
{
|
|
|
|
#--------
|
|
# Compiler Defines
|
|
# __CUDACC__ is set by NVCC to indicate it is treating the input file as CUDA code (as opposed to host)
|
|
# Typically we want any code treated as CUDA code to be treated as accelerator code by Kalmar too
|
|
# __HIPCC__ will set KALMARCC
|
|
$ft{'def'} += s/\b__CUDACC__\b/__HIPCC__/g;
|
|
|
|
# __CUDA_ARCH is often used to detect when a function or kernel is being compiled for the device.
|
|
# Don't automaticall convert this - likely these will need special attention with HIP_ARCH_HAS_* macros
|
|
#$ft{'def'} += s/\b__CUDA_ARCH__\b/__HIP_ARCH__/g;
|
|
|
|
|
|
|
|
#--------
|
|
#Includes:
|
|
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime\.h[>"]/$1<hip\/hip_runtime.h>/;
|
|
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime_api\.h[>"]/$1<hip\/hip_runtime_api.h>/;
|
|
|
|
|
|
#--------
|
|
# Error codes and return types:
|
|
$ft{'err'} += s/\bcudaError_t\b/hipError_t/g;
|
|
$ft{'err'} += s/\bcudaError\b/hipError_t/g;
|
|
$ft{'err'} += s/\bcudaSuccess\b/hipSuccess/g;
|
|
|
|
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
|
|
$ft{'err'} += s/\bcudaErrorMemoryAllocation\b/hipErrorMemoryAllocation/g;
|
|
$ft{'err'} += s/\bcudaErrorMemoryFree\b/hipErrorMemoryFree/g;
|
|
$ft{'err'} += s/\bcudaErrorUnknownSymbol\b/hipErrorUnknownSymbol/g;
|
|
$ft{'err'} += s/\bcudaErrorOutOfResources\b/hipErrorOutOfResources/g;
|
|
$ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g;
|
|
$ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g;
|
|
$ft{'err'} += s/\bcudaErrorInvalidDevice\b/hipErrorInvalidDevice/g;
|
|
$ft{'err'} += s/\bcudaErrorNoDevice\b/hipErrorNoDevice/g;
|
|
$ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g;
|
|
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
|
|
|
|
# error APIs:
|
|
$ft{'err'} += s/\bcudaGetLastError\b/hipGetLastError/g;
|
|
$ft{'err'} += s/\bcudaPeekAtLastError\b/hipPeekAtLastError/g;
|
|
$ft{'err'} += s/\bcudaGetErrorName\b/hipGetErrorName/g;
|
|
$ft{'err'} += s/\bcudaGetErrorString\b/hipGetErrorString/g;
|
|
|
|
|
|
|
|
#--------
|
|
# Memcpy
|
|
$ft{'mem'} += s/\bcudaMemcpy\b/hipMemcpy/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyHostToHost\b/hipMemcpyHostToHost/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyHostToDevice\b/hipMemcpyHostToDevice/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyDeviceToHost\b/hipMemcpyDeviceToHost/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyDeviceToDevice\b/hipMemcpyDeviceToDevice/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyDefault\b/hipMemcpyDefault/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyToSymbol\b/hipMemcpyToSymbol/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMemset\b/hipMemset/g;
|
|
$ft{'mem'} += s/\bcudaMemsetAsync\b/hipMemsetAsync/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMemcpyAsync\b/hipMemcpyAsync/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMemGetInfo\b/hipMemGetInfo/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g;
|
|
|
|
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
|
|
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
|
|
|
|
#--------
|
|
# Memory management:
|
|
$ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g;
|
|
$ft{'mem'} += s/\bcudaMallocHost\b/hipHostMalloc/g; # note conversion to standard hipHost* naming convention
|
|
$ft{'mem'} += s/\bcudaFree\b/hipFree/g;
|
|
$ft{'mem'} += s/\bcudaFreeHost\b/hipHostFree/g; # note conversion to standard hipHost* naming convention
|
|
$ft{'mem'} += s/\bcudaHostAlloc\b/hipHostAlloc/g;
|
|
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
|
$ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostMallocDefault/g;
|
|
$ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostMallocPortable/g;
|
|
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
|
|
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
|
|
|
|
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
|
|
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
|
|
|
|
|
|
#--------
|
|
# Coordinate Indexing and Dimensions:
|
|
$ft{'coord_func'} += s/\bthreadIdx\.x\b/hipThreadIdx_x/g;
|
|
$ft{'coord_func'} += s/\bthreadIdx\.y\b/hipThreadIdx_y/g;
|
|
$ft{'coord_func'} += s/\bthreadIdx\.z\b/hipThreadIdx_z/g;
|
|
|
|
$ft{'coord_func'} += s/\bblockIdx\.x\b/hipBlockIdx_x/g;
|
|
$ft{'coord_func'} += s/\bblockIdx\.y\b/hipBlockIdx_y/g;
|
|
$ft{'coord_func'} += s/\bblockIdx\.z\b/hipBlockIdx_z/g;
|
|
|
|
$ft{'coord_func'} += s/\bblockDim\.x\b/hipBlockDim_x/g;
|
|
$ft{'coord_func'} += s/\bblockDim\.y\b/hipBlockDim_y/g;
|
|
$ft{'coord_func'} += s/\bblockDim\.z\b/hipBlockDim_z/g;
|
|
|
|
$ft{'coord_func'} += s/\bgridDim\.x\b/hipGridDim_x/g;
|
|
$ft{'coord_func'} += s/\bgridDim\.y\b/hipGridDim_y/g;
|
|
$ft{'coord_func'} += s/\bgridDim\.z\b/hipGridDim_z/g;
|
|
|
|
# hack to avoid replacing hipDeviceProp.warpSize call
|
|
$ft{'special_func'} += s/([^.])\bwarpSize\b/$1hipWarpSize/g;
|
|
|
|
|
|
#--------
|
|
# Events
|
|
$ft{'event'} += s/\bcudaEvent_t\b/hipEvent_t/g;
|
|
$ft{'event'} += s/\bcudaEventCreate\b/hipEventCreate/g;
|
|
$ft{'event'} += s/\bcudaEventCreateWithFlags\b/hipEventCreateWithFlags/g;
|
|
$ft{'event'} += s/\bcudaEventDestroy\b/hipEventDestroy/g;
|
|
$ft{'event'} += s/\bcudaEventRecord\b/hipEventRecord/g;
|
|
$ft{'event'} += s/\bcudaEventElapsedTime\b/hipEventElapsedTime/g;
|
|
$ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g;
|
|
|
|
#--------
|
|
# Streams
|
|
$ft{'stream'} += s/\bcudaStream_t\b/hipStream_t/g;
|
|
$ft{'stream'} += s/\bcudaStreamCreate\b/hipStreamCreate/g;
|
|
$ft{'stream'} += s/\bcudaStreamCreateWithFlags\b/hipStreamCreateWithFlags/g;
|
|
$ft{'stream'} += s/\bcudaStreamDestroy\b/hipStreamDestroy/g;
|
|
$ft{'stream'} += s/\bcudaStreamWaitEvent\b/hipStreamWaitEvent/g;
|
|
$ft{'stream'} += s/\bcudaStreamSynchronize\b/hipStreamSynchronize/g;
|
|
$ft{'stream'} += s/\bcudaStreamDefault\b/hipStreamDefault/g;
|
|
$ft{'stream'} += s/\bcudaStreamNonBlocking\b/hipStreamNonBlocking/g;
|
|
|
|
|
|
#--------
|
|
# Other synchronization
|
|
$ft{'dev'} += s/\bcudaDeviceSynchronize\b/hipDeviceSynchronize/g;
|
|
$ft{'dev'} += s/\bcudaThreadSynchronize\b/hipDeviceSynchronize/g; # translate deprecated cudaThreadSynchronize
|
|
$ft{'dev'} += s/\bcudaDeviceReset\b/hipDeviceReset/g;
|
|
$ft{'dev'} += s/\bcudaThreadExit\b/hipDeviceReset/g; # translate deprecated cudaThreadExit
|
|
$ft{'dev'} += s/\bcudaSetDevice\b/hipSetDevice/g;
|
|
$ft{'dev'} += s/\bcudaGetDevice\b/hipGetDevice/g;
|
|
|
|
#--------
|
|
# Device
|
|
$ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g;
|
|
$ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipGetDeviceProperties/g;
|
|
|
|
# Attribute
|
|
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimY\b/hipDeviceAttributeMaxBlockDimY/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimZ\b/hipDeviceAttributeMaxBlockDimZ/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxGridDimX\b/hipDeviceAttributeMaxGridDimX/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxGridDimY\b/hipDeviceAttributeMaxGridDimY/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxGridDimZ\b/hipDeviceAttributeMaxGridDimZ/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerBlock\b/hipDeviceAttributeMaxSharedMemoryPerBlock/g;
|
|
$ft{'err'} += s/\bcudaDevAttrTotalConstantMemory\b/hipDeviceAttributeTotalConstantMemory/g;
|
|
$ft{'err'} += s/\bcudaDevAttrWarpSize\b/hipDeviceAttributeWarpSize/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxRegistersPerBlock\b/hipDeviceAttributeMaxRegistersPerBlock/g;
|
|
$ft{'err'} += s/\bcudaDevAttrClockRate\b/hipDeviceAttributeClockRate/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMultiProcessorCount\b/hipDeviceAttributeMultiprocessorCount/g;
|
|
$ft{'err'} += s/\bcudaDevAttrComputeMode\b/hipDeviceAttributeComputeMode/g;
|
|
$ft{'err'} += s/\bcudaDevAttrL2CacheSize\b/hipDeviceAttributeL2CacheSize/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g;
|
|
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g;
|
|
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g;
|
|
$ft{'err'} += s/\bcudaDevAttrConcurrentKernels\b/hipDeviceAttributeConcurrentKernels/g;
|
|
$ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g;
|
|
$ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g;
|
|
$ft{'err'} += s/\bcudaDevAttrMemoryClockRate\b/hipDeviceAttributeMemoryClockRate/g;
|
|
$ft{'err'} += s/\bcudaDevAttrGlobalMemoryBusWidth\b/hipDeviceAttributeMemoryBusWidth/g;
|
|
$ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g;
|
|
$ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g;
|
|
|
|
# Cache config
|
|
$ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g;
|
|
$ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated
|
|
$ft{'dev'} += s/\bcudaDeviceGetCacheConfig\b/hipDeviceGetCacheConfig/g;
|
|
$ft{'dev'} += s/\bcudaThreadGetCacheConfig\b/hipDeviceGetCacheConfig/g; # translate deprecated
|
|
$ft{'dev'} += s/\bcudaFuncCache\b/hipFuncCache/g;
|
|
$ft{'dev'} += s/\bcudaFuncCachePreferNone\b/hipFuncCachePreferNone/g;
|
|
$ft{'dev'} += s/\bcudaFuncCachePreferShared\b/hipFuncCachePreferShared/g;
|
|
$ft{'dev'} += s/\bcudaFuncCachePreferL1\b/hipFuncCachePreferL1/g;
|
|
$ft{'dev'} += s/\bcudaFuncCachePreferEqual\b/hipFuncCachePreferEqual/g;
|
|
# function
|
|
$ft{'dev'} += s/\bcudaFuncSetCacheConfig\b/hipFuncSetCacheConfig/g;
|
|
|
|
|
|
$ft{'dev'} += s/\bcudaDriverGetVersion\b/hipDriverGetVersion/g;
|
|
|
|
#--------
|
|
# Peer2Peer
|
|
$ft{'dev'} += s/\bcudaDeviceCanAccessPeer\b/hipDeviceCanAccessPeer/g;
|
|
$ft{'dev'} += s/\bcudaDeviceDisablePeerAccess\b/hipDeviceDisablePeerAccess/g;
|
|
$ft{'dev'} += s/\bcudaDeviceEnablePeerAccess\b/hipDeviceEnablePeerAccess/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyPeerAsync\b/hipMemcpyPeerAsync/g;
|
|
$ft{'mem'} += s/\bcudaMemcpyPeer\b/hipMemcpyPeer/g;
|
|
|
|
|
|
# Shared mem:
|
|
$ft{'dev'} += s/\bcudaDeviceSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g;
|
|
$ft{'dev'} += s/\bcudaThreadSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g; # translate deprecated
|
|
$ft{'dev'} += s/\bcudaDeviceGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g;
|
|
$ft{'dev'} += s/\bcudaThreadGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g; # translate deprecated
|
|
$ft{'dev'} += s/\bcudaSharedMemConfig\b/hipSharedMemConfig/g;
|
|
$ft{'dev'} += s/\bcudaSharedMemBankSizeDefault\b/hipSharedMemBankSizeDefault/g;
|
|
$ft{'dev'} += s/\bcudaSharedMemBankSizeFourByte\b/hipSharedMemBankSizeFourByte/g;
|
|
$ft{'dev'} += s/\bcudaSharedMemBankSizeEightByte\b/hipSharedMemBankSizeEightByte/g;
|
|
|
|
$ft{'dev'} += s/\bcudaGetDeviceCount\b/hipGetDeviceCount/g;
|
|
|
|
#--------
|
|
# Profiler
|
|
#$aOt += s/\bcudaProfilerInitialize\b/hipProfilerInitialize/g; // see if these are called anywhere.
|
|
$ft{'other'} += s/\bcudaProfilerStart\b/hipProfilerStart/g;
|
|
$ft{'other'} += s/\bcudaProfilerStop\b/hipProfilerStop/g;
|
|
|
|
|
|
|
|
$countKeywords += m/__global__/;
|
|
$countKeywords += m/__shared__/;
|
|
|
|
#--------
|
|
# CUDA extern __shared__ syntax
|
|
# Note these only work if declaration is on a single line.
|
|
{
|
|
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
|
|
no warnings qw/uninitialized/;
|
|
|
|
my $k = 0;
|
|
|
|
# Match extern __shared__ type foo[]; syntax
|
|
# Replace as HIP_DYNAMIC_SHARED() macro
|
|
$k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;
|
|
|
|
# test patterns for the regular expression above:
|
|
#'extern __shared__ double foo[];'
|
|
#'extern __shared__ unsigned int foo[];'
|
|
#'extern volatile __shared__ double foo[];'
|
|
#'extern volatile __shared__ unsigned int sdata[];'
|
|
#'extern __shared__ volatile unsigned int sdata[];'
|
|
#'extern __shared__ T s[];'
|
|
#'extern __shared__ T::type s[];'
|
|
#'extern __shared__ blah<T>::type s[];'
|
|
#'extern __shared__ typename mapper<Float>::type s_data[];'
|
|
#'extern __attribute__((used)) __shared__ typename mapper<Float>::type s_data[];'
|
|
|
|
$ft{'extern_shared'} += $k;
|
|
}
|
|
|
|
#--------
|
|
# CUDA Launch Syntax
|
|
# Note these only work if launch is on a single line.
|
|
|
|
# Handle the <<numBlocks, blockDim>>> syntax:
|
|
{
|
|
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
|
|
no warnings qw/uninitialized/;
|
|
|
|
my $k = 0;
|
|
my $kernelName;
|
|
|
|
# Handle the <<numBlocks, blockDim, sharedSize, stream>>> syntax:
|
|
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, $6, /g;
|
|
$kernelName = $1 if $k;
|
|
|
|
# Handle the <<numBlocks, blockDim, sharedSize>>> syntax:
|
|
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, 0, /g;
|
|
$kernelName = $1 if $k;
|
|
|
|
# Handle the <<numBlocks, blockDim>>> syntax:
|
|
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), 0, 0, /g;
|
|
$kernelName = $1 if $k;
|
|
|
|
$ft{'kern'} += $k;
|
|
if ($k) {
|
|
$Tkernels{$kernelName} ++;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
unless ($no_translate_textures) {
|
|
$ft{'tex'} += s/\bcudaChannelFormatDesc\b/hipChannelFormatDesc/g;
|
|
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
|
|
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
|
|
|
|
$ft{'tex'} += s/\bcudaArray\b/hipArrary/g;
|
|
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
|
|
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
|
|
$ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g;
|
|
$ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g;
|
|
}
|
|
|
|
|
|
if ($count_conversions) {
|
|
while (/(\bhip[A-Z]\w+\b)/g) {
|
|
$convertedTags{$1}++;
|
|
|
|
#print STDERR "HIP: $1 : ", $translateTags{$1}, "\n";
|
|
}
|
|
}
|
|
|
|
|
|
# guess that we are in device code , or at least in a file that calls device code.
|
|
# will almost certainly call one of the coordiante functions - could be fooled by clever macros but usually works:
|
|
my $hasDeviceCode = $countKeywords + $ft{'coord_func'} + $ft{'math_func'} + $ft{'special_func'};
|
|
|
|
|
|
|
|
unless ($quiet_warnings) {
|
|
#print STDERR "Check WARNINGs\n";
|
|
# copy into array of lines, process line-by-line to show warnings:
|
|
if ($hasDeviceCode or (/\bcuda/) or (/<<<.*>>>/) or (/(\bcublas[A-Z]\w+)/) or (/(\bcurand[A-Z]\w+)/) ) {
|
|
my @lines = split /\n/, $_;
|
|
my $tmp = $_; # copies the whole file, could be a little smarter here...
|
|
my $line_num = 0;
|
|
|
|
foreach (@lines) {
|
|
|
|
$line_num ++;
|
|
|
|
# remove any whitelisted words:
|
|
foreach $w (@warn_whitelist) {
|
|
s/\b$w\b/ZAP/
|
|
}
|
|
|
|
my $tag ;
|
|
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
|
|
# flag any remaining code that look like cuda API calls, may want to add these to hipify
|
|
$tag = (defined $1) ? $1 : "Launch";
|
|
} elsif (/(\bcublas[A-Z]\w+)/) {
|
|
$warningsCublas++;
|
|
$tag = $1;
|
|
} elsif (/(\bcurand[A-Z]\w+)/) {
|
|
$warningsCurand++;
|
|
$tag = $1;
|
|
}
|
|
|
|
if (defined $tag) {
|
|
$warnings++;
|
|
$warningTags{$tag}++;
|
|
print STDERR " warning: $fileName:#$line_num : $_";
|
|
print STDERR "\n";
|
|
}
|
|
|
|
$s = warnUnsupportedSpecialFunctions($line_num);
|
|
$warnings += $s;
|
|
|
|
|
|
}
|
|
|
|
$_ = $tmp;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
#--------
|
|
# Math libraries
|
|
# To limit bogus translations, try to make sure we are in a kernel (ft{'builtin'} != 0):
|
|
if (not $no_translate_builtins and ($hasDeviceCode > 0)) {
|
|
$ft{'special_func'} += countSupportedSpecialFunctions();
|
|
}
|
|
|
|
|
|
|
|
|
|
#--------
|
|
# Print it!
|
|
# TODO - would like to move this code outside loop but it uses $_ which contains the whole file.
|
|
unless ($no_output) {
|
|
my $apiCalls = $ft{'err'} + $ft{'event'} + $ft{'mem'} + $ft{'stream'} + $ft{'dev'} + $ft{'def'} + $ft{'tex'} + $ft{'other'} + $ft{'math_func'};
|
|
my $kernStuff = $hasDeviceCode + $ft{'kern'};
|
|
my $totalCalls = $apiCalls + $kernStuff;
|
|
|
|
$is_dos = m/\r\n$/;
|
|
|
|
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
|
|
# If this file makes kernel builtin calls, and does not include the cuda_runtime.h,
|
|
# then add an #include to match "magic" includes provided by NVCC.
|
|
# This logic can miss cases where cuda_runtime.h is included by another include file.
|
|
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
|
|
}
|
|
print $OUTFILE "$_";
|
|
}
|
|
|
|
$lineCount = $_ =~ tr/\n//;
|
|
|
|
}
|
|
|
|
my $totalConverted = totalStats(\%ft);
|
|
|
|
#printf "TOTAL-CONV=%d\n", $totalConverted;
|
|
|
|
|
|
if (($totalConverted+$warnings) and $print_stats) {
|
|
printStats("info: converted", \@statNames, \%ft, $warnings, $lineCount);
|
|
print STDERR " in '$fileName'\n";
|
|
}
|
|
|
|
|
|
# Update totals for all files:
|
|
addStats(\%tt, \%ft);
|
|
$Twarnings += $warnings;
|
|
$TlineCount += $lineCount;
|
|
foreach $key (keys %warningTags) {
|
|
$TwarningTags{$key} += $warningTags{$key};
|
|
}
|
|
}
|
|
|
|
#-- Print total stats for all files processed:
|
|
if ($print_stats and ($fileCount > 1)) {
|
|
print STDERR "\n";
|
|
printStats("info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
|
|
print STDERR "\n";
|
|
|
|
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
|
|
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
|
|
}
|
|
|
|
my $kernelCnt = keys %Tkernels;
|
|
printf STDERR " kernels (%d total) : ", $kernelCnt;
|
|
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
|
|
printf STDERR " %s(%d)", $key, $Tkernels{$key};
|
|
}
|
|
print STDERR "\n";
|
|
|
|
|
|
print STDERR "\n";
|
|
}
|
|
|
|
if ($count_conversions) {
|
|
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
|
|
printf STDERR " %s %d\n", $key, $convertedTags{$key};
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
sub countSupportedSpecialFunctions
|
|
{
|
|
my $m = 0;
|
|
|
|
#supported special functions:
|
|
foreach $func (
|
|
# Synchronization:
|
|
"__syncthreads",
|
|
)
|
|
{
|
|
# match math at the beginning of a word, but not if it already has a namespace qualifier ('::') :
|
|
$m += m/[:]?[:]?\b($func)\b(\w*\()/g;
|
|
|
|
}
|
|
|
|
return $m;
|
|
}
|
|
|
|
sub warnUnsupportedSpecialFunctions
|
|
{
|
|
my $line_num = shift;
|
|
|
|
my $m = 0;
|
|
|
|
|
|
|
|
foreach $func (
|
|
# memory fence:
|
|
"__threadfence_block",
|
|
"__threadfence",
|
|
"__threadfence_system",
|
|
|
|
# Synchronization:
|
|
"__syncthreads_count",
|
|
"__syncthreads_and",
|
|
"__syncthreads_or",
|
|
|
|
# Read-only cache function:
|
|
"__ldg",
|
|
|
|
# Cross-lane and warp-vote instructions:
|
|
#"__all",
|
|
#"__any",
|
|
#"__ballot",
|
|
|
|
#"__popc",
|
|
#"__clz",
|
|
|
|
#"__shfl",
|
|
#"__shfl_up",
|
|
#"__shfl_down",
|
|
#"__shfl_xor",
|
|
|
|
"__prof_trigger",
|
|
|
|
# too popular, and we can't tell if we are in device or host code.
|
|
#"assert",
|
|
#"printf",
|
|
|
|
#"malloc",
|
|
#"free",
|
|
#"memset",
|
|
#"memcpy"
|
|
)
|
|
{
|
|
# match math at the beginning of a word, but not if it already has a namespace qualifier ('::') :
|
|
my $mt = m/[:]?[:]?\b($func)\b(\w*\()/g;
|
|
if ($mt) {
|
|
$m += $mt;
|
|
print STDERR " warning: $fileName:#$line_num : unsupported device function : $_\n";
|
|
}
|
|
|
|
}
|
|
|
|
return $m;
|
|
}
|