Merge pull request #1044 from emankov/master
[HIPIFY][hipify-perl] Formatting
[ROCm/hip commit: 3402f9110b]
Этот коммит содержится в:
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/perl -w
|
||||
##
|
||||
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
# Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -25,16 +25,16 @@ use Getopt::Long;
|
||||
|
||||
my $warn_whitelist ="";
|
||||
GetOptions(
|
||||
"print-stats" => \$print_stats # print the command-line, like a header.
|
||||
, "count-conversions" => \$count_conversions # count conversions.
|
||||
, "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions.
|
||||
"print-stats" => \$print_stats # print the command-line, like a header.
|
||||
, "count-conversions" => \$count_conversions # count conversions.
|
||||
, "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions.
|
||||
, "warn-whitelist=s"=> \$warn_whitelist
|
||||
, "no-translate-builtins" => \$no_translate_builtins # don't translate math functions.
|
||||
, "no-translate-textures" => \$no_translate_textures
|
||||
, "no-output" => \$no_output # don't write any translated output to stdout.
|
||||
, "inplace" => \$inplace # modify input file inplace, replacing input with hipified output, save backup in ".prehip" file.
|
||||
# If .prehip file exists, use that as input to hip.
|
||||
, "n" => \$n # combination of print_stats + no-output.
|
||||
, "no-translate-builtins" => \$no_translate_builtins # don't translate math functions.
|
||||
, "no-translate-textures" => \$no_translate_textures # don't translate texture functions.
|
||||
, "no-output" => \$no_output # don't write any translated output to stdout.
|
||||
, "inplace" => \$inplace # modify input file inplace, replacing input with hipified output, save backup in ".prehip" file.
|
||||
# If .prehip file exists, use that as input to hip.
|
||||
, "n" => \$n # combination of print_stats + no-output.
|
||||
);
|
||||
|
||||
$print_stats = 1 if $n;
|
||||
@@ -47,20 +47,17 @@ $no_output = 1 if $n;
|
||||
,"cudaStatus"
|
||||
,"cudaDevice"
|
||||
,"cudaDevice_t"
|
||||
|
||||
,"cudaIDs"
|
||||
,"cudaGridDim"
|
||||
,"cudaDimGrid"
|
||||
,"cudaDimBlock"
|
||||
,"cudaDeviceId"
|
||||
,"cudaDevices",
|
||||
|
||||
,"cudaGradOutput",
|
||||
,"cudaInput",
|
||||
,"cudaOutput",
|
||||
,"cudaGradInput",
|
||||
,"cudaIndices",
|
||||
|
||||
,"cudaColorSpinorField"
|
||||
,"cudaGaugeField"
|
||||
,"cudaMom"
|
||||
@@ -77,126 +74,96 @@ $no_output = 1 if $n;
|
||||
);
|
||||
#print "WW=@warn_whitelist\n";
|
||||
|
||||
|
||||
# Allow users to add their own functions.
|
||||
push (@warn_whitelist, split(',',$warn_whitelist));
|
||||
|
||||
|
||||
#---
|
||||
#Stats tracking code:
|
||||
@statNames = ("dev", "mem", "kern", 'coord_func', "math_func", "special_func", "stream", "event", "err", "def", "tex", "extern_shared", "other");
|
||||
|
||||
|
||||
#---
|
||||
#Compute total of all individual counts:
|
||||
sub totalStats
|
||||
{
|
||||
my %count = %{ shift() };
|
||||
|
||||
my $total = 0;
|
||||
foreach $key (keys %count) {
|
||||
$total += $count{$key};
|
||||
}
|
||||
|
||||
return $total;
|
||||
};
|
||||
|
||||
#---
|
||||
sub printStats
|
||||
{
|
||||
my $label = shift();
|
||||
my $label = shift();
|
||||
my @statNames = @{ shift() };
|
||||
my %counts = %{ shift() };
|
||||
my $warnings = shift();
|
||||
my %counts = %{ shift() };
|
||||
my $warnings = shift();
|
||||
my $loc = shift();
|
||||
|
||||
my $total = totalStats(\%counts);
|
||||
|
||||
my $total = totalStats(\%counts);
|
||||
printf STDERR "%s %d CUDA->HIP refs( ", $label, $total;
|
||||
|
||||
foreach $stat (@statNames) {
|
||||
printf STDERR "%s:%d ", $stat, $counts{$stat};
|
||||
}
|
||||
|
||||
printf STDERR ") warn:%d LOC:%d", $warnings, $loc;
|
||||
}
|
||||
|
||||
|
||||
#---
|
||||
# Add adder stats to dest. Used to add stats for current file to a running total for all files:
|
||||
# Add adder stats to dest. Used to add stats for current file to a running total for all files:
|
||||
sub addStats
|
||||
{
|
||||
my $dest_ref = shift() ;
|
||||
my $dest_ref = shift();
|
||||
my %adder = %{ shift() };
|
||||
|
||||
foreach $key (keys %adder) {
|
||||
$dest_ref->{$key} += $adder{$key};
|
||||
#printf ("D{$key} += %d => %d\n", $adder{$key}, $dest{$key});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#---
|
||||
sub clearStats
|
||||
{
|
||||
my $dest_ref = shift() ;
|
||||
my @statNames = @{ shift() };
|
||||
|
||||
foreach $stat (@statNames) {
|
||||
$dest_ref->{$stat} = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#---
|
||||
# count of transforms in all files:
|
||||
my %tt;
|
||||
clearStats(\%tt, \@statNames);
|
||||
|
||||
|
||||
|
||||
$Twarnings = 0;
|
||||
$TlineCount = 0;
|
||||
my %TwarningTags ;
|
||||
my %Tkernels ;
|
||||
|
||||
my %TwarningTags;
|
||||
my %Tkernels;
|
||||
my $fileCount = @ARGV;
|
||||
my $fileName = "";
|
||||
|
||||
while (@ARGV) {
|
||||
$fileName=shift (@ARGV);
|
||||
|
||||
|
||||
if ($inplace) {
|
||||
my $file_prehip = "$fileName" . ".prehip";
|
||||
my $infile;
|
||||
my $outfile;
|
||||
if (-e $file_prehip) {
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
} else {
|
||||
system ("cp $fileName $file_prehip");
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
}
|
||||
|
||||
my $infile;
|
||||
my $outfile;
|
||||
if (-e $file_prehip) {
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
} else {
|
||||
system ("cp $fileName $file_prehip");
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
}
|
||||
open(INFILE,"<", $infile) or die "error: could not open $infile";
|
||||
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
|
||||
$OUTFILE = OUTFILE;
|
||||
|
||||
} else {
|
||||
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
|
||||
$OUTFILE = STDOUT;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Note : \b is used in perl to indicate the start of a word - typically that is what we want in this case:
|
||||
#
|
||||
|
||||
# count of transforms in this file, init to 0 here:
|
||||
my %ft;
|
||||
clearStats(\%ft, \@statNames);
|
||||
@@ -206,44 +173,34 @@ while (@ARGV) {
|
||||
my $warningsCublas = 0;
|
||||
my $warningsCurand = 0;
|
||||
my %warningTags; # hash with counts of particular unknown keywords.
|
||||
|
||||
my $lineCount = 0;
|
||||
|
||||
undef $/; # Read whole file at once, so we can match newlines.
|
||||
while (<INFILE>)
|
||||
{
|
||||
|
||||
#--------
|
||||
# Compiler Defines
|
||||
# __CUDACC__ is set by NVCC to indicate it is treating the input file as CUDA code (as opposed to host)
|
||||
# Typically we want any code treated as CUDA code to be treated as accelerator code by Kalmar too
|
||||
# __HIPCC__ will set KALMARCC
|
||||
$ft{'def'} += s/\b__CUDACC__\b/__HIPCC__/g;
|
||||
|
||||
# __CUDA_ARCH is often used to detect when a function or kernel is being compiled for the device.
|
||||
# Don't automaticall convert this - likely these will need special attention with HIP_ARCH_HAS_* macros
|
||||
#$ft{'def'} += s/\b__CUDA_ARCH__\b/__HIP_ARCH__/g;
|
||||
|
||||
|
||||
|
||||
#--------
|
||||
#Includes:
|
||||
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime\.h[>"]/$1<hip\/hip_runtime.h>/;
|
||||
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime_api\.h[>"]/$1<hip\/hip_runtime_api.h>/;
|
||||
$countIncludes += s/(\s*#\s*include\s+)[<"]cuda_fp16\.h[>"]/$1<hip\/hip_fp16.h>/;
|
||||
|
||||
|
||||
#--------
|
||||
# Error codes and return types:
|
||||
$ft{'err'} += s/\bcudaError_t\b/hipError_t/g;
|
||||
$ft{'err'} += s/\bcudaError\b/hipError_t/g;
|
||||
$ft{'err'} += s/\bcudaSuccess\b/hipSuccess/g;
|
||||
|
||||
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
|
||||
$ft{'err'} += s/\bcudaErrorMemoryAllocation\b/hipErrorMemoryAllocation/g;
|
||||
$ft{'err'} += s/\bcudaErrorMemoryFree\b/hipErrorMemoryFree/g;
|
||||
$ft{'err'} += s/\bcudaErrorUnknownSymbol\b/hipErrorUnknownSymbol/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidSymbol\b/hipErrorInvalidSymbol/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidSymbol\b/hipErrorInvalidSymbol/g;
|
||||
$ft{'err'} += s/\bcudaErrorOutOfResources\b/hipErrorOutOfResources/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g;
|
||||
$ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g;
|
||||
@@ -253,15 +210,12 @@ while (@ARGV) {
|
||||
$ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g;
|
||||
$ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g;
|
||||
$ft{'err'} += s/\bcudaErrorPeerAccessAlreadyEnabled\b/hipErrorPeerAccessAlreadyEnabled/g;
|
||||
|
||||
# error APIs:
|
||||
#--------
|
||||
# error APIs:
|
||||
$ft{'err'} += s/\bcudaGetLastError\b/hipGetLastError/g;
|
||||
$ft{'err'} += s/\bcudaPeekAtLastError\b/hipPeekAtLastError/g;
|
||||
$ft{'err'} += s/\bcudaGetErrorName\b/hipGetErrorName/g;
|
||||
$ft{'err'} += s/\bcudaGetErrorString\b/hipGetErrorString/g;
|
||||
|
||||
|
||||
|
||||
#--------
|
||||
# Memcpy
|
||||
$ft{'mem'} += s/\bcudaMemcpy\b/hipMemcpy/g;
|
||||
@@ -272,48 +226,39 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaMemcpyDefault\b/hipMemcpyDefault/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpyToSymbol\s*\(\s*(\w+)\b/hipMemcpyToSymbol\(HIP_SYMBOL\($1\)/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpyFromSymbol\s*\(\s*(.+?)\s*,\s*(.+?)\b/hipMemcpyFromSymbol\($1, HIP_SYMBOL\($2\)/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemset\b/hipMemset/g;
|
||||
$ft{'mem'} += s/\bcudaMemsetAsync\b/hipMemsetAsync/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemcpyAsync\b/hipMemcpyAsync/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemGetInfo\b/hipMemGetInfo/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
|
||||
$ft{'mem'} += s/\bcudaPointerGetAttributes\b/hipPointerGetAttributes/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpyToArray\b/hipMemcpyToArray/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaGetSymbolAddress\s*\(\s*(.+?)\s*,\s*(.+?)\b/hipGetSymbolAddress\($1, HIP_SYMBOL\($2\)/g;
|
||||
$ft{'mem'} += s/\bcudaGetSymbolSize\s*\(\s*&(\w+)\s*,\s*(.+?)\b/hipGetSymbolSize(&$1, HIP_SYMBOL\($2\)/g;
|
||||
|
||||
#--------
|
||||
# Memory management:
|
||||
$ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g;
|
||||
$ft{'mem'} += s/\bcudaMallocHost\b/hipHostMalloc/g; # note conversion to standard hipHost* naming convention
|
||||
# note conversion to standard hipHost* naming convention
|
||||
$ft{'mem'} += s/\bcudaMallocHost\b/hipHostMalloc/g;
|
||||
$ft{'mem'} += s/\bcudaFree\b/hipFree/g;
|
||||
$ft{'mem'} += s/\bcudaFreeHost\b/hipHostFree/g; # note conversion to standard hipHost* naming convention
|
||||
$ft{'mem'} += s/\bcudaHostAlloc\b/hipHostMalloc/g;
|
||||
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostMallocDefault/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostMallocPortable/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
|
||||
$ft{'mem'} += s/\bcudaHostRegisterMapped\b/hipHostRegisterMapped/g;
|
||||
# note conversion to standard hipHost* naming convention
|
||||
$ft{'mem'} += s/\bcudaFreeHost\b/hipHostFree/g;
|
||||
$ft{'mem'} += s/\bcudaHostAlloc\b/hipHostMalloc/g;
|
||||
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostMallocDefault/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostMallocPortable/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
|
||||
$ft{'mem'} += s/\bcudaHostRegisterMapped\b/hipHostRegisterMapped/g;
|
||||
$ft{'mem'} += s/\bcudaHostRegister\b/hipHostRegister/g;
|
||||
$ft{'mem'} += s/\bcudaHostUnregister\b/hipHostUnregister/g;
|
||||
$ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
|
||||
$ft{'mem'} += s/\bcudaFreeArray\b/hipFreeArray/g;
|
||||
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
|
||||
|
||||
|
||||
#--------
|
||||
# Events
|
||||
$ft{'event'} += s/\bcudaEvent_t\b/hipEvent_t/g;
|
||||
@@ -325,7 +270,6 @@ while (@ARGV) {
|
||||
$ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g;
|
||||
$ft{'event'} += s/\bcudaEventDisableTiming\b/hipEventDisableTiming/g;
|
||||
$ft{'event'} += s/\bcudaEventQuery\b/hipEventQuery/g;
|
||||
|
||||
#--------
|
||||
# Streams
|
||||
$ft{'stream'} += s/\bcudaStream_t\b/hipStream_t/g;
|
||||
@@ -336,23 +280,22 @@ while (@ARGV) {
|
||||
$ft{'stream'} += s/\bcudaStreamSynchronize\b/hipStreamSynchronize/g;
|
||||
$ft{'stream'} += s/\bcudaStreamDefault\b/hipStreamDefault/g;
|
||||
$ft{'stream'} += s/\bcudaStreamNonBlocking\b/hipStreamNonBlocking/g;
|
||||
|
||||
|
||||
#--------
|
||||
# Other synchronization
|
||||
$ft{'dev'} += s/\bcudaDeviceSynchronize\b/hipDeviceSynchronize/g;
|
||||
$ft{'dev'} += s/\bcudaThreadSynchronize\b/hipDeviceSynchronize/g; # translate deprecated cudaThreadSynchronize
|
||||
# translate deprecated cudaThreadSynchronize
|
||||
$ft{'dev'} += s/\bcudaThreadSynchronize\b/hipDeviceSynchronize/g;
|
||||
$ft{'dev'} += s/\bcudaDeviceReset\b/hipDeviceReset/g;
|
||||
$ft{'dev'} += s/\bcudaThreadExit\b/hipDeviceReset/g; # translate deprecated cudaThreadExit
|
||||
# translate deprecated cudaThreadExit
|
||||
$ft{'dev'} += s/\bcudaThreadExit\b/hipDeviceReset/g;
|
||||
$ft{'dev'} += s/\bcudaSetDevice\b/hipSetDevice/g;
|
||||
$ft{'dev'} += s/\bcudaGetDevice\b/hipGetDevice/g;
|
||||
|
||||
#--------
|
||||
# Device
|
||||
$ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g;
|
||||
$ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipGetDeviceProperties/g;
|
||||
$ft{'dev'} += s/\bcudaDeviceGetPCIBusId\b/hipDeviceGetPCIBusId/g;
|
||||
|
||||
#--------
|
||||
# Attribute
|
||||
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g;
|
||||
$ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g;
|
||||
@@ -378,9 +321,10 @@ while (@ARGV) {
|
||||
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g;
|
||||
$ft{'err'} += s/\bcudaDevAttrMemoryClockRate\b/hipDeviceAttributeMemoryClockRate/g;
|
||||
$ft{'err'} += s/\bcudaDevAttrGlobalMemoryBusWidth\b/hipDeviceAttributeMemoryBusWidth/g;
|
||||
#--------
|
||||
$ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g;
|
||||
$ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g;
|
||||
|
||||
#--------
|
||||
# Cache config
|
||||
$ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g;
|
||||
$ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated
|
||||
@@ -393,10 +337,7 @@ while (@ARGV) {
|
||||
$ft{'dev'} += s/\bcudaFuncCachePreferEqual\b/hipFuncCachePreferEqual/g;
|
||||
# function
|
||||
$ft{'dev'} += s/\bcudaFuncSetCacheConfig\b/hipFuncSetCacheConfig/g;
|
||||
|
||||
|
||||
$ft{'dev'} += s/\bcudaDriverGetVersion\b/hipDriverGetVersion/g;
|
||||
|
||||
#--------
|
||||
# Peer2Peer
|
||||
$ft{'dev'} += s/\bcudaDeviceCanAccessPeer\b/hipDeviceCanAccessPeer/g;
|
||||
@@ -409,8 +350,7 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaIpcGetMemHandle\b/hipIpcGetMemHandle/g;
|
||||
$ft{'mem'} += s/\bcudaIpcMemHandle_t\b/hipIpcMemHandle_t/g;
|
||||
$ft{'mem'} += s/\bcudaIpcMemLazyEnablePeerAccess\b/hipIpcMemLazyEnablePeerAccess/g;
|
||||
|
||||
|
||||
#--------
|
||||
# Shared mem:
|
||||
$ft{'dev'} += s/\bcudaDeviceSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g;
|
||||
$ft{'dev'} += s/\bcudaThreadSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g; # translate deprecated
|
||||
@@ -420,33 +360,25 @@ while (@ARGV) {
|
||||
$ft{'dev'} += s/\bcudaSharedMemBankSizeDefault\b/hipSharedMemBankSizeDefault/g;
|
||||
$ft{'dev'} += s/\bcudaSharedMemBankSizeFourByte\b/hipSharedMemBankSizeFourByte/g;
|
||||
$ft{'dev'} += s/\bcudaSharedMemBankSizeEightByte\b/hipSharedMemBankSizeEightByte/g;
|
||||
|
||||
$ft{'dev'} += s/\bcudaGetDeviceCount\b/hipGetDeviceCount/g;
|
||||
|
||||
#--------
|
||||
# Profiler
|
||||
#$aOt += s/\bcudaProfilerInitialize\b/hipProfilerInitialize/g; // see if these are called anywhere.
|
||||
#$aOt += s/\bcudaProfilerInitialize\b/hipProfilerInitialize/g;
|
||||
$ft{'other'} += s/\bcudaProfilerStart\b/hipProfilerStart/g;
|
||||
$ft{'other'} += s/\bcudaProfilerStop\b/hipProfilerStop/g;
|
||||
|
||||
|
||||
|
||||
#--------
|
||||
$countKeywords += m/__global__/;
|
||||
$countKeywords += m/__shared__/;
|
||||
|
||||
#--------
|
||||
# CUDA extern __shared__ syntax
|
||||
# Note these only work if declaration is on a single line.
|
||||
{
|
||||
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
|
||||
no warnings qw/uninitialized/;
|
||||
|
||||
my $k = 0;
|
||||
|
||||
# Match extern __shared__ type foo[]; syntax
|
||||
# Replace as HIP_DYNAMIC_SHARED() macro
|
||||
$k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;
|
||||
|
||||
# test patterns for the regular expression above:
|
||||
#'extern __shared__ double foo[];'
|
||||
#'extern __shared__ unsigned int foo[];'
|
||||
@@ -458,47 +390,35 @@ while (@ARGV) {
|
||||
#'extern __shared__ blah<T>::type s[];'
|
||||
#'extern __shared__ typename mapper<Float>::type s_data[];'
|
||||
#'extern __attribute__((used)) __shared__ typename mapper<Float>::type s_data[];'
|
||||
|
||||
$ft{'extern_shared'} += $k;
|
||||
}
|
||||
|
||||
#--------
|
||||
# CUDA Launch Syntax
|
||||
# Note these only work if launch is on a single line.
|
||||
|
||||
# Handle the <<numBlocks, blockDim>>> syntax:
|
||||
{
|
||||
# match uses ? for <.*> which will be unitialized if this is not present in launch syntax.
|
||||
no warnings qw/uninitialized/;
|
||||
|
||||
my $k = 0;
|
||||
my $kernelName;
|
||||
|
||||
# Handle the <<numBlocks, blockDim, sharedSize, stream>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, $6, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
# Handle the <<numBlocks, blockDim, sharedSize>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, 0, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
# Handle the <<numBlocks, blockDim>>> syntax:
|
||||
$k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), 0, 0, /g;
|
||||
$kernelName = $1 if $k;
|
||||
|
||||
$ft{'kern'} += $k;
|
||||
if ($k) {
|
||||
$Tkernels{$kernelName} ++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
unless ($no_translate_textures) {
|
||||
$ft{'tex'} += s/\bcudaChannelFormatDesc\b/hipChannelFormatDesc/g;
|
||||
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
|
||||
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
|
||||
|
||||
$ft{'tex'} += s/\bcudaArray\b/hipArray/g;
|
||||
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
|
||||
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
|
||||
@@ -508,23 +428,15 @@ while (@ARGV) {
|
||||
$ft{'tex'} += s/\bcudaAddressMode/hipAddressMode/g;
|
||||
$ft{'tex'} += s/\bcudaFilterMode/hipFilterMode/g;
|
||||
}
|
||||
|
||||
|
||||
if ($count_conversions) {
|
||||
while (/(\bhip[A-Z]\w+\b)/g) {
|
||||
$convertedTags{$1}++;
|
||||
|
||||
#print STDERR "HIP: $1 : ", $translateTags{$1}, "\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# guess that we are in device code , or at least in a file that calls device code.
|
||||
# will almost certainly call one of the coordiante functions - could be fooled by clever macros but usually works:
|
||||
my $hasDeviceCode = $countKeywords + $ft{'coord_func'} + $ft{'math_func'} + $ft{'special_func'};
|
||||
|
||||
|
||||
|
||||
unless ($quiet_warnings) {
|
||||
#print STDERR "Check WARNINGs\n";
|
||||
# copy into array of lines, process line-by-line to show warnings:
|
||||
@@ -532,58 +444,41 @@ while (@ARGV) {
|
||||
my @lines = split /\n/, $_;
|
||||
my $tmp = $_; # copies the whole file, could be a little smarter here...
|
||||
my $line_num = 0;
|
||||
|
||||
foreach (@lines) {
|
||||
|
||||
$line_num ++;
|
||||
|
||||
# remove any whitelisted words:
|
||||
foreach $w (@warn_whitelist) {
|
||||
s/\b$w\b/ZAP/
|
||||
}
|
||||
|
||||
my $tag ;
|
||||
my $tag;
|
||||
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
|
||||
# flag any remaining code that look like cuda API calls, may want to add these to hipify
|
||||
$tag = (defined $1) ? $1 : "Launch";
|
||||
} elsif (/(\bcublas[A-Z]\w+)/) {
|
||||
} elsif (/(\bcublas[A-Z]\w+)/) {
|
||||
$warningsCublas++;
|
||||
$tag = $1;
|
||||
} elsif (/(\bcurand[A-Z]\w+)/) {
|
||||
$tag = $1;
|
||||
} elsif (/(\bcurand[A-Z]\w+)/) {
|
||||
$warningsCurand++;
|
||||
$tag = $1;
|
||||
}
|
||||
|
||||
if (defined $tag) {
|
||||
$tag = $1;
|
||||
}
|
||||
if (defined $tag) {
|
||||
$warnings++;
|
||||
$warningTags{$tag}++;
|
||||
print STDERR " warning: $fileName:#$line_num : $_";
|
||||
print STDERR "\n";
|
||||
}
|
||||
|
||||
$s = warnUnsupportedSpecialFunctions($line_num);
|
||||
$warnings += $s;
|
||||
|
||||
|
||||
}
|
||||
|
||||
$_ = $tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#--------
|
||||
# Math libraries
|
||||
# To limit bogus translations, try to make sure we are in a kernel (ft{'builtin'} != 0):
|
||||
if (not $no_translate_builtins and ($hasDeviceCode > 0)) {
|
||||
$ft{'special_func'} += countSupportedSpecialFunctions();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#--------
|
||||
# Print it!
|
||||
# TODO - would like to move this code outside loop but it uses $_ which contains the whole file.
|
||||
@@ -591,33 +486,23 @@ while (@ARGV) {
|
||||
my $apiCalls = $ft{'err'} + $ft{'event'} + $ft{'mem'} + $ft{'stream'} + $ft{'dev'} + $ft{'def'} + $ft{'tex'} + $ft{'other'} + $ft{'math_func'};
|
||||
my $kernStuff = $hasDeviceCode + $ft{'kern'};
|
||||
my $totalCalls = $apiCalls + $kernStuff;
|
||||
|
||||
$is_dos = m/\r\n$/;
|
||||
|
||||
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
|
||||
# If this file makes kernel builtin calls, and does not include the cuda_runtime.h,
|
||||
# then add an #include to match "magic" includes provided by NVCC.
|
||||
# This logic can miss cases where cuda_runtime.h is included by another include file.
|
||||
# This logic can miss cases where cuda_runtime.h is included by another include file.
|
||||
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
|
||||
}
|
||||
print $OUTFILE "$_";
|
||||
}
|
||||
|
||||
$lineCount = $_ =~ tr/\n//;
|
||||
|
||||
}
|
||||
|
||||
my $totalConverted = totalStats(\%ft);
|
||||
|
||||
#printf "TOTAL-CONV=%d\n", $totalConverted;
|
||||
|
||||
|
||||
if (($totalConverted+$warnings) and $print_stats) {
|
||||
printStats("info: converted", \@statNames, \%ft, $warnings, $lineCount);
|
||||
print STDERR " in '$fileName'\n";
|
||||
}
|
||||
|
||||
|
||||
# Update totals for all files:
|
||||
addStats(\%tt, \%ft);
|
||||
$Twarnings += $warnings;
|
||||
@@ -626,41 +511,31 @@ while (@ARGV) {
|
||||
$TwarningTags{$key} += $warningTags{$key};
|
||||
}
|
||||
}
|
||||
|
||||
#-- Print total stats for all files processed:
|
||||
if ($print_stats and ($fileCount > 1)) {
|
||||
print STDERR "\n";
|
||||
printStats("info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
|
||||
print STDERR "\n";
|
||||
|
||||
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
|
||||
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
|
||||
}
|
||||
|
||||
my $kernelCnt = keys %Tkernels;
|
||||
printf STDERR " kernels (%d total) : ", $kernelCnt;
|
||||
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
|
||||
printf STDERR " %s(%d)", $key, $Tkernels{$key};
|
||||
}
|
||||
print STDERR "\n";
|
||||
|
||||
|
||||
print STDERR "\n";
|
||||
}
|
||||
|
||||
if ($count_conversions) {
|
||||
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
|
||||
printf STDERR " %s %d\n", $key, $convertedTags{$key};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
sub countSupportedSpecialFunctions
|
||||
{
|
||||
my $m = 0;
|
||||
|
||||
#supported special functions:
|
||||
foreach $func (
|
||||
# Synchronization:
|
||||
@@ -669,53 +544,39 @@ sub countSupportedSpecialFunctions
|
||||
{
|
||||
# match math at the beginning of a word, but not if it already has a namespace qualifier ('::') :
|
||||
$m += m/[:]?[:]?\b($func)\b(\w*\()/g;
|
||||
|
||||
}
|
||||
|
||||
return $m;
|
||||
}
|
||||
|
||||
sub warnUnsupportedSpecialFunctions
|
||||
{
|
||||
my $line_num = shift;
|
||||
|
||||
my $m = 0;
|
||||
|
||||
|
||||
|
||||
foreach $func (
|
||||
# memory fence:
|
||||
"__threadfence_block",
|
||||
"__threadfence",
|
||||
"__threadfence_system",
|
||||
|
||||
# Synchronization:
|
||||
"__syncthreads_count",
|
||||
"__syncthreads_and",
|
||||
"__syncthreads_or",
|
||||
|
||||
# Read-only cache function:
|
||||
"__ldg",
|
||||
|
||||
# Cross-lane and warp-vote instructions:
|
||||
#"__all",
|
||||
#"__any",
|
||||
#"__ballot",
|
||||
|
||||
#"__popc",
|
||||
#"__clz",
|
||||
|
||||
#"__shfl",
|
||||
#"__shfl_up",
|
||||
#"__shfl_down",
|
||||
#"__shfl_xor",
|
||||
|
||||
"__prof_trigger",
|
||||
|
||||
# too popular, and we can't tell if we are in device or host code.
|
||||
#"assert",
|
||||
#"printf",
|
||||
|
||||
#"malloc",
|
||||
#"free",
|
||||
#"memset",
|
||||
@@ -728,8 +589,6 @@ sub warnUnsupportedSpecialFunctions
|
||||
$m += $mt;
|
||||
print STDERR " warning: $fileName:#$line_num : unsupported device function : $_\n";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return $m;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user