[HIPIFY][perl] Generate transformKernelLaunch
+ Update hipify-perl accordingly + Minor refactoring
This commit is contained in:
+173
-171
@@ -26,6 +26,7 @@
|
||||
|
||||
use Getopt::Long;
|
||||
my $whitelist = "";
|
||||
my $fileName = "";
|
||||
|
||||
GetOptions(
|
||||
"examine" => \$examine # Combines -no-output and -print-stats options.
|
||||
@@ -1602,184 +1603,45 @@ sub transformExternShared {
|
||||
$ft{ 'extern_shared' } += $k;
|
||||
}
|
||||
|
||||
# Count of transforms in all files
|
||||
my %tt;
|
||||
clearStats(\%tt, \@statNames);
|
||||
$Twarnings = 0;
|
||||
$TlineCount = 0;
|
||||
my %TwarningTags;
|
||||
my %Tkernels;
|
||||
my $fileCount = @ARGV;
|
||||
my $fileName = "";
|
||||
# CUDA Kernel Launch Syntax
|
||||
sub transformKernelLaunch {
|
||||
my $TkernRef = @_;
|
||||
no warnings qw/uninitialized/;
|
||||
my $k = 0;
|
||||
|
||||
while (@ARGV) {
|
||||
$fileName=shift (@ARGV);
|
||||
if ($inplace) {
|
||||
my $file_prehip = "$fileName" . ".prehip";
|
||||
my $infile;
|
||||
my $outfile;
|
||||
if (-e $file_prehip) {
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
} else {
|
||||
system ("cp $fileName $file_prehip");
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
}
|
||||
open(INFILE,"<", $infile) or die "error: could not open $infile";
|
||||
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
|
||||
$OUTFILE = OUTFILE;
|
||||
} else {
|
||||
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
|
||||
$OUTFILE = STDOUT;
|
||||
}
|
||||
# Note : \b is used in perl to indicate the start of a word
|
||||
# Count of transforms in this file
|
||||
my %ft;
|
||||
clearStats(\%ft, \@statNames);
|
||||
my $countIncludes = 0;
|
||||
my $countKeywords = 0;
|
||||
my $warnings = 0;
|
||||
my %warningTags;
|
||||
my $lineCount = 0;
|
||||
undef $/;
|
||||
# Read whole file at once, so we can match newlines
|
||||
while (<INFILE>)
|
||||
{
|
||||
simpleSubstitutions();
|
||||
# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
|
||||
# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
|
||||
|
||||
$countKeywords += m/__global__/;
|
||||
$countKeywords += m/__shared__/;
|
||||
# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
|
||||
# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
|
||||
|
||||
transformExternShared();
|
||||
# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
|
||||
# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
|
||||
|
||||
# CUDA Launch Syntax. Note these only work if launch is on a single line.
|
||||
{
|
||||
# Match uses ? for <.*> which will be unitialized if this is not present in launch syntax
|
||||
no warnings qw/uninitialized/;
|
||||
my $k = 0;
|
||||
# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
|
||||
# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim, sharedSize, stream>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
|
||||
# Handle the kern<<<numBlocks, blockDim, sharedSize, stream>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
|
||||
# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
|
||||
# Handle the kern<<<Dg, Db>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim, sharedSize, stream>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
|
||||
# Handle the kern<<<numBlocks, blockDim, sharedSize, stream>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
|
||||
# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
|
||||
# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim, sharedSize>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
|
||||
# Handle the kern<<<numBlocks, blockDim, sharedSize>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim, sharedSize>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
|
||||
# Handle the kern<<<numBlocks, blockDim, sharedSize>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
|
||||
# Handle the kern<<<numBlocks, blockDim>>> syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
|
||||
|
||||
# Handle the kern<...><<<numBlocks, blockDim>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
|
||||
# Handle the kern<<<numBlocks, blockDim>>> syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
|
||||
|
||||
if ($k) {
|
||||
$ft{'kernel_launch'} += $k;
|
||||
$Tkernels{$1} ++;
|
||||
}
|
||||
}
|
||||
if ($print_stats) {
|
||||
while (/(\bhip[A-Z]\w+\b)/g) {
|
||||
$convertedTags{$1}++;
|
||||
}
|
||||
}
|
||||
my $hasDeviceCode = $countKeywords + $ft{'device_function'};
|
||||
unless ($quiet_warnings) {
|
||||
# Copy into array of lines, process line-by-line to show warnings
|
||||
if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) {
|
||||
my @lines = split /\n/, $_;
|
||||
# Copy the whole file
|
||||
my $tmp = $_;
|
||||
my $line_num = 0;
|
||||
foreach (@lines) {
|
||||
$line_num ++;
|
||||
# Remove any whitelisted words
|
||||
foreach $w (@whitelist) {
|
||||
s/\b$w\b/ZAP/
|
||||
}
|
||||
my $tag;
|
||||
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
|
||||
# Flag any remaining code that look like cuda API calls: may want to add these to hipify
|
||||
$tag = (defined $1) ? $1 : "Launch";
|
||||
}
|
||||
if (defined $tag) {
|
||||
$warnings++;
|
||||
$warningTags{$tag}++;
|
||||
print STDERR " warning: $fileName:#$line_num : $_";
|
||||
print STDERR "\n";
|
||||
}
|
||||
$s = warnUnsupportedDeviceFunctions($line_num);
|
||||
$warnings += $s;
|
||||
}
|
||||
$_ = $tmp;
|
||||
}
|
||||
}
|
||||
if ($hasDeviceCode > 0) {
|
||||
$ft{'device_function'} += countSupportedDeviceFunctions();
|
||||
}
|
||||
transformHostFunctions();
|
||||
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
|
||||
unless ($no_output) {
|
||||
my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
|
||||
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
|
||||
my $totalCalls = $apiCalls + $kernStuff;
|
||||
$is_dos = m/\r\n$/;
|
||||
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
|
||||
# TODO: implement hipify-clang's logic with header files AMAP
|
||||
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
|
||||
}
|
||||
print $OUTFILE "$_";
|
||||
}
|
||||
$lineCount = $_ =~ tr/\n//;
|
||||
}
|
||||
my $totalConverted = totalStats(\%ft);
|
||||
if (($totalConverted+$warnings) and $print_stats) {
|
||||
printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount);
|
||||
print STDERR " in '$fileName'\n";
|
||||
}
|
||||
# Update totals for all files
|
||||
addStats(\%tt, \%ft);
|
||||
$Twarnings += $warnings;
|
||||
$TlineCount += $lineCount;
|
||||
foreach $key (keys %warningTags) {
|
||||
$TwarningTags{$key} += $warningTags{$key};
|
||||
}
|
||||
}
|
||||
# Print total stats for all files processed:
|
||||
if ($print_stats and ($fileCount > 1)) {
|
||||
print STDERR "\n";
|
||||
printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
|
||||
print STDERR "\n";
|
||||
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
|
||||
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
|
||||
}
|
||||
my $kernelCnt = keys %Tkernels;
|
||||
printf STDERR " kernels (%d total) : ", $kernelCnt;
|
||||
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
|
||||
printf STDERR " %s(%d)", $key, $Tkernels{$key};
|
||||
}
|
||||
print STDERR "\n";
|
||||
print STDERR "\n";
|
||||
}
|
||||
if ($print_stats) {
|
||||
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
|
||||
printf STDERR " %s %d\n", $key, $convertedTags{$key};
|
||||
if ($k) {
|
||||
$ft{'kernel_launch'} += $k;
|
||||
@$TkernRef{$1} ++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2420,3 +2282,143 @@ sub warnUnsupportedDeviceFunctions {
|
||||
}
|
||||
return $m;
|
||||
}
|
||||
|
||||
# Count of transforms in all files
|
||||
my %tt;
|
||||
clearStats(\%tt, \@statNames);
|
||||
$Twarnings = 0;
|
||||
$TlineCount = 0;
|
||||
my %TwarningTags;
|
||||
my %Tkernels;
|
||||
my $fileCount = @ARGV;
|
||||
|
||||
while (@ARGV) {
|
||||
$fileName=shift (@ARGV);
|
||||
if ($inplace) {
|
||||
my $file_prehip = "$fileName" . ".prehip";
|
||||
my $infile;
|
||||
my $outfile;
|
||||
if (-e $file_prehip) {
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
} else {
|
||||
system ("cp $fileName $file_prehip");
|
||||
$infile = $file_prehip;
|
||||
$outfile = $fileName;
|
||||
}
|
||||
open(INFILE,"<", $infile) or die "error: could not open $infile";
|
||||
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
|
||||
$OUTFILE = OUTFILE;
|
||||
} else {
|
||||
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
|
||||
$OUTFILE = STDOUT;
|
||||
}
|
||||
# Note : \b is used in perl to indicate the start of a word
|
||||
# Count of transforms in this file
|
||||
my %ft;
|
||||
clearStats(\%ft, \@statNames);
|
||||
my $countIncludes = 0;
|
||||
my $countKeywords = 0;
|
||||
my $warnings = 0;
|
||||
my %warningTags;
|
||||
my $lineCount = 0;
|
||||
undef $/;
|
||||
# Read whole file at once, so we can match newlines
|
||||
while (<INFILE>)
|
||||
{
|
||||
simpleSubstitutions();
|
||||
|
||||
$countKeywords += m/__global__/;
|
||||
$countKeywords += m/__shared__/;
|
||||
|
||||
transformExternShared();
|
||||
transformKernelLaunch(\%Tkernels);
|
||||
|
||||
if ($print_stats) {
|
||||
while (/(\bhip[A-Z]\w+\b)/g) {
|
||||
$convertedTags{$1}++;
|
||||
}
|
||||
}
|
||||
my $hasDeviceCode = $countKeywords + $ft{'device_function'};
|
||||
unless ($quiet_warnings) {
|
||||
# Copy into array of lines, process line-by-line to show warnings
|
||||
if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) {
|
||||
my @lines = split /\n/, $_;
|
||||
# Copy the whole file
|
||||
my $tmp = $_;
|
||||
my $line_num = 0;
|
||||
foreach (@lines) {
|
||||
$line_num ++;
|
||||
# Remove any whitelisted words
|
||||
foreach $w (@whitelist) {
|
||||
s/\b$w\b/ZAP/
|
||||
}
|
||||
my $tag;
|
||||
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
|
||||
# Flag any remaining code that look like cuda API calls: may want to add these to hipify
|
||||
$tag = (defined $1) ? $1 : "Launch";
|
||||
}
|
||||
if (defined $tag) {
|
||||
$warnings++;
|
||||
$warningTags{$tag}++;
|
||||
print STDERR " warning: $fileName:#$line_num : $_";
|
||||
print STDERR "\n";
|
||||
}
|
||||
$s = warnUnsupportedDeviceFunctions($line_num);
|
||||
$warnings += $s;
|
||||
}
|
||||
$_ = $tmp;
|
||||
}
|
||||
}
|
||||
if ($hasDeviceCode > 0) {
|
||||
$ft{'device_function'} += countSupportedDeviceFunctions();
|
||||
}
|
||||
transformHostFunctions();
|
||||
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
|
||||
unless ($no_output) {
|
||||
my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
|
||||
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
|
||||
my $totalCalls = $apiCalls + $kernStuff;
|
||||
$is_dos = m/\r\n$/;
|
||||
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
|
||||
# TODO: implement hipify-clang's logic with header files AMAP
|
||||
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
|
||||
}
|
||||
print $OUTFILE "$_";
|
||||
}
|
||||
$lineCount = $_ =~ tr/\n//;
|
||||
}
|
||||
my $totalConverted = totalStats(\%ft);
|
||||
if (($totalConverted+$warnings) and $print_stats) {
|
||||
printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount);
|
||||
print STDERR " in '$fileName'\n";
|
||||
}
|
||||
# Update totals for all files
|
||||
addStats(\%tt, \%ft);
|
||||
$Twarnings += $warnings;
|
||||
$TlineCount += $lineCount;
|
||||
foreach $key (keys %warningTags) {
|
||||
$TwarningTags{$key} += $warningTags{$key};
|
||||
}
|
||||
}
|
||||
# Print total stats for all files processed:
|
||||
if ($print_stats and ($fileCount > 1)) {
|
||||
print STDERR "\n";
|
||||
printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
|
||||
print STDERR "\n";
|
||||
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
|
||||
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
|
||||
}
|
||||
my $kernelCnt = keys %Tkernels;
|
||||
printf STDERR " kernels (%d total) : ", $kernelCnt;
|
||||
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
|
||||
printf STDERR " %s(%d)", $key, $Tkernels{$key};
|
||||
}
|
||||
print STDERR "\n";
|
||||
print STDERR "\n";
|
||||
}
|
||||
if ($print_stats) {
|
||||
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
|
||||
printf STDERR " %s %d\n", $key, $convertedTags{$key};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,10 +64,11 @@ namespace perl {
|
||||
const std::string triple_tab = double_tab + tab;
|
||||
const std::string sSub = "sub";
|
||||
const std::string sReturn_0 = "return 0;\n";
|
||||
const std::string sReturn_m = "return $m;\n";
|
||||
const std::string sReturn_k = "return $k;\n";
|
||||
const std::string sForeach = "foreach $func (\n";
|
||||
const std::string sMy = "my $m = 0;\n";
|
||||
|
||||
const std::string sMy_k = "my $k = 0;";
|
||||
const std::string sNoWarns = "no warnings qw/uninitialized/;";
|
||||
|
||||
const std::string sCudaDevice = "cudaDevice";
|
||||
const std::string sCudaDeviceId = "cudaDeviceId";
|
||||
const std::string sCudaDevices = "cudaDevices";
|
||||
@@ -182,17 +183,59 @@ namespace perl {
|
||||
void generateExternShared(std::unique_ptr<std::ostream>& streamPtr) {
|
||||
*streamPtr.get() << std::endl << "# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro" << std::endl;
|
||||
*streamPtr.get() << sSub << " transformExternShared" << " {" << std::endl;
|
||||
*streamPtr.get() << tab << "no warnings qw/uninitialized/;" << std::endl;
|
||||
*streamPtr.get() << tab << "my $k = 0;" << std::endl;
|
||||
*streamPtr.get() << tab << sNoWarns << std::endl;
|
||||
*streamPtr.get() << tab << sMy_k << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/extern\\s+([\\w\\(\\)]+)?\\s*__shared__\\s+([\\w:<>\\s]+)\\s+(\\w+)\\s*\\[\\s*\\]\\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$ft{ 'extern_shared' } += $k;" << std::endl << "}" << std::endl;
|
||||
}
|
||||
|
||||
void generateKernelLaunch(std::unique_ptr<std::ostream>& streamPtr) {
|
||||
*streamPtr.get() << std::endl << "# CUDA Kernel Launch Syntax" << std::endl;
|
||||
*streamPtr.get() << sSub << " transformKernelLaunch" << " {" << std::endl;
|
||||
*streamPtr.get() << tab << "my $TkernRef = @_;" << std::endl;
|
||||
*streamPtr.get() << tab << sNoWarns << std::endl;
|
||||
*streamPtr.get() << tab << sMy_k << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "if ($k) {" << std::endl;
|
||||
*streamPtr.get() << double_tab << "$ft{'kernel_launch'} += $k;" << std::endl;
|
||||
*streamPtr.get() << double_tab << "@$TkernRef{$1} ++;" << std::endl << tab << "}" << std::endl << "}" << std::endl;
|
||||
}
|
||||
|
||||
void generateHostFunctions(std::unique_ptr<std::ostream>& streamPtr) {
|
||||
*streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy;
|
||||
*streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy_k << std::endl;
|
||||
std::set<std::string> &funcSet = DeviceSymbolFunctions0;
|
||||
const std::string s0 = "$m += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,/$func\\(";
|
||||
const std::string s1 = "$m += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,\\s*([^,\\)]+)\\s*(,\\s*|\\))\\s*/$func\\($2, ";
|
||||
const std::string s0 = "$k += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,/$func\\(";
|
||||
const std::string s1 = "$k += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,\\s*([^,\\)]+)\\s*(,\\s*|\\))\\s*/$func\\($2, ";
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
*streamPtr.get() << tab + sForeach;
|
||||
switch (i) {
|
||||
@@ -223,7 +266,7 @@ namespace perl {
|
||||
}
|
||||
*streamPtr.get() << tab << "}" << std::endl;
|
||||
}
|
||||
*streamPtr.get() << tab << sReturn_m << "}" << std::endl;
|
||||
*streamPtr.get() << tab << sReturn_k << "}" << std::endl;
|
||||
}
|
||||
|
||||
void generateDeviceFunctions(std::unique_ptr<std::ostream>& streamPtr) {
|
||||
@@ -240,7 +283,7 @@ namespace perl {
|
||||
std::stringstream subCountSupported;
|
||||
std::stringstream subWarnUnsupported;
|
||||
std::stringstream subCommon;
|
||||
std::string sCommon = tab + sMy + tab + sForeach;
|
||||
std::string sCommon = tab + sMy_k + "\n" + tab + sForeach;
|
||||
subCountSupported << std::endl << sSub << " countSupportedDeviceFunctions" << " {" << std::endl << (countSupported ? sCommon : tab + sReturn_0);
|
||||
subWarnUnsupported << std::endl << sSub << " warnUnsupportedDeviceFunctions" << " {" << std::endl << (countUnsupported ? tab + "my $line_num = shift;\n" + sCommon : tab + sReturn_0);
|
||||
if (countSupported) {
|
||||
@@ -256,7 +299,7 @@ namespace perl {
|
||||
subCommon << double_tab << "my $mt_namespace = m/(\\w+)::($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl;
|
||||
subCommon << double_tab << "my $mt = m/($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl;
|
||||
subCommon << double_tab << "if ($mt && !$mt_namespace) {" << std::endl;
|
||||
subCommon << triple_tab << "$m += $mt;" << std::endl;
|
||||
subCommon << triple_tab << "$k += $mt;" << std::endl;
|
||||
}
|
||||
if (countSupported) {
|
||||
subCountSupported << subCommon.str();
|
||||
@@ -266,7 +309,7 @@ namespace perl {
|
||||
subWarnUnsupported << triple_tab << "print STDERR \" warning: $fileName:$line_num: unsupported device function \\\"$func\\\": $_\\n\";" << std::endl;
|
||||
}
|
||||
if (countSupported || countUnsupported) {
|
||||
sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_m;
|
||||
sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_k;
|
||||
}
|
||||
if (countSupported) subCountSupported << sCommon;
|
||||
if (countUnsupported) subWarnUnsupported << sCommon;
|
||||
@@ -305,6 +348,7 @@ namespace perl {
|
||||
generateStatFunctions(streamPtr);
|
||||
generateSimpleSubstitutions(streamPtr);
|
||||
generateExternShared(streamPtr);
|
||||
generateKernelLaunch(streamPtr);
|
||||
generateHostFunctions(streamPtr);
|
||||
generateDeviceFunctions(streamPtr);
|
||||
streamPtr.get()->flush();
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user