[HIPIFY][perl] Generate transformKernelLaunch

+ Update hipify-perl accordingly
+ Minor refactoring


[ROCm/hip commit: c8f73ea491]
This commit is contained in:
Evgeny Mankov
2019-10-01 11:17:27 +03:00
parent ef49c4f789
commit 34e20b8568
2 changed files with 229 additions and 183 deletions
+173 -171
View File
@@ -26,6 +26,7 @@
use Getopt::Long;
my $whitelist = "";
my $fileName = "";
GetOptions(
"examine" => \$examine # Combines -no-output and -print-stats options.
@@ -1602,184 +1603,45 @@ sub transformExternShared {
$ft{ 'extern_shared' } += $k;
}
# Count of transforms in all files
my %tt;
clearStats(\%tt, \@statNames);
$Twarnings = 0;
$TlineCount = 0;
my %TwarningTags;
my %Tkernels;
my $fileCount = @ARGV;
my $fileName = "";
# CUDA Kernel Launch Syntax
sub transformKernelLaunch {
my $TkernRef = @_;
no warnings qw/uninitialized/;
my $k = 0;
while (@ARGV) {
$fileName=shift (@ARGV);
if ($inplace) {
my $file_prehip = "$fileName" . ".prehip";
my $infile;
my $outfile;
if (-e $file_prehip) {
$infile = $file_prehip;
$outfile = $fileName;
} else {
system ("cp $fileName $file_prehip");
$infile = $file_prehip;
$outfile = $fileName;
}
open(INFILE,"<", $infile) or die "error: could not open $infile";
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
$OUTFILE = OUTFILE;
} else {
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
$OUTFILE = STDOUT;
}
# Note : \b is used in perl to indicate the start of a word
# Count of transforms in this file
my %ft;
clearStats(\%ft, \@statNames);
my $countIncludes = 0;
my $countKeywords = 0;
my $warnings = 0;
my %warningTags;
my $lineCount = 0;
undef $/;
# Read whole file at once, so we can match newlines
while (<INFILE>)
{
simpleSubstitutions();
# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
$countKeywords += m/__global__/;
$countKeywords += m/__shared__/;
# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
transformExternShared();
# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
# CUDA Launch Syntax. Note these only work if launch is on a single line.
{
# Match uses ? for <.*> which will be unitialized if this is not present in launch syntax
no warnings qw/uninitialized/;
my $k = 0;
# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
# Handle the kern<...><<<numBlocks, blockDim, sharedSize, stream>>> syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
# Handle the kern<<<numBlocks, blockDim, sharedSize, stream>>> syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
# Handle the kern<<<Dg, Db>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
# Handle the kern<...><<<numBlocks, blockDim, sharedSize, stream>>> syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
# Handle the kern<<<numBlocks, blockDim, sharedSize, stream>>> syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
# Handle the kern<...><<<numBlocks, blockDim, sharedSize>>> syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
# Handle the kern<<<numBlocks, blockDim, sharedSize>>> syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
# Handle the kern<...><<<numBlocks, blockDim, sharedSize>>> syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
# Handle the kern<<<numBlocks, blockDim, sharedSize>>> syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
# Handle the kern<...><<<numBlocks, blockDim>>> syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
# Handle the kern<<<numBlocks, blockDim>>> syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
# Handle the kern<...><<<numBlocks, blockDim>>> syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
# Handle the kern<<<numBlocks, blockDim>>> syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
if ($k) {
$ft{'kernel_launch'} += $k;
$Tkernels{$1} ++;
}
}
if ($print_stats) {
while (/(\bhip[A-Z]\w+\b)/g) {
$convertedTags{$1}++;
}
}
my $hasDeviceCode = $countKeywords + $ft{'device_function'};
unless ($quiet_warnings) {
# Copy into array of lines, process line-by-line to show warnings
if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) {
my @lines = split /\n/, $_;
# Copy the whole file
my $tmp = $_;
my $line_num = 0;
foreach (@lines) {
$line_num ++;
# Remove any whitelisted words
foreach $w (@whitelist) {
s/\b$w\b/ZAP/
}
my $tag;
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
# Flag any remaining code that look like cuda API calls: may want to add these to hipify
$tag = (defined $1) ? $1 : "Launch";
}
if (defined $tag) {
$warnings++;
$warningTags{$tag}++;
print STDERR " warning: $fileName:#$line_num : $_";
print STDERR "\n";
}
$s = warnUnsupportedDeviceFunctions($line_num);
$warnings += $s;
}
$_ = $tmp;
}
}
if ($hasDeviceCode > 0) {
$ft{'device_function'} += countSupportedDeviceFunctions();
}
transformHostFunctions();
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
unless ($no_output) {
my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
my $totalCalls = $apiCalls + $kernStuff;
$is_dos = m/\r\n$/;
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
# TODO: implement hipify-clang's logic with header files AMAP
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
}
print $OUTFILE "$_";
}
$lineCount = $_ =~ tr/\n//;
}
my $totalConverted = totalStats(\%ft);
if (($totalConverted+$warnings) and $print_stats) {
printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount);
print STDERR " in '$fileName'\n";
}
# Update totals for all files
addStats(\%tt, \%ft);
$Twarnings += $warnings;
$TlineCount += $lineCount;
foreach $key (keys %warningTags) {
$TwarningTags{$key} += $warningTags{$key};
}
}
# Print total stats for all files processed:
if ($print_stats and ($fileCount > 1)) {
print STDERR "\n";
printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
print STDERR "\n";
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
}
my $kernelCnt = keys %Tkernels;
printf STDERR " kernels (%d total) : ", $kernelCnt;
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
printf STDERR " %s(%d)", $key, $Tkernels{$key};
}
print STDERR "\n";
print STDERR "\n";
}
if ($print_stats) {
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
printf STDERR " %s %d\n", $key, $convertedTags{$key};
if ($k) {
$ft{'kernel_launch'} += $k;
@$TkernRef{$1} ++;
}
}
@@ -2420,3 +2282,143 @@ sub warnUnsupportedDeviceFunctions {
}
return $m;
}
# Count of transforms in all files
my %tt;
clearStats(\%tt, \@statNames);
$Twarnings = 0;
$TlineCount = 0;
my %TwarningTags;
my %Tkernels;
my $fileCount = @ARGV;
while (@ARGV) {
$fileName=shift (@ARGV);
if ($inplace) {
my $file_prehip = "$fileName" . ".prehip";
my $infile;
my $outfile;
if (-e $file_prehip) {
$infile = $file_prehip;
$outfile = $fileName;
} else {
system ("cp $fileName $file_prehip");
$infile = $file_prehip;
$outfile = $fileName;
}
open(INFILE,"<", $infile) or die "error: could not open $infile";
open(OUTFILE,">", $outfile) or die "error: could not open $outfile";
$OUTFILE = OUTFILE;
} else {
open(INFILE,"<", $fileName) or die "error: could not open $fileName";
$OUTFILE = STDOUT;
}
# Note : \b is used in perl to indicate the start of a word
# Count of transforms in this file
my %ft;
clearStats(\%ft, \@statNames);
my $countIncludes = 0;
my $countKeywords = 0;
my $warnings = 0;
my %warningTags;
my $lineCount = 0;
undef $/;
# Read whole file at once, so we can match newlines
while (<INFILE>)
{
simpleSubstitutions();
$countKeywords += m/__global__/;
$countKeywords += m/__shared__/;
transformExternShared();
transformKernelLaunch(\%Tkernels);
if ($print_stats) {
while (/(\bhip[A-Z]\w+\b)/g) {
$convertedTags{$1}++;
}
}
my $hasDeviceCode = $countKeywords + $ft{'device_function'};
unless ($quiet_warnings) {
# Copy into array of lines, process line-by-line to show warnings
if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) {
my @lines = split /\n/, $_;
# Copy the whole file
my $tmp = $_;
my $line_num = 0;
foreach (@lines) {
$line_num ++;
# Remove any whitelisted words
foreach $w (@whitelist) {
s/\b$w\b/ZAP/
}
my $tag;
if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) {
# Flag any remaining code that look like cuda API calls: may want to add these to hipify
$tag = (defined $1) ? $1 : "Launch";
}
if (defined $tag) {
$warnings++;
$warningTags{$tag}++;
print STDERR " warning: $fileName:#$line_num : $_";
print STDERR "\n";
}
$s = warnUnsupportedDeviceFunctions($line_num);
$warnings += $s;
}
$_ = $tmp;
}
}
if ($hasDeviceCode > 0) {
$ft{'device_function'} += countSupportedDeviceFunctions();
}
transformHostFunctions();
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
unless ($no_output) {
my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
my $totalCalls = $apiCalls + $kernStuff;
$is_dos = m/\r\n$/;
if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {
# TODO: implement hipify-clang's logic with header files AMAP
print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n");
}
print $OUTFILE "$_";
}
$lineCount = $_ =~ tr/\n//;
}
my $totalConverted = totalStats(\%ft);
if (($totalConverted+$warnings) and $print_stats) {
printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount);
print STDERR " in '$fileName'\n";
}
# Update totals for all files
addStats(\%tt, \%ft);
$Twarnings += $warnings;
$TlineCount += $lineCount;
foreach $key (keys %warningTags) {
$TwarningTags{$key} += $warningTags{$key};
}
}
# Print total stats for all files processed:
if ($print_stats and ($fileCount > 1)) {
print STDERR "\n";
printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount);
print STDERR "\n";
foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {
printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key};
}
my $kernelCnt = keys %Tkernels;
printf STDERR " kernels (%d total) : ", $kernelCnt;
foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) {
printf STDERR " %s(%d)", $key, $Tkernels{$key};
}
print STDERR "\n";
print STDERR "\n";
}
if ($print_stats) {
foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {
printf STDERR " %s %d\n", $key, $convertedTags{$key};
}
}
+56 -12
View File
@@ -64,10 +64,11 @@ namespace perl {
const std::string triple_tab = double_tab + tab;
const std::string sSub = "sub";
const std::string sReturn_0 = "return 0;\n";
const std::string sReturn_m = "return $m;\n";
const std::string sReturn_k = "return $k;\n";
const std::string sForeach = "foreach $func (\n";
const std::string sMy = "my $m = 0;\n";
const std::string sMy_k = "my $k = 0;";
const std::string sNoWarns = "no warnings qw/uninitialized/;";
const std::string sCudaDevice = "cudaDevice";
const std::string sCudaDeviceId = "cudaDeviceId";
const std::string sCudaDevices = "cudaDevices";
@@ -182,17 +183,59 @@ namespace perl {
void generateExternShared(std::unique_ptr<std::ostream>& streamPtr) {
*streamPtr.get() << std::endl << "# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro" << std::endl;
*streamPtr.get() << sSub << " transformExternShared" << " {" << std::endl;
*streamPtr.get() << tab << "no warnings qw/uninitialized/;" << std::endl;
*streamPtr.get() << tab << "my $k = 0;" << std::endl;
*streamPtr.get() << tab << sNoWarns << std::endl;
*streamPtr.get() << tab << sMy_k << std::endl;
*streamPtr.get() << tab << "$k += s/extern\\s+([\\w\\(\\)]+)?\\s*__shared__\\s+([\\w:<>\\s]+)\\s+(\\w+)\\s*\\[\\s*\\]\\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;" << std::endl;
*streamPtr.get() << tab << "$ft{ 'extern_shared' } += $k;" << std::endl << "}" << std::endl;
}
void generateKernelLaunch(std::unique_ptr<std::ostream>& streamPtr) {
*streamPtr.get() << std::endl << "# CUDA Kernel Launch Syntax" << std::endl;
*streamPtr.get() << sSub << " transformKernelLaunch" << " {" << std::endl;
*streamPtr.get() << tab << "my $TkernRef = @_;" << std::endl;
*streamPtr.get() << tab << sNoWarns << std::endl;
*streamPtr.get() << tab << sMy_k << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << std::endl << std::endl;
*streamPtr.get() << tab << "if ($k) {" << std::endl;
*streamPtr.get() << double_tab << "$ft{'kernel_launch'} += $k;" << std::endl;
*streamPtr.get() << double_tab << "@$TkernRef{$1} ++;" << std::endl << tab << "}" << std::endl << "}" << std::endl;
}
void generateHostFunctions(std::unique_ptr<std::ostream>& streamPtr) {
*streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy;
*streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy_k << std::endl;
std::set<std::string> &funcSet = DeviceSymbolFunctions0;
const std::string s0 = "$m += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,/$func\\(";
const std::string s1 = "$m += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,\\s*([^,\\)]+)\\s*(,\\s*|\\))\\s*/$func\\($2, ";
const std::string s0 = "$k += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,/$func\\(";
const std::string s1 = "$k += s/(?<!\\/\\/ CHECK: )($func)\\s*\\(\\s*([^,]+)\\s*,\\s*([^,\\)]+)\\s*(,\\s*|\\))\\s*/$func\\($2, ";
for (int i = 0; i < 4; ++i) {
*streamPtr.get() << tab + sForeach;
switch (i) {
@@ -223,7 +266,7 @@ namespace perl {
}
*streamPtr.get() << tab << "}" << std::endl;
}
*streamPtr.get() << tab << sReturn_m << "}" << std::endl;
*streamPtr.get() << tab << sReturn_k << "}" << std::endl;
}
void generateDeviceFunctions(std::unique_ptr<std::ostream>& streamPtr) {
@@ -240,7 +283,7 @@ namespace perl {
std::stringstream subCountSupported;
std::stringstream subWarnUnsupported;
std::stringstream subCommon;
std::string sCommon = tab + sMy + tab + sForeach;
std::string sCommon = tab + sMy_k + "\n" + tab + sForeach;
subCountSupported << std::endl << sSub << " countSupportedDeviceFunctions" << " {" << std::endl << (countSupported ? sCommon : tab + sReturn_0);
subWarnUnsupported << std::endl << sSub << " warnUnsupportedDeviceFunctions" << " {" << std::endl << (countUnsupported ? tab + "my $line_num = shift;\n" + sCommon : tab + sReturn_0);
if (countSupported) {
@@ -256,7 +299,7 @@ namespace perl {
subCommon << double_tab << "my $mt_namespace = m/(\\w+)::($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl;
subCommon << double_tab << "my $mt = m/($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl;
subCommon << double_tab << "if ($mt && !$mt_namespace) {" << std::endl;
subCommon << triple_tab << "$m += $mt;" << std::endl;
subCommon << triple_tab << "$k += $mt;" << std::endl;
}
if (countSupported) {
subCountSupported << subCommon.str();
@@ -266,7 +309,7 @@ namespace perl {
subWarnUnsupported << triple_tab << "print STDERR \" warning: $fileName:$line_num: unsupported device function \\\"$func\\\": $_\\n\";" << std::endl;
}
if (countSupported || countUnsupported) {
sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_m;
sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_k;
}
if (countSupported) subCountSupported << sCommon;
if (countUnsupported) subWarnUnsupported << sCommon;
@@ -305,6 +348,7 @@ namespace perl {
generateStatFunctions(streamPtr);
generateSimpleSubstitutions(streamPtr);
generateExternShared(streamPtr);
generateKernelLaunch(streamPtr);
generateHostFunctions(streamPtr);
generateDeviceFunctions(streamPtr);
streamPtr.get()->flush();