From c8f73ea49173653bba4f3cd625879b111d3e98f9 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 1 Oct 2019 11:17:27 +0300 Subject: [PATCH] [HIPIFY][perl] Generate transformKernelLaunch + Update hipify-perl accordingly + Minor refactoring --- bin/hipify-perl | 344 +++++++++++++++-------------- hipify-clang/src/CUDA2HIP_Perl.cpp | 68 +++++- 2 files changed, 229 insertions(+), 183 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 877349d847..fbb135757b 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -26,6 +26,7 @@ use Getopt::Long; my $whitelist = ""; +my $fileName = ""; GetOptions( "examine" => \$examine # Combines -no-output and -print-stats options. @@ -1602,184 +1603,45 @@ sub transformExternShared { $ft{ 'extern_shared' } += $k; } -# Count of transforms in all files -my %tt; -clearStats(\%tt, \@statNames); -$Twarnings = 0; -$TlineCount = 0; -my %TwarningTags; -my %Tkernels; -my $fileCount = @ARGV; -my $fileName = ""; +# CUDA Kernel Launch Syntax +sub transformKernelLaunch { + my $TkernRef = @_; + no warnings qw/uninitialized/; + my $k = 0; -while (@ARGV) { - $fileName=shift (@ARGV); - if ($inplace) { - my $file_prehip = "$fileName" . ".prehip"; - my $infile; - my $outfile; - if (-e $file_prehip) { - $infile = $file_prehip; - $outfile = $fileName; - } else { - system ("cp $fileName $file_prehip"); - $infile = $file_prehip; - $outfile = $fileName; - } - open(INFILE,"<", $infile) or die "error: could not open $infile"; - open(OUTFILE,">", $outfile) or die "error: could not open $outfile"; - $OUTFILE = OUTFILE; - } else { - open(INFILE,"<", $fileName) or die "error: could not open $fileName"; - $OUTFILE = STDOUT; - } - # Note : \b is used in perl to indicate the start of a word - # Count of transforms in this file - my %ft; - clearStats(\%ft, \@statNames); - my $countIncludes = 0; - my $countKeywords = 0; - my $warnings = 0; - my %warningTags; - my $lineCount = 0; - undef $/; - # Read whole file at once, so we can match newlines - while () - { - simpleSubstitutions(); + # Handle the kern<...><<>>() syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g; + # Handle the kern<<>>() syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g; - $countKeywords += m/__global__/; - $countKeywords += m/__shared__/; + # Handle the kern<...><<>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g; + # Handle the kern<<>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g; - transformExternShared(); + # Handle the kern<...><<>>() syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g; + # Handle the kern<<>>() syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g; - # CUDA Launch Syntax. Note these only work if launch is on a single line. - { - # Match uses ? for <.*> which will be unitialized if this is not present in launch syntax - no warnings qw/uninitialized/; - my $k = 0; + # Handle the kern<...><>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g; + # Handle the kern<<>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g; - # Handle the kern<...><<>> syntax with empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g; - # Handle the kern<<>> syntax with empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g; + # Handle the kern<...><<>>() syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g; + # Handle the kern<<>>() syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g; - # Handle the kern<...><<>> syntax with non-empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g; - # Handle the kern<<>> syntax with non-empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g; + # Handle the kern<...><<>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g; + # Handle the kern<<>>(...) syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g; - # Handle the kern<...><<>> syntax with empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g; - # Handle the kern<<>> syntax with empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g; - - # Handle the kern<...><<>> syntax with non-empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g; - # Handle the kern<<>> syntax with non-empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g; - - # Handle the kern<...><<>> syntax with empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g; - # Handle the kern<<>> syntax with empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g; - - # Handle the kern<...><<>> syntax with non-empty args: - $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g; - # Handle the kern<<>> syntax with non-empty args: - $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g; - - if ($k) { - $ft{'kernel_launch'} += $k; - $Tkernels{$1} ++; - } - } - if ($print_stats) { - while (/(\bhip[A-Z]\w+\b)/g) { - $convertedTags{$1}++; - } - } - my $hasDeviceCode = $countKeywords + $ft{'device_function'}; - unless ($quiet_warnings) { - # Copy into array of lines, process line-by-line to show warnings - if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) { - my @lines = split /\n/, $_; - # Copy the whole file - my $tmp = $_; - my $line_num = 0; - foreach (@lines) { - $line_num ++; - # Remove any whitelisted words - foreach $w (@whitelist) { - s/\b$w\b/ZAP/ - } - my $tag; - if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) { - # Flag any remaining code that look like cuda API calls: may want to add these to hipify - $tag = (defined $1) ? $1 : "Launch"; - } - if (defined $tag) { - $warnings++; - $warningTags{$tag}++; - print STDERR " warning: $fileName:#$line_num : $_"; - print STDERR "\n"; - } - $s = warnUnsupportedDeviceFunctions($line_num); - $warnings += $s; - } - $_ = $tmp; - } - } - if ($hasDeviceCode > 0) { - $ft{'device_function'} += countSupportedDeviceFunctions(); - } - transformHostFunctions(); - # TODO: would like to move this code outside loop but it uses $_ which contains the whole file - unless ($no_output) { - my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; - my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; - my $totalCalls = $apiCalls + $kernStuff; - $is_dos = m/\r\n$/; - if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) { - # TODO: implement hipify-clang's logic with header files AMAP - print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n"); - } - print $OUTFILE "$_"; - } - $lineCount = $_ =~ tr/\n//; - } - my $totalConverted = totalStats(\%ft); - if (($totalConverted+$warnings) and $print_stats) { - printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount); - print STDERR " in '$fileName'\n"; - } - # Update totals for all files - addStats(\%tt, \%ft); - $Twarnings += $warnings; - $TlineCount += $lineCount; - foreach $key (keys %warningTags) { - $TwarningTags{$key} += $warningTags{$key}; - } -} -# Print total stats for all files processed: -if ($print_stats and ($fileCount > 1)) { - print STDERR "\n"; - printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount); - print STDERR "\n"; - foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) { - printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key}; - } - my $kernelCnt = keys %Tkernels; - printf STDERR " kernels (%d total) : ", $kernelCnt; - foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) { - printf STDERR " %s(%d)", $key, $Tkernels{$key}; - } - print STDERR "\n"; - print STDERR "\n"; -} -if ($print_stats) { - foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) { - printf STDERR " %s %d\n", $key, $convertedTags{$key}; + if ($k) { + $ft{'kernel_launch'} += $k; + @$TkernRef{$1} ++; } } @@ -2420,3 +2282,143 @@ sub warnUnsupportedDeviceFunctions { } return $m; } + +# Count of transforms in all files +my %tt; +clearStats(\%tt, \@statNames); +$Twarnings = 0; +$TlineCount = 0; +my %TwarningTags; +my %Tkernels; +my $fileCount = @ARGV; + +while (@ARGV) { + $fileName=shift (@ARGV); + if ($inplace) { + my $file_prehip = "$fileName" . ".prehip"; + my $infile; + my $outfile; + if (-e $file_prehip) { + $infile = $file_prehip; + $outfile = $fileName; + } else { + system ("cp $fileName $file_prehip"); + $infile = $file_prehip; + $outfile = $fileName; + } + open(INFILE,"<", $infile) or die "error: could not open $infile"; + open(OUTFILE,">", $outfile) or die "error: could not open $outfile"; + $OUTFILE = OUTFILE; + } else { + open(INFILE,"<", $fileName) or die "error: could not open $fileName"; + $OUTFILE = STDOUT; + } + # Note : \b is used in perl to indicate the start of a word + # Count of transforms in this file + my %ft; + clearStats(\%ft, \@statNames); + my $countIncludes = 0; + my $countKeywords = 0; + my $warnings = 0; + my %warningTags; + my $lineCount = 0; + undef $/; + # Read whole file at once, so we can match newlines + while () + { + simpleSubstitutions(); + + $countKeywords += m/__global__/; + $countKeywords += m/__shared__/; + + transformExternShared(); + transformKernelLaunch(\%Tkernels); + + if ($print_stats) { + while (/(\bhip[A-Z]\w+\b)/g) { + $convertedTags{$1}++; + } + } + my $hasDeviceCode = $countKeywords + $ft{'device_function'}; + unless ($quiet_warnings) { + # Copy into array of lines, process line-by-line to show warnings + if ($hasDeviceCode or (/\bcu/) or (/\bCU_/) or (/\bCUDA_/) or (/<<<.*>>>/)) { + my @lines = split /\n/, $_; + # Copy the whole file + my $tmp = $_; + my $line_num = 0; + foreach (@lines) { + $line_num ++; + # Remove any whitelisted words + foreach $w (@whitelist) { + s/\b$w\b/ZAP/ + } + my $tag; + if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) { + # Flag any remaining code that look like cuda API calls: may want to add these to hipify + $tag = (defined $1) ? $1 : "Launch"; + } + if (defined $tag) { + $warnings++; + $warningTags{$tag}++; + print STDERR " warning: $fileName:#$line_num : $_"; + print STDERR "\n"; + } + $s = warnUnsupportedDeviceFunctions($line_num); + $warnings += $s; + } + $_ = $tmp; + } + } + if ($hasDeviceCode > 0) { + $ft{'device_function'} += countSupportedDeviceFunctions(); + } + transformHostFunctions(); + # TODO: would like to move this code outside loop but it uses $_ which contains the whole file + unless ($no_output) { + my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; + my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; + my $totalCalls = $apiCalls + $kernStuff; + $is_dos = m/\r\n$/; + if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) { + # TODO: implement hipify-clang's logic with header files AMAP + print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n"); + } + print $OUTFILE "$_"; + } + $lineCount = $_ =~ tr/\n//; + } + my $totalConverted = totalStats(\%ft); + if (($totalConverted+$warnings) and $print_stats) { + printStats(" info: converted", \@statNames, \%ft, $warnings, $lineCount); + print STDERR " in '$fileName'\n"; + } + # Update totals for all files + addStats(\%tt, \%ft); + $Twarnings += $warnings; + $TlineCount += $lineCount; + foreach $key (keys %warningTags) { + $TwarningTags{$key} += $warningTags{$key}; + } +} +# Print total stats for all files processed: +if ($print_stats and ($fileCount > 1)) { + print STDERR "\n"; + printStats(" info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount); + print STDERR "\n"; + foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) { + printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key}; + } + my $kernelCnt = keys %Tkernels; + printf STDERR " kernels (%d total) : ", $kernelCnt; + foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) { + printf STDERR " %s(%d)", $key, $Tkernels{$key}; + } + print STDERR "\n"; + print STDERR "\n"; +} +if ($print_stats) { + foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) { + printf STDERR " %s %d\n", $key, $convertedTags{$key}; + } +} diff --git a/hipify-clang/src/CUDA2HIP_Perl.cpp b/hipify-clang/src/CUDA2HIP_Perl.cpp index 0ccd69da55..5d7a8790d0 100644 --- a/hipify-clang/src/CUDA2HIP_Perl.cpp +++ b/hipify-clang/src/CUDA2HIP_Perl.cpp @@ -64,10 +64,11 @@ namespace perl { const std::string triple_tab = double_tab + tab; const std::string sSub = "sub"; const std::string sReturn_0 = "return 0;\n"; - const std::string sReturn_m = "return $m;\n"; + const std::string sReturn_k = "return $k;\n"; const std::string sForeach = "foreach $func (\n"; - const std::string sMy = "my $m = 0;\n"; - + const std::string sMy_k = "my $k = 0;"; + const std::string sNoWarns = "no warnings qw/uninitialized/;"; + const std::string sCudaDevice = "cudaDevice"; const std::string sCudaDeviceId = "cudaDeviceId"; const std::string sCudaDevices = "cudaDevices"; @@ -182,17 +183,59 @@ namespace perl { void generateExternShared(std::unique_ptr& streamPtr) { *streamPtr.get() << std::endl << "# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro" << std::endl; *streamPtr.get() << sSub << " transformExternShared" << " {" << std::endl; - *streamPtr.get() << tab << "no warnings qw/uninitialized/;" << std::endl; - *streamPtr.get() << tab << "my $k = 0;" << std::endl; + *streamPtr.get() << tab << sNoWarns << std::endl; + *streamPtr.get() << tab << sMy_k << std::endl; *streamPtr.get() << tab << "$k += s/extern\\s+([\\w\\(\\)]+)?\\s*__shared__\\s+([\\w:<>\\s]+)\\s+(\\w+)\\s*\\[\\s*\\]\\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;" << std::endl; *streamPtr.get() << tab << "$ft{ 'extern_shared' } += $k;" << std::endl << "}" << std::endl; } + void generateKernelLaunch(std::unique_ptr& streamPtr) { + *streamPtr.get() << std::endl << "# CUDA Kernel Launch Syntax" << std::endl; + *streamPtr.get() << sSub << " transformKernelLaunch" << " {" << std::endl; + *streamPtr.get() << tab << "my $TkernRef = @_;" << std::endl; + *streamPtr.get() << tab << sNoWarns << std::endl; + *streamPtr.get() << tab << sMy_k << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl; + *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << std::endl; + *streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << std::endl << std::endl; + + *streamPtr.get() << tab << "if ($k) {" << std::endl; + *streamPtr.get() << double_tab << "$ft{'kernel_launch'} += $k;" << std::endl; + *streamPtr.get() << double_tab << "@$TkernRef{$1} ++;" << std::endl << tab << "}" << std::endl << "}" << std::endl; + } + void generateHostFunctions(std::unique_ptr& streamPtr) { - *streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy; + *streamPtr.get() << std::endl << sSub << " transformHostFunctions" << " {" << std::endl << tab << sMy_k << std::endl; std::set &funcSet = DeviceSymbolFunctions0; - const std::string s0 = "$m += s/(?& streamPtr) { @@ -240,7 +283,7 @@ namespace perl { std::stringstream subCountSupported; std::stringstream subWarnUnsupported; std::stringstream subCommon; - std::string sCommon = tab + sMy + tab + sForeach; + std::string sCommon = tab + sMy_k + "\n" + tab + sForeach; subCountSupported << std::endl << sSub << " countSupportedDeviceFunctions" << " {" << std::endl << (countSupported ? sCommon : tab + sReturn_0); subWarnUnsupported << std::endl << sSub << " warnUnsupportedDeviceFunctions" << " {" << std::endl << (countUnsupported ? tab + "my $line_num = shift;\n" + sCommon : tab + sReturn_0); if (countSupported) { @@ -256,7 +299,7 @@ namespace perl { subCommon << double_tab << "my $mt_namespace = m/(\\w+)::($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl; subCommon << double_tab << "my $mt = m/($func)\\s*\\(\\s*.*\\s*\\)/g;" << std::endl; subCommon << double_tab << "if ($mt && !$mt_namespace) {" << std::endl; - subCommon << triple_tab << "$m += $mt;" << std::endl; + subCommon << triple_tab << "$k += $mt;" << std::endl; } if (countSupported) { subCountSupported << subCommon.str(); @@ -266,7 +309,7 @@ namespace perl { subWarnUnsupported << triple_tab << "print STDERR \" warning: $fileName:$line_num: unsupported device function \\\"$func\\\": $_\\n\";" << std::endl; } if (countSupported || countUnsupported) { - sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_m; + sCommon = double_tab + "}\n" + tab + "}\n" + tab + sReturn_k; } if (countSupported) subCountSupported << sCommon; if (countUnsupported) subWarnUnsupported << sCommon; @@ -305,6 +348,7 @@ namespace perl { generateStatFunctions(streamPtr); generateSimpleSubstitutions(streamPtr); generateExternShared(streamPtr); + generateKernelLaunch(streamPtr); generateHostFunctions(streamPtr); generateDeviceFunctions(streamPtr); streamPtr.get()->flush();