2020-04-17 01:01:06 -04:00
#!/usr/bin/perl
use strict ;
use File::Copy ;
use File::Spec ;
use File::Basename ;
use File::Which ;
use Cwd 'realpath' ;
use Getopt::Std ;
use List::Util qw( max ) ;
# Print the command-line help text to standard output, then terminate the
# script via exit.
sub usage {
    my @help_lines = (
        "Usage: $0 [OPTION]... -i <input>\n",
        "Extract the device kernels from an hcc executable.\n\n",
        "-h \t\t\t\tshow this help message\n",
        "-i <input> \t\t\t\tinput file\n",
    );
    foreach my $help_line (@help_lines) {
        print($help_line);
    }
    exit;
}
# Set to a true value to print diagnostic output during extraction.
my $debug = 0;

# use clang offload bundler (instead of "dd")
# to extract device object from the bundle
my $use_clang_offload_bundler = 1;

# Command line: -h takes no argument, -i takes the input file path.
my %options = ();
getopts('hi:', \%options);

# Show the help text when no option was parsed or when -h was given.
usage() if !%options || defined $options{h};

# The input file is mandatory and must be an existing regular file.
die("input not specified") unless defined $options{i};
my $input_file = $options{i};
die("can't find $input_file") unless -f $input_file;
2020-05-08 07:09:20 -04:00
# Run "<hipconfig> <flag>" and return what it printed, with leading and
# trailing whitespace (including any trailing newline) removed so the value
# can be compared with "eq" and concatenated into paths.
#
# The command is started with the list form of a pipe open, so no shell is
# involved and a HIP_PATH containing spaces or shell metacharacters is passed
# through verbatim.
#
# Parameters:
#   $hipconfig - path of the hipconfig executable
#   $flag      - the single option to pass (e.g. "--compiler")
# Returns: the trimmed output; the empty string when hipconfig cannot be
#          started or prints nothing.
sub query_hipconfig {
    my ($hipconfig, $flag) = @_;

    open(my $pipe, '-|', $hipconfig, $flag) or return '';
    my $output = do { local $/; <$pipe> };
    close($pipe);

    return '' unless defined $output;
    $output =~ s/\A\s+//;
    $output =~ s/\s+\z//;
    return $output;
}

# derive HIP_PATH via env var or use parent directory of extractkernel
my $HIP_PATH = $ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../"));
my $hipconfig = "$HIP_PATH/bin/hipconfig";

# Ask hipconfig which compiler is in use and where ROCm / HIP-Clang live.
my $HIP_COMPILER   = query_hipconfig($hipconfig, '--compiler');
my $ROCM_PATH      = query_hipconfig($hipconfig, '--rocmpath');
my $HIP_CLANG_PATH = query_hipconfig($hipconfig, '--hipclangpath');
2020-04-17 01:01:06 -04:00
# look for llvm-objdump and clang-offload-bundler
my $tools_path_prefix;

if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang") {
    # Search the path with respect to HIP_CLANG_PATH
    $tools_path_prefix = $HIP_CLANG_PATH;
}
elsif (defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") {
    # Search the path with respect to HCC_HOME if it is set, else search in ROCM_PATH
    if (defined $ENV{'HCC_HOME'}) {
        $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
    }
    else {
        # realpath yields undef when the directory does not exist;
        # locate_tool then falls back to the PATH search.
        $tools_path_prefix = realpath($ROCM_PATH . "/hcc/bin");
    }
}

# Resolve the full path of an external tool.
#
# The tool is first looked up inside $prefix (skipped when $prefix is
# undefined or empty, which happens when the compiler is neither "clang" nor
# "hcc"; without this check the lookup would probe "/<tool>" in the root
# directory).  If it is not a regular file there, the directories in PATH are
# searched with which().
#
# Parameters:
#   $tool_name - executable name, e.g. "llvm-objdump"
#   $prefix    - preferred directory, may be undef
# Returns: path of the tool.
# Dies:    with "Can't find <tool>" when neither lookup succeeds.
sub locate_tool {
    my ($tool_name, $prefix) = @_;

    if (defined $prefix and length $prefix) {
        my $candidate = File::Spec->catfile($prefix, $tool_name);
        return $candidate if -f $candidate;
    }

    my $from_path = which($tool_name);
    return $from_path if defined $from_path and -f $from_path;

    die("Can't find $tool_name\n");
}

# Find llvm-objdump and clang-offload-bundler in the path set above
my $llvm_objdump          = locate_tool("llvm-objdump", $tools_path_prefix);
my $clang_offload_bundler = locate_tool("clang-offload-bundler", $tools_path_prefix);
# Look up one section in the section-header table printed by "objdump -h".
#
# objdump is started without a shell (list-form pipe open), so the file name
# is never subject to word splitting or metacharacter expansion, and the
# section name is compared for exact equality instead of being used as a
# grep pattern (where "." would match any character and substrings of other
# section names would match too).
#
# Parameters:
#   $file         - path of the object/executable to inspect
#   $section_name - exact section name, e.g. ".hip_fatbin"
# Returns: (size, file offset) as numbers for the first matching section, or
#          an empty list when the section is not listed.
# Dies:    when objdump cannot be started.
sub find_section {
    my ($file, $section_name) = @_;

    open(my $objdump_out, '-|', 'objdump', '-h', $file)
        or die("Can't run objdump on $file: $!\n");

    my @found;
    while (my $line = <$objdump_out>) {
        next if @found;    # keep draining the pipe so objdump exits cleanly
        # A section row reads: Idx Name Size VMA LMA File-off Algn
        my @fields = split(' ', $line);
        next unless @fields >= 6 and $fields[1] eq $section_name;
        @found = (hex($fields[2]), hex($fields[5]));
    }
    close($objdump_out);

    return @found;
}

# kernel section information for HCC
my $kernel_section_name = ".kernel";
my $kernel_triple = "hcc-amdgcn-amd-amdhsa--";
my $kernel_blob_alignment = 1;

my ($kernel_section_size, $kernel_section_offset) =
    find_section($input_file, $kernel_section_name);

if (!$kernel_section_size) {
    # If there isn't a section created by HCC,
    # try to detect a kernel section created by HIP-Clang
    $kernel_section_name = ".hip_fatbin";
    $kernel_triple = "hip-amdgcn-amd-amdhsa-";
    $kernel_blob_alignment = 8;
    ($kernel_section_size, $kernel_section_offset) =
        find_section($input_file, $kernel_section_name);
    $kernel_section_size or die("No kernel section found\n");
}

# First file offset past the kernel section.
my $kernel_section_end = $kernel_section_offset + $kernel_section_size;

if ($debug) {
    print "kernel section size: $kernel_section_size\n";
    print "kernel section offset: $kernel_section_offset\n";
    print "kernel section end: $kernel_section_end\n";
}
# Read one unsigned 64-bit integer from $fh at the current position.
#
# The bundle header stores its integers as 64-bit little-endian values (per
# the clang-offload-bundler file format), so "Q<" pins that byte order
# instead of relying on the host's native order.
#
# Parameters:
#   $fh   - binary-mode input handle
#   $what - description used in the error message
# Dies:   with "Fail to parse <what>" when fewer than 8 bytes could be read.
sub read_u64 {
    my ($fh, $what) = @_;
    my $raw;
    my $got = read($fh, $raw, 8);
    (defined $got and $got == 8) or die("Fail to parse $what\n");
    return unpack("Q<", $raw);
}

# Disassemble $hsaco_file with llvm-objdump for the given GPU target and
# write the listing to $isa_file.  No shell is used, so file names are passed
# through verbatim.  Returns true on success, false when the output file
# cannot be written or llvm-objdump cannot be run or exits with an error.
sub disassemble_to_file {
    my ($objdump, $asic_target, $hsaco_file, $isa_file) = @_;

    open(my $isa_fh, '>', $isa_file) or return 0;
    my $ok = open(my $listing, '-|', $objdump, '--disassemble',
                  "--mcpu=$asic_target", $hsaco_file);
    if ($ok) {
        while (my $line = <$listing>) {
            print {$isa_fh} $line;
        }
        # close() on a pipe is false when the child exited non-zero
        $ok = close($listing);
    }
    close($isa_fh) or $ok = 0;
    return $ok;
}

# parse kernel bundle header
# "or" (not "||") so that a failed open is actually detected
open(my $input_fh, '<', $input_file) or die("Can't open $input_file: $!\n");
binmode($input_fh);

my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;

# Walk the kernel section blob by blob until its end is reached or no
# further bundle magic string is found.
while (1) {
    # adjust the offset to the blob alignment
    $current_blob_offset = int(($current_blob_offset + ($kernel_blob_alignment - 1)) / $kernel_blob_alignment) * $kernel_blob_alignment;
    if ($debug) {
        print "Current blob offset: $current_blob_offset\n";
    }

    if ($current_blob_offset >= $kernel_section_end) {
        if ($debug) {
            print "reached end of kernel section\n";
        }
        last;
    }

    seek($input_fh, $current_blob_offset, 0)
        or die("Can't seek to offset $current_blob_offset in $input_file: $!\n");

    # skip OFFLOAD_BUNDLER_MAGIC_STR
    my $magic_str;
    my $read_bytes = read($input_fh, $magic_str, 24);
    if (!defined $read_bytes or $read_bytes != 24 or $magic_str ne "__CLANG_OFFLOAD_BUNDLE__") {
        # didn't detect the bundle magic string
        if ($debug) {
            print "Offload bundle magic string not detected\n";
        }
        last;
    }

    # read number of bundles
    my $num_bundles = read_u64($input_fh, "number of bundles");
    if ($debug) {
        print "Blob $num_blobs, number of bundles: $num_bundles\n";
    }

    # detected GPU targets
    my @asic_target_array;

    # offset and size of the bundle that starts furthest into the blob;
    # their sum is the total length of the blob
    my $last_bundle_offset = 0;
    my $last_bundle_size = 0;

    # strings for creating new files
    my $file_blob_number = sprintf("%03d", $num_blobs);
    my $filename_prefix = "${input_file}-${file_blob_number}";
    # the host entry is always unbundled to /dev/null; GPU entries are appended
    my $clang_offloadbundler_outputs = "-outputs=/dev/null";
    my $clang_offloadbundler_targets = "-targets=host-x86_64-unknown-linux";

    for (my $iter = 0; $iter < $num_bundles; $iter++) {
        # read bundle offset
        my $offset = read_u64($input_fh, "bundle offset");
        $last_bundle_offset = max($last_bundle_offset, $offset);

        # read bundle size
        my $size = read_u64($input_fh, "bundle size");
        if ($last_bundle_offset == $offset) {
            $last_bundle_size = $size;
        }

        # read triple size
        my $triple_size = read_u64($input_fh, "triple size");

        # triple
        my $triple;
        $read_bytes = read($input_fh, $triple, $triple_size);
        (defined $read_bytes and $read_bytes == $triple_size)
            or die("Fail to parse triple\n");

        if ($debug) {
            print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
        }

        # Only process GPU targets, skip host targets
        next unless $triple =~ /^\Q$kernel_triple\E/;

        my $asic_target = substr($triple, length($kernel_triple));

        # augment arguments for clang-offload-bundler
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}";
        $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}";

        # add into asic_target_array
        push(@asic_target_array, $asic_target);

        if (!$use_clang_offload_bundler) {
            # bundle offsets are relative to the start of the blob
            my $offset_for_hsaco = $current_blob_offset + $offset;
            my @dd_command = ("dd", "if=${input_file}", "of=${hsaco_file_name}",
                              "skip=$offset_for_hsaco", "count=$size", "bs=1", "status=none");
            if ($debug) {
                print("extract code bundle with dd: " . join(' ', @dd_command) . "\n");
            }
            system(@dd_command) == 0
                or die("Fail to extract code bundle with dd\n");
        }
    }

    # extract the code blob
    my $blob_filename = "${filename_prefix}.bundle";
    my $write_bytes = $last_bundle_offset + $last_bundle_size;

    # A blob of length zero would leave $current_blob_offset unchanged and
    # make this loop re-read the same header forever.
    $write_bytes > 0
        or die("Empty offload bundle at offset $current_blob_offset\n");

    system("dd", "if=$input_file", "of=$blob_filename", "skip=$current_blob_offset",
           "count=$write_bytes", "bs=1", "status=none") == 0
        or die("Extracting kernel bundle file failed: $?");

    if ($use_clang_offload_bundler) {
        # use clang-offload-bundler to unbundle HSACO
        my @command = ($clang_offload_bundler, "-unbundle", "-type=o",
                       "-inputs=${blob_filename}",
                       $clang_offloadbundler_outputs, $clang_offloadbundler_targets);
        if ($debug) {
            print("clang offload bundler command: " . join(' ', @command) . "\n");
        }
        system(@command) == 0
            or die("Fail to execute clang-offload-bundler");
    }

    foreach my $asic_target (@asic_target_array) {
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        my $isa_file_name = "${filename_prefix}-${asic_target}.isa";

        # use llvm-objdump to dump out GCN ISA
        disassemble_to_file($llvm_objdump, $asic_target, $hsaco_file_name, $isa_file_name)
            or die("Fail to disassemble AMDGPU ISA for target " . $asic_target);

        if ($debug) {
            print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
        }
    }

    # the next blob, if any, starts right after this one (before alignment)
    $current_blob_offset = $current_blob_offset + $last_bundle_offset + $last_bundle_size;
    $num_blobs++;
}

close($input_fh);

$num_blobs or die("No device code found.\n");
exit(0);