add new code object tooling: roc-obj-ls and roc-obj-extract

SWDEV-229776 Change-Id: Icb11c4552515d3eff3bcf303c4a470ab90d1dd58
2020-06-30 12:09:30 -04:00
commit cb859b7263
@@ -111,6 +111,8 @@ else()
 endif()
 message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" )

+# Debian packages declared as dependencies of this package. Entries must be
+# comma-separated; a missing comma merges two package names into one entry.
+set(CPACK_DEBIAN_PACKAGE_DEPENDS "liburi-encode-perl, libfile-basedir-perl, libfile-copy-recursive-perl, libfile-listing-perl, libfile-which-perl")
+
 ## RPM package specific variables
 if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
  set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
@@ -118,6 +120,8 @@ else()
  set ( CPACK_RPM_PACKAGE_RELEASE "local" )
 endif()

+# Perl modules declared as requirements of the RPM package, expressed as
+# perl(Module::Name) capabilities in a comma-separated list.
+set(CPACK_RPM_PACKAGE_REQUIRES "perl(URI::Encode), perl(File::Copy), perl(File::Listing), perl(File::Which), perl(File::BaseDir)")
+
 ## 'dist' breaks manual builds on debian systems due to empty Provides
 execute_process( COMMAND rpm --eval %{?dist}
                 RESULT_VARIABLE PROC_RESULT
@@ -41,6 +41,7 @@ HIP releases are typically naming convention for each ROCM release to help diffe
 - [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md)
 - [HIP Programming Guide](docs/markdown/hip_programming_guide.md)
 - [HIP Logging ](docs/markdown/hip_logging.md)
+- [Code Object tooling](docs/markdown/obj_tooling.md)
 - [HIP Terminology](docs/markdown/hip_terms2.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/OpenCL)
 - [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md)
 - Supported CUDA APIs:
@@ -0,0 +1,209 @@
+#!/usr/bin/perl
+use strict;
+use File::Copy;
+use File::Spec;
+use File::Basename;
+use File::Which;
+use Cwd 'realpath';
+use Getopt::Std;
+use List::Util qw(max);
+use URI::Encode;
+
+# Fields parsed out of the URI currently being processed.
+my ($extract_range_specifier, $extract_pid, $extract_file);
+my ($extract_offset, $extract_size);
+
+# Name and directory used for the extracted code object.
+my ($output_file, $output_path);
+
+# Result of the "is this process alive" probe done for memory URIs.
+my $pid_running;
+
+# Run-wide state: verbosity flag, error counter (used as the exit status),
+# and whether "-o -" asked for the code object on STDOUT.
+my ($verbose, $error, $output_to_stdout) = (0, 0, 0);
+
+# Print the command-line help (synopsis, options and URI grammar) to STDOUT,
+# then terminate the script using the current error count as exit status.
+sub usage {
+  my @help_text = (
+    "Usage: $0 [-o|v|h] URI... \n",
+    "  URIs can be read from STDIN, one per line.\n",
+    "  From the URIs specified, extracts code objects into files named: ",
+    "<executable_name>-[pid<number>]-offset<number>-size<number>.co\n\n",
+    "Options:\n",
+    "  -o <path> \tPath for output. If \"-\" specified, code object is printed to STDOUT.\n",
+    "  -v        \tVerbose output to STDOUT (includes Entry ID).\n",
+    "  -h        \tShow this help message.\n",
+    "\nURI syntax:\n",
+    "\tcode_object_uri ::== file_uri | memory_uri\n",
+    "\tfile_uri        ::== \"file://\" extract_file [ range_specifier ]\n",
+    "\tmemory_uri      ::== \"memory://\" process_id range_specifier\n",
+    "\trange_specifier ::== [ \"#\" | \"?\" ] \"offset=\" number \"&\" \"size=\" number\n",
+    "\textract_file    ::== URI_ENCODED_OS_FILE_PATH\n",
+    "\tprocess_id      ::== DECIMAL_NUMBER\n",
+    "\tnumber          ::== HEX_NUMBER \| DECIMAL_NUMBER \| OCTAL_NUMBER\n\n",
+    "\tExample: file://dir1/dir2/hello_world#offset=133&size=14472 \n",
+    "\t         memory://1234#offset=0x20000&size=3000\n\n",
+  );
+
+  # One print of the whole list emits the same bytes as printing each piece.
+  print(@help_text);
+
+  exit($error);
+}
+
+# Parse command-line switches: -v (verbose), -h (help), -o <path> (output).
+my %options=();
+getopts('vho:', \%options);
+
+# -h: show help and exit.
+usage() if (defined $options{h});
+
+# -v: enable verbose messages.
+$verbose = 1 if (defined $options{v});
+
+# -o: either "-" (write the code object to STDOUT) or an existing directory.
+if (defined $options{o}) {
+  $output_path = $options{o};
+  $output_to_stdout = 1 if ($output_path eq "-");
+  if (!$output_to_stdout) {
+    die("Error: Path \'$output_path\' cannot be found.\n") unless (-d $output_path);
+  }
+}
+
+# When STDIN is not a terminal, treat each line read from it as one more URI.
+push @ARGV, <STDIN> unless -t STDIN;
+
+# At least one URI is required.
+if (scalar(@ARGV) == 0) {
+  print(STDERR "Error: No arguments.\n"); $error++;
+  usage();
+}
+
+# The extraction itself is delegated to dd, so it must be on the PATH.
+my $dd_cmd = which("dd");
+die("Error: Can't find dd command\n") unless (-f $dd_cmd);
+
+# Convert a range-specifier number to a plain integer. The URI grammar allows
+# hex (0x prefix), octal (leading 0) and decimal spellings; dd itself only
+# understands decimal, so the value is normalised here.
+# Returns nothing (undef in scalar context) if the text is missing or invalid.
+sub parse_uri_number {
+  my ($text) = @_;
+  return if (!defined($text));
+  return hex($text) if ($text =~ /\A0[xX][0-9a-fA-F]+\z/);
+  return oct($text) if ($text =~ /\A0[0-7]*\z/);
+  return int($text) if ($text =~ /\A[1-9][0-9]*\z/);
+  return;
+}
+
+# Process every URI: validate it, work out the byte range, and let dd copy
+# that range into an output file (or to STDOUT when "-o -" was given).
+# A failure on one URI is reported and counted in $error; the remaining URIs
+# are still processed.
+foreach my $uri_str(@ARGV) {
+  chomp $uri_str;
+
+  # we expect the URI to follow this BNF syntax:
+  #
+  #   code_object_uri ::== file_uri | memory_uri
+  #   file_uri        ::== "file://" extract_file [ range_specifier ]
+  #   memory_uri      ::== "memory://" process_id range_specifier
+  #     range_specifier  ::== [ "#" | "?" ] "offset=" number "&" "size=" number
+  #     extract_file        ::== URI_ENCODED_OS_FILE_PATH
+  #     process_id       ::== DECIMAL_NUMBER
+  #     number           ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
+
+  # Example: file://dir1/dir2/hello_world#offset=133&size=14472
+  #          memory://1234#offset=0x20000&size=3000
+
+  # Split only on the first "://" so the remainder of the URI stays intact.
+  my ($uri_protocol, $specs) = split(/:\/\//, $uri_str, 2);
+  my $obj_uri_encode = URI::Encode->new();
+  my $decoded_extract_file;
+  my $is_memory_uri = (lc($uri_protocol) eq "memory");
+
+  # Reset per-URI state so nothing leaks in from a previously processed URI.
+  $pid_running = 0;
+
+  if (lc($uri_protocol) eq "file") {
+    # expect file path; only "#" or "?" introduce the range specifier
+    # (a comma is a legal path character and must not split the path).
+    ($extract_file, $extract_range_specifier) = split(/[#?]/, $specs, 2);
+
+    # decode the file name. URIs may have file/path names with non-alphanumeric characters, which will be encoded with %.  We need to decode these.
+    $decoded_extract_file = $obj_uri_encode->decode($extract_file);
+
+    # verify file exists:
+    if (! -e $decoded_extract_file) {
+      print(STDERR "Error: can't find file: $decoded_extract_file\n"); $error++;
+      next;
+    }
+
+    # use the output_path if specified, otherwise use current working dir.
+    if (defined($output_path) && $output_path ne "") {
+      $output_file = File::Spec->catfile($output_path, basename($decoded_extract_file));
+    } else {
+      $output_file = basename($decoded_extract_file);
+    }
+
+  } elsif ($is_memory_uri) {
+    # expect memory specifier
+    ($extract_pid, $extract_range_specifier) = split(/[#?]/, $specs, 2);
+
+    # the grammar requires a decimal process id; reject anything else before
+    # it reaches kill(), where a non-numeric string would be treated as 0.
+    if (!defined($extract_pid) || $extract_pid !~ /\A[0-9]+\z/) {
+      print(STDERR "Error: invalid process id in URI: $uri_str\n"); $error++;
+      next;
+    }
+
+    # verify pid is currently running
+    $pid_running = kill 0, $extract_pid;
+    if (! $pid_running) {
+      print(STDERR "Error: PID: $extract_pid is NOT running\n"); $error++;
+      next;
+    }
+
+    # get pid filename:
+    $extract_file = "/proc/$extract_pid/mem";
+
+    # verify file exists:
+    if (! -e $extract_file) {
+      print(STDERR "Error: can't find file: $extract_file\n"); $error++;
+      next;
+    }
+
+    # for extracting from a pid, make the output file in the current dir/path with the pid value as a name.
+    $output_file = "pid${extract_pid}";
+
+    # need to set $decoded_extract_file, because later we use this for other checks.
+    $decoded_extract_file = $extract_file;
+
+  } else {
+    # error, unrecognized Code Object URI
+    print(STDERR "Error: \'$uri_protocol\' is not recognized as a supported code object URI.\n"); $error++;
+    next;
+  }
+
+  # Numeric offset/size handed to dd; $extract_offset/$extract_size keep the
+  # text as written in the URI and are used for file names and messages.
+  my ($offset_num, $size_num);
+
+  # it is valid to not give a range specifier in a URI, in which case the entire code object will be extracted.
+  if (defined($extract_range_specifier) && $extract_range_specifier ne "") {
+    my ($offset_field, $size_field) = split(/[&]/, $extract_range_specifier);
+    (undef, $extract_offset) = split(/=/, $offset_field);
+    (undef, $extract_size) = split(/=/, $size_field);
+
+    $offset_num = parse_uri_number($extract_offset);
+    $size_num = parse_uri_number($extract_size);
+    if (!defined($offset_num) || !defined($size_num)) {
+      print(STDERR "Error: invalid Range Specifier \'$extract_range_specifier\' in URI: $uri_str\n"); $error++;
+      next;
+    }
+  } else {
+    # Error if URI is a memory request, and we have no range_specifier.
+    if ($is_memory_uri) {
+      print(STDERR "Error: must specify a Range Specifier (offset and size) for a memory URI: $uri_str\n"); $error++;
+      next;
+    }
+
+    $extract_offset = 0;
+    $extract_size = -s $decoded_extract_file;
+    $offset_num = 0;
+    $size_num = $extract_size;
+  }
+
+  # We should have at least a valid size to extract; ignore cases with size=0.
+  if ($size_num != 0) {
+    print("Reading input file \"$extract_file\" ...\n") if ($verbose);
+
+    # only if this is a File URI.
+    if (!$is_memory_uri) {
+      # verify that offset+size does not exceed file size:
+      my $file_size = -s $decoded_extract_file;
+      if ( $offset_num + $size_num > $file_size ) {
+        print(STDERR "Error: requested offset($extract_offset) + size($extract_size) exceeds file size($file_size) for file \"$decoded_extract_file\".\n"); $error++;
+        next;
+      }
+    }
+
+    # Make sure the input is readable before invoking dd, and release the
+    # handle right away: dd opens the file itself.
+    my $input_fp;
+    if (!open($input_fp, "<", $decoded_extract_file)) {
+      print(STDERR "Error: can't open \"$decoded_extract_file\": $!\n"); $error++;
+      next;
+    }
+    close($input_fp);
+
+    # extract the code object. dd is run with an argument list (no shell), so
+    # file names containing quotes or other shell metacharacters are safe.
+    my @dd_args = ("if=$decoded_extract_file");
+    if (!$output_to_stdout) {
+      push(@dd_args, "of=${output_file}-offset${extract_offset}-size${extract_size}.co");
+    }
+    push(@dd_args, "skip=$offset_num", "count=$size_num", "bs=1", "status=none");
+
+    print("DD Command: $dd_cmd @dd_args\n") if ($verbose);
+
+    my $dd_ret = system($dd_cmd, @dd_args);
+    if ($dd_ret != 0) {
+       print(STDERR "Error: DD command ($dd_cmd @dd_args)  failed with RC: $dd_ret\n"); $error++;
+    }
+
+    print("Extract request:  file: $extract_file offset: $extract_offset size: $extract_size\n") if ($verbose);
+  } else {
+    print("Warning: trying to extract from $extract_file at offset=$extract_offset with size=0.  Nothing to extract.\n") if ($verbose);
+  }
+
+} # end of for each (URI) argument
+
+# The exit status is the number of errors encountered (0 on full success).
+exit($error);
@@ -0,0 +1,136 @@
+#!/usr/bin/perl
+use strict;
+use File::Copy;
+use File::Spec;
+use File::Basename;
+use File::Which;
+use Cwd 'realpath';
+use Getopt::Std;
+use List::Util qw(max);
+use URI::Encode;
+
+# Print the command-line help for roc-obj-ls to STDOUT and exit.
+sub usage {
+  print("Usage: $0 [-v|h] executable...\n");
+  print("List the URIs of the code objects embedded in the specified host executables.\n");
+  print("-v \tVerbose output (includes Entry ID)\n");
+  print("-h \tShow this help message\n");
+  exit;
+}
+
+# Read one 64-bit little-endian unsigned integer from a file handle.
+#   1st arg: file handle to read from.
+#   2nd arg: reference to the scalar that receives the decoded value.
+# Dies if fewer than 8 bytes could be read.
+sub readq {
+ my $input_fp = shift;
+ my $qword = shift;
+
+ my $bytes;
+ my $bytes_read = read($input_fp, $bytes, 8);
+ die("Error: Failed to read 8 bytes\n") unless ($bytes_read == 8);
+
+ $$qword = unpack("Q<", $bytes);
+}
+
+# Parse command-line switches: -v (verbose), -h (help), -d (debug output).
+my %options=();
+getopts('vhd', \%options);
+
+# -h: show help and exit.
+usage() if (defined $options{h});
+
+# Keep the raw option values as the verbose/debug flags.
+my ($verbose, $debug) = @options{qw(v d)};
+
+# objdump is used to locate the code object bundle section; it must be on the PATH.
+my $objdump = which("objdump");
+die("Error: Can't find objdump command\n") unless (-f $objdump);
+
+# For each argument (which should be an executable): locate the .hip_fatbin
+# section, parse the offload bundle header stored there, and print one
+# "file://" URI per bundled code object.
+foreach my $executable_file(@ARGV) {
+
+  # debug message
+  print("Reading input file \"$executable_file\" ...\n") if ($debug);
+
+  # verify/open file specified.
+  open(my $input_fp, "<", $executable_file) || die("Error: failed to open file: $executable_file\n");
+  binmode $input_fp;
+
+  # kernel section information: run "objdump -h" once, without a shell, and
+  # pick the size (3rd column) and file offset (6th column) of the section
+  # whose name is exactly .hip_fatbin. Both columns are hexadecimal.
+  my $bundle_section_name = ".hip_fatbin";
+  my $bundle_section_size = 0;
+  my $bundle_section_offset = 0;
+  open(my $objdump_fp, "-|", $objdump, "-h", $executable_file) || die("Error: failed to run $objdump on file: $executable_file\n");
+  while (my $line = <$objdump_fp>) {
+    my @fields = split(' ', $line);
+    next unless (defined($fields[1]) && $fields[1] eq $bundle_section_name);
+    $bundle_section_size = hex($fields[2]);
+    $bundle_section_offset = hex($fields[5]);
+    last;
+  }
+  close($objdump_fp);
+
+  $bundle_section_size or die("Error: No kernel section found\n");
+
+  my $bundle_section_end = $bundle_section_offset + $bundle_section_size;
+
+  if ($debug) {
+    print "Code Objects Bundle section size: $bundle_section_size\n";
+    print "Code Objects Bundle section offset: $bundle_section_offset\n";
+    print "Code Objects Bundle section end: $bundle_section_end\n";
+  }
+
+  my $current_bundle_offset = $bundle_section_offset;
+  print "Current Bundle offset: $current_bundle_offset\n" if ($debug);
+
+  # move fp to current_bundle_offset.
+  seek($input_fp, $current_bundle_offset, 0);
+
+  # skip OFFLOAD_BUNDLER_MAGIC_STR. A file without it is reported and skipped
+  # so that the remaining executables on the command line are still listed.
+  my $magic_str;
+  my $magic_bytes = read($input_fp, $magic_str, 24);
+  if (($magic_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) {
+    print(STDERR "Error: Offload bundle magic string not detected in file: $executable_file\n");
+    close($input_fp);
+    next;
+  }
+
+  # read number of bundle entries, which are code objects.
+  my $num_codeobjects;
+  readq($input_fp, \$num_codeobjects);
+
+  # Listing
+  print "Bundle of $num_codeobjects HIP Code Objects:\n" if ($verbose);
+
+  print("Entry ID:\t\t\tURI:\n") if ($verbose);
+
+  # The encoded file name is the same for every entry, so compute it once.
+  my $obj_uri_encode = URI::Encode->new();
+  my $encoded_executable_file = $obj_uri_encode->encode($executable_file);
+
+  # for each Bundle entry (code object)  ....
+  for (my $iter = 0; $iter < $num_codeobjects; $iter++) {
+
+    # read bundle entry (code object) offset
+    my $entry_offset;
+    readq($input_fp, \$entry_offset);
+    print("entry_offset: $entry_offset\n") if $debug;
+
+    # read bundle entry (code object) size
+    my $entry_size;
+    readq($input_fp, \$entry_size);
+    print("entry_size: $entry_size\n") if $debug;
+
+    # read triple size
+    my $triple_size;
+    readq($input_fp, \$triple_size);
+    print("triple_size: $triple_size\n") if $debug;
+
+    # read triple string
+    my $triple;
+    my $triple_bytes = read($input_fp, $triple, $triple_size);
+    $triple_bytes == $triple_size or die("Error: Fail to parse triple\n");
+    print("triple: $triple\n") if $debug;
+
+    # because the bundle entry's offset is relative to the beginning of the bundled code object section.
+    my $abs_offset = int($entry_offset) + $bundle_section_offset;
+
+    if ($verbose) {
+      print(STDOUT "$triple\tfile:\/\/$encoded_executable_file#offset=$abs_offset\&size=$entry_size\n");
+    } else {
+      print(STDOUT "file:\/\/$encoded_executable_file#offset=$abs_offset\&size=$entry_size\n");
+    }
+
+  } # End of for each Bundle entry (code object) ...
+
+  close($input_fp);
+} # End of for each command line argument
+
+exit(0);
@@ -0,0 +1,67 @@
+# ROCm Code Object tooling
+
+ROCm compiler generated code objects (executables, object files, and shared object libraries) can be examined and code objects extracted with the following tools.
+
+## URI syntax:
+
+  ROCm Code Objects can be listed/accessed using the following URI syntax:
+```
+	code_object_uri ::== file_uri | memory_uri
+	file_uri        ::== file:// extract_file [ range_specifier ]
+	memory_uri      ::== memory:// process_id range_specifier
+	range_specifier ::== [ # | ? ] offset= number & size= number
+	extract_file    ::== URI_ENCODED_OS_FILE_PATH
+	process_id      ::== DECIMAL_NUMBER
+	number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
+```
+  Example: file://dir1/dir2/hello_world#offset=133&size=14472
+           memory://1234#offset=0x20000&size=3000
+
+
+## List available ROCm Code Objects: roc-obj-ls
+
+  Use this tool to list available ROCm code objects.  Code objects are listed using URI syntax.
+
+  Usage: roc-obj-ls [-v|h] executable...
+  List the URIs of the code objects embedded in the specified host executables.
+    -v Verbose output (includes Entry ID)
+    -h Show this help message
+
+
+## Extract ROCm Code Objects: roc-obj-extract
+
+  Extracts available ROCm code objects from specified URI.
+
+  Usage: roc-obj-extract [-o|v|h] URI...
+    - URIs can be read from STDIN, one per line.
+    - From the URIs specified, extracts code objects into files named: <executable_name>-[pid<number>]-offset<number>-size<number>.co
+
+  Options:
+    -o <path> Path for output. If "-" specified, code object is printed to STDOUT.
+    -v        Verbose output (includes Entry ID).
+    -h        Show this help message
+
+  Note: when specifying a URI argument to roc-obj-extract by copying and pasting the output of roc-obj-ls, you need to escape the '&' character (or quote the whole URI); otherwise your shell will interpret it as a request to run the command as a background process.
+  As an example, if roc-obj-ls generates a URI like this ```file://my_exe#offset=24576&size=46816```, you need to use the following argument to roc-obj-extract: ```file://my_exe#offset=24576\&size=46816```
+
+## Examples:
+
+### Dump all code objects to current directory:
+    roc-obj-ls <exe> | roc-obj-extract
+
+### Dump the ISA for gfx906:
+    roc-obj-ls -v <exe> | grep "gfx906" | awk '{print $2}' | roc-obj-extract -o - | llvm-objdump -d - > <exe>.gfx906.isa
+
+### Check the e_flags of the gfx908 code object:
+    roc-obj-ls -v <exe> | grep "gfx908" | awk '{print $2}' | roc-obj-extract -o - | llvm-readelf -h - | grep Flags
+
+### Disassemble the fourth code object:
+    roc-obj-ls <exe> | sed -n 4p | roc-obj-extract -o - | llvm-objdump -d -
+
+### Sort embedded code objects by size:
+    for uri in $(roc-obj-ls <exe>); do printf "%d: %s\n" "$(roc-obj-extract -o - "$uri" | wc -c)" "$uri"; done | sort -n
+
+### Compare disassembly of gfx803 and gfx900 code objects:
+    dis() { roc-obj-ls -v <exe> | grep "$1" | awk '{print $2}' | roc-obj-extract -o - | llvm-objdump -d -; }
+    diff <(dis gfx803) <(dis gfx900)
+