83f0c4402e
Change-Id: I48d6332502774485d7ced3fee065a74f15774500
259 wiersze
8.4 KiB
Perl
Executable File
259 wiersze
8.4 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
use strict;
|
|
use File::Copy;
|
|
use File::Spec;
|
|
use File::Basename;
|
|
use File::Which;
|
|
use Cwd 'realpath';
|
|
use Getopt::Std;
|
|
use List::Util qw(max);
|
|
|
|
# Print the command-line help text to STDOUT, then terminate the script
# with the default exit status. This sub never returns to its caller.
sub usage {
    my @help_lines = (
        "Usage: $0 [OPTION]... -i <input>\n",
        "Extract the device kernels from an hcc executable.\n\n",
        "-h \t\t\t\tshow this help message\n",
        "-i <input> \t\t\t\tinput file\n",
    );

    # One print call per line, exactly as many writes as there are lines.
    print($_) for @help_lines;

    exit;
}
|
|
|
|
# Set to a true value to print diagnostic progress messages.
my $debug = 0;

# When true, device objects are unbundled with clang-offload-bundler;
# when false, they are copied out of the input file with "dd" instead.
my $use_clang_offload_bundler = 1;

# Command-line switches: -h (help) and -i <input> (takes an argument).
my %options = ();
getopts('hi:', \%options);

# No switches at all, or an explicit -h: show the help text and exit.
usage() if (!%options || defined $options{h});

# The input executable is mandatory and must be an existing plain file.
die("input not specified") unless defined $options{i};
my $input_file = $options{i};
die("can't find $input_file") unless (-f $input_file);
|
|
|
|
# Locate the HIP installation: honour the HIP_PATH environment variable,
# otherwise derive it from the location of this script ($0).
my $HIP_PATH = $ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../"));

# Ask hipconfig which compiler and install paths are in use.
# Backticks keep any trailing newline the tool prints and yield undef when
# the command cannot be started; normalise both so that the string
# comparisons and path joins below behave as intended.
my $HIP_COMPILER   = `$HIP_PATH/bin/hipconfig --compiler`     // '';
my $ROCM_PATH      = `$HIP_PATH/bin/hipconfig --rocmpath`     // '';
my $HIP_CLANG_PATH = `$HIP_PATH/bin/hipconfig --hipclangpath` // '';
chomp($HIP_COMPILER, $ROCM_PATH, $HIP_CLANG_PATH);

# look for llvm-objdump and clang-offload-bundler
my $tools_path_prefix;
my $llvm_objdump;
my $clang_offload_bundler;

if ($HIP_COMPILER eq "clang") {
    # Search the path with respect to HIP_CLANG_PATH
    $tools_path_prefix = $HIP_CLANG_PATH;
}
elsif ($HIP_COMPILER eq "hcc") {
    # Search the path with respect to HCC_HOME if it is set,
    # else search in ROCM_PATH
    $tools_path_prefix = defined $ENV{'HCC_HOME'}
        ? File::Spec->catfile($ENV{'HCC_HOME'}, "bin")
        : realpath($ROCM_PATH . "/hcc/bin");
}

# Only trust the compiler-specific directory when one was actually
# determined; an undefined or empty prefix would otherwise make catfile()
# probe the filesystem root (e.g. "/llvm-objdump").
my $have_prefix = defined $tools_path_prefix && length $tools_path_prefix;

# Resolve each tool: first in the directory chosen above, then via PATH.
for my $tool (
    [ \$llvm_objdump,          "llvm-objdump" ],
    [ \$clang_offload_bundler, "clang-offload-bundler" ],
) {
    my ($path_ref, $tool_name) = @{$tool};

    my $candidate;
    if ($have_prefix) {
        $candidate = File::Spec->catfile($tools_path_prefix, $tool_name);
    }

    if (!(defined $candidate && -f $candidate)) {
        # which() returns undef in scalar context when nothing is found.
        $candidate = which($tool_name);
        if (!(defined $candidate && -f $candidate)) {
            die("Can't find $tool_name\n");
        }
    }

    ${$path_ref} = $candidate;
}
|
|
|
|
# Read the section table of the input file with a single "objdump -h" run.
# The list form of the pipe open bypasses the shell, so file names that
# contain spaces or shell metacharacters are passed through untouched.
my %section_info_for;
{
    open(my $objdump_fh, '-|', 'objdump', '-h', $input_file)
        or die("Can't run objdump on $input_file: $!\n");

    while (my $line = <$objdump_fh>) {
        my @fields = split(' ', $line);

        # Section rows start with a numeric index; the name is the 2nd
        # column, the size the 3rd and the file offset the 6th (both hex).
        next unless @fields >= 6 && $fields[0] =~ /^\d+$/;

        my ($name, $size, $offset) = @fields[1, 2, 5];
        next unless $size =~ /^[0-9a-fA-F]+$/ && $offset =~ /^[0-9a-fA-F]+$/;

        # Keep the first row for a given name; names are matched exactly
        # rather than as a substring pattern.
        $section_info_for{$name} //= {
            size   => hex($size),
            offset => hex($offset),
        };
    }

    # A failing objdump simply leaves the table empty, which is reported
    # below as "No kernel section found".
    close($objdump_fh);
}

# kernel section information for HCC
my $kernel_section_name   = ".kernel";
my $kernel_triple         = "hcc-amdgcn-amd-amdhsa--";
my $kernel_blob_alignment = 1;

my $kernel_section_size = exists $section_info_for{$kernel_section_name}
    ? $section_info_for{$kernel_section_name}{size}
    : 0;

if (!$kernel_section_size) {
    # If there isn't a section created by HCC,
    # try to detect a kernel section created by HIP-Clang
    $kernel_section_name   = ".hip_fatbin";
    $kernel_triple         = "hip-amdgcn-amd-amdhsa-";
    $kernel_blob_alignment = 8;

    $kernel_section_size = exists $section_info_for{$kernel_section_name}
        ? $section_info_for{$kernel_section_name}{size}
        : 0;
    $kernel_section_size or die("No kernel section found\n");
}

# Size and offset come from the same table row, so they always agree.
my $kernel_section_offset = $section_info_for{$kernel_section_name}{offset};
my $kernel_section_end    = $kernel_section_offset + $kernel_section_size;

if ($debug) {
    print "kernel section size: $kernel_section_size\n";
    print "kernel section offset: $kernel_section_offset\n";
    print "kernel section end: $kernel_section_end\n";
}
|
|
|
|
# Parse the kernel bundle headers found in the kernel section and extract
# every device code object.
#
# The section holds one or more "blobs". Each blob starts with the magic
# string "__CLANG_OFFLOAD_BUNDLE__", followed by the number of bundles and,
# per bundle: offset, size, triple length (8 bytes each) and the triple.
# For every blob this writes:
#   <input>-NNN.bundle            the raw blob
#   <input>-NNN-<target>.hsaco    one code object per GPU target
#   <input>-NNN-<target>.isa      its disassembly from llvm-objdump
# Dies on any parse or tool failure, and when no blob was found at all.

# Three-argument open with "or": the failure check applies to open()
# itself, and the file name can never be mistaken for an open mode.
open(my $input_fh, '<', $input_file)
    or die("Can't open $input_file: $!\n");
binmode($input_fh);

# Read one unsigned 64-bit header field. The bundle header stores its
# integers little-endian, so decode with "Q<" instead of host byte order.
my $read_u64 = sub {
    my ($what) = @_;
    my $raw;
    my $got = read($input_fh, $raw, 8);
    (defined $got && $got == 8) or die("Fail to parse $what\n");
    return unpack("Q<", $raw);
};

my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;

while (1) {

    # adjust the offset to the blob alignment (round up, integer-exact)
    my $misalignment = $current_blob_offset % $kernel_blob_alignment;
    if ($misalignment) {
        $current_blob_offset += $kernel_blob_alignment - $misalignment;
    }
    if ($debug) {
        print "Current blob offset: $current_blob_offset\n";
    }

    if ($current_blob_offset >= $kernel_section_end) {
        if ($debug) {
            print "reached end of kernel section\n";
        }
        last;
    }

    # whence 0 == SEEK_SET: absolute position from the start of the file
    seek($input_fh, $current_blob_offset, 0)
        or die("Can't seek to offset $current_blob_offset in $input_file: $!\n");

    # skip OFFLOAD_BUNDLER_MAGIC_STR
    my $magic_str;
    my $read_bytes = read($input_fh, $magic_str, 24);
    if (!defined $read_bytes
        || $read_bytes != 24
        || $magic_str ne "__CLANG_OFFLOAD_BUNDLE__") {
        # didn't detect the bundle magic string
        if ($debug) {
            print "Offload bundle magic string not detected\n";
        }
        last;
    }

    # read number of bundles
    my $num_bundles = $read_u64->("number of bundles");
    if ($debug) {
        print "Blob $num_blobs, number of bundles: $num_bundles\n";
    }

    # detected GPU targets
    my @asic_target_array;

    # offset and size of the bundle that lies furthest into the blob;
    # together they give the total length of the blob
    my $last_bundle_offset = 0;
    my $last_bundle_size   = 0;

    # strings for creating new files
    my $file_blob_number = sprintf("%03d", $num_blobs);
    my $filename_prefix  = "${input_file}-${file_blob_number}";

    # clang-offload-bundler wants parallel, comma-separated lists of
    # outputs and targets; the host entry is discarded into /dev/null
    my @bundler_outputs = ("/dev/null");
    my @bundler_targets = ("host-x86_64-unknown-linux");

    for (my $iter = 0; $iter < $num_bundles; $iter++) {
        # read bundle offset
        my $offset = $read_u64->("bundle offset");
        $last_bundle_offset = max($last_bundle_offset, $offset);

        # read bundle size
        my $size = $read_u64->("bundle size");
        if ($last_bundle_offset == $offset) {
            $last_bundle_size = $size;
        }

        # read triple size
        my $triple_size = $read_u64->("triple size");

        # A triple can never be longer than what is left of the section;
        # rejecting larger values keeps a corrupt header from requesting
        # an enormous read buffer.
        $triple_size <= $kernel_section_end - $current_blob_offset
            or die("Fail to parse triple\n");

        # triple
        my $triple;
        $read_bytes = read($input_fh, $triple, $triple_size);
        (defined $read_bytes && $read_bytes == $triple_size)
            or die("Fail to parse triple\n");

        if ($debug) {
            print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
        }

        # Only process GPU targets, skip host targets.
        # Literal prefix comparison: no regex metacharacter surprises.
        next unless index($triple, $kernel_triple) == 0;

        my $asic_target = substr($triple, length($kernel_triple));

        # augment arguments for clang-offload-bundler
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        push(@bundler_outputs, $hsaco_file_name);
        push(@bundler_targets, $triple);

        # add into asic_target_array
        push(@asic_target_array, $asic_target);

        if (!$use_clang_offload_bundler) {
            my $offset_for_hsaco = $current_blob_offset + $offset;

            # List-form system() bypasses the shell, so file names are
            # passed to dd verbatim.
            my @dd_command = (
                "dd",
                "if=${input_file}",
                "of=${hsaco_file_name}",
                "skip=$offset_for_hsaco",
                "count=$size",
                "bs=1",
                "status=none",
            );
            if ($debug) {
                print("extract code bundle with dd: " . join(' ', @dd_command) . "\n");
            }
            system(@dd_command) == 0
                or die("Fail to extract code bundle with dd\n");
        }
    }

    # extract the code blob
    my $blob_filename = "${filename_prefix}.bundle";
    my $write_bytes   = $last_bundle_offset + $last_bundle_size;

    # A blob of length zero would leave the offset unchanged and make
    # this loop run forever on the same header.
    $write_bytes > 0
        or die("Malformed offload bundle at offset $current_blob_offset: no bundle data\n");

    system(
        "dd",
        "if=$input_file",
        "of=$blob_filename",
        "skip=$current_blob_offset",
        "count=$write_bytes",
        "bs=1",
        "status=none",
    ) == 0
        or die("Extracting kernel bundle file failed: $?");

    if ($use_clang_offload_bundler) {
        # use clang-offload-bundler to unbundle HSACO
        my @command = (
            $clang_offload_bundler,
            "-unbundle",
            "-type=o",
            "-inputs=${blob_filename}",
            "-outputs=" . join(',', @bundler_outputs),
            "-targets=" . join(',', @bundler_targets),
        );
        if ($debug) {
            print("clang offload bundler command: " . join(' ', @command) . "\n");
        }
        system(@command) == 0
            or die("Fail to execute clang-offload-bundler");
    }

    for my $asic_target (@asic_target_array) {
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        my $isa_file_name   = "${filename_prefix}-${asic_target}.isa";

        # use llvm-objdump to dump out GCN ISA; its standard output is
        # copied into the .isa file without going through a shell
        open(my $isa_fh, '>', $isa_file_name)
            or die("Can't write $isa_file_name: $!\n");
        open(my $disasm_fh, '-|',
             $llvm_objdump, "--disassemble", "--mcpu=$asic_target", $hsaco_file_name)
            or die("Fail to disassemble AMDGPU ISA for target " . $asic_target);

        while (my $line = <$disasm_fh>) {
            print {$isa_fh} $line;
        }

        # close() on a pipe is false when the command exited non-zero
        my $disasm_ok = close($disasm_fh);
        close($isa_fh)
            or die("Can't write $isa_file_name: $!\n");
        $disasm_ok
            or die("Fail to disassemble AMDGPU ISA for target " . $asic_target);

        if ($debug) {
            print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
        }
    }

    # continue right after the last bundle of this blob
    $current_blob_offset += $write_bytes;
    $num_blobs++;
}

close($input_fh);

$num_blobs or die("No device code found.\n");
exit(0);
|
|
|