Files
rocm-systems/samples/BitionicSort/bitonic_sort_kernel.hsail
T
Ding, Wei (xN/A) TX 41dc8c58e7 ECR #333755 - Added HSA SDK app: BitonicSort
[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1125122]
2015-02-25 18:21:47 -05:00

98 lines
3.3 KiB
Plaintext

// Module header. The version directive selects HSAIL 0:20140528 with the
// $full profile and the $large machine model; the kernel in this file
// accordingly handles addresses as 64-bit (u64) values.
version 0:20140528:$full:$large;
// Extensions enabled for this module. The kernel visible in this file uses
// no image operations.
extension "amd:gcn";
extension "IMAGE";
// Declaration only (no body here) of a program-linkage function &abort with
// no outputs and no inputs. The kernel visible in this file never calls it.
decl prog function &abort()();
// Kernel: one compare-exchange step over a u32 array (the kernel name
// identifies it as the OpenCL "bitonicSort" kernel). Each work-item loads
// two elements of theArray, orders them with min/max, and stores both back
// to the same two locations.
//
// Kernel arguments:
//   %__global_offset_0  u64, added to the dimension-0 work-item absolute ID.
//   %__global_offset_1, %__global_offset_2, %__printf_buffer,
//   %__vqueue_pointer, %__aqlwrap_pointer
//                       declared, but never loaded by this kernel body.
//   %theArray           u64 base address of the u32 elements; accessed with
//                       ld_global / st_global.
//   %stage              u32; bit `stage` of the work-item ID decides whether
//                       `direction` is used as-is or replaced by
//                       1 - direction.
//   %passOfStage        u32; the pair distance is 1 << (stage - passOfStage).
//   %direction          u32; after the optional flip above, a non-zero value
//                       stores the smaller element at the lower index, zero
//                       stores the larger element there.
//
// With id    = low 32 bits of (workitemabsid(0) + __global_offset_0),
//      shift = stage - passOfStage,
//      dist  = 1 << shift,
// the two element indices are
//      left  = (id & (dist - 1)) + (((id >> shift) * dist) << 1)
//      right = left + dist
// All index arithmetic uses 32-bit unsigned instructions. No array length is
// passed to the kernel and nothing here bounds-checks left/right.
// NOTE(review): the launch size presumably guarantees both indices are in
// range -- confirm against the host code.
prog kernel &__OpenCL_bitonicSort_kernel(
kernarg_u64 %__global_offset_0,
kernarg_u64 %__global_offset_1,
kernarg_u64 %__global_offset_2,
kernarg_u64 %__printf_buffer,
kernarg_u64 %__vqueue_pointer,
kernarg_u64 %__aqlwrap_pointer,
kernarg_u64 %theArray,
kernarg_u32 %stage,
kernarg_u32 %passOfStage,
kernarg_u32 %direction)
{
// "AMD RTI" pragmas: compiler-emitted metadata strings that list the kernel
// arguments (name, type, and further numeric fields) between the ARGSTART
// and ARGEND markers.
// NOTE(review): presumably parsed by the AMD runtime/finalizer -- keep every
// string byte-for-byte as generated.
pragma "AMD RTI", "ARGSTART:__OpenCL_bitonicSort_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "memory:private:0";
pragma "AMD RTI", "memory:region:0";
pragma "AMD RTI", "memory:local:0";
pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0";
pragma "AMD RTI", "value:__global_offset_1:u64:1:1:16";
pragma "AMD RTI", "value:__global_offset_2:u64:1:1:32";
pragma "AMD RTI", "pointer:__printf_buffer:u8:1:1:48:uav:7:1:RW:0:0:0";
pragma "AMD RTI", "value:__vqueue_pointer:u64:1:1:64";
pragma "AMD RTI", "value:__aqlwrap_pointer:u64:1:1:80";
pragma "AMD RTI", "pointer:theArray:u32:1:1:96:uav:7:4:RW:0:0:0";
pragma "AMD RTI", "value:stage:u32:1:1:112";
pragma "AMD RTI", "constarg:7:stage";
pragma "AMD RTI", "value:passOfStage:u32:1:1:128";
pragma "AMD RTI", "constarg:8:passOfStage";
pragma "AMD RTI", "value:direction:u32:1:1:144";
pragma "AMD RTI", "constarg:9:direction";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "enqueue_kernel:0";
pragma "AMD RTI", "kernel_index:0";
pragma "AMD RTI", "reflection:0:size_t";
pragma "AMD RTI", "reflection:1:size_t";
pragma "AMD RTI", "reflection:2:size_t";
pragma "AMD RTI", "reflection:3:size_t";
pragma "AMD RTI", "reflection:4:size_t";
pragma "AMD RTI", "reflection:5:size_t";
pragma "AMD RTI", "reflection:6:uint*";
pragma "AMD RTI", "reflection:7:uint";
pragma "AMD RTI", "reflection:8:uint";
pragma "AMD RTI", "reflection:9:uint";
pragma "AMD RTI", "ARGEND:__OpenCL_bitonicSort_kernel";
@__OpenCL_bitonicSort_kernel_entry:
// BB#0: // %entry
// id ($s1) = low 32 bits of (work-item absolute ID in dimension 0 +
// __global_offset_0). The sum is formed in 64 bits, then truncated.
workitemabsid_u32 $s0, 0;
cvt_u64_u32 $d0, $s0;
ld_kernarg_align(8)_width(all)_u64 $d1, [%__global_offset_0];
add_u64 $d0, $d0, $d1;
cvt_u32_u64 $s1, $d0;
// sortDir ($s0) = ((id & (1 << stage)) == 0) ? direction : 1 - direction.
// $s2 keeps `stage` for the shift computation below.
ld_kernarg_align(4)_width(all)_u32 $s2, [%stage];
shl_u32 $s0, 1, $s2;
and_b32 $s0, $s1, $s0;
ld_kernarg_align(4)_width(all)_u32 $s3, [%direction];
sub_u32 $s4, 1, $s3;
cmp_eq_b1_s32 $c0, $s0, 0;
cmov_b32 $s0, $c0, $s3, $s4;
// shift ($s4) = stage - passOfStage; dist ($s2) = 1 << shift.
// $s2 (stage) and $s3 (direction) are overwritten here; neither is needed
// again.
ld_kernarg_align(4)_width(all)_u32 $s3, [%passOfStage];
sub_u32 $s4, $s2, $s3;
shl_u32 $s2, 1, $s4;
// $s3 = id & (dist - 1). 4294967295 is 0xFFFFFFFF, so the 32-bit add
// subtracts 1 from dist.
add_u32 $s3, $s2, 4294967295;
and_b32 $s3, $s1, $s3;
// $s1 = ((id >> shift) * dist) << 1. This overwrites id, which is not used
// after this point.
shr_u32 $s1, $s1, $s4;
mul_u32 $s1, $s2, $s1;
shl_u32 $s1, $s1, 1;
// $d1 = base address of theArray.
ld_kernarg_align(8)_width(all)_u64 $d1, [%theArray];
// left ($s3) = sum of the two terms above. $d0 = theArray + left * 4 (index
// zero-extended to 64 bits, scaled by the 4-byte element size).
// $s1 = element loaded from that address.
add_u32 $s3, $s1, $s3;
cvt_u64_u32 $d0, $s3;
shl_u64 $d0, $d0, 2;
add_u64 $d0, $d1, $d0;
ld_global_align(4)_u32 $s1, [$d0];
// right ($s2) = left + dist. $d1 is reused to hold theArray + right * 4 (the
// base pointer is not needed afterwards). $s3 = element loaded from there.
add_u32 $s2, $s3, $s2;
cvt_u64_u32 $d2, $s2;
shl_u64 $d2, $d2, 2;
add_u64 $d1, $d1, $d2;
ld_global_align(4)_u32 $s3, [$d1];
// Unsigned ordering of the pair: $s2 = larger value, $s1 = smaller value.
max_u32 $s2, $s1, $s3;
min_u32 $s1, $s1, $s3;
// Select what goes where, keyed on sortDir == 0:
//   sortDir == 0 : $s0 = smaller, $s1 = larger
//   sortDir != 0 : $s0 = larger,  $s1 = smaller
// The first cmov overwrites sortDir in $s0; $c0 already holds the test.
cmp_eq_b1_s32 $c0, $s0, 0;
cmov_b32 $s0, $c0, $s1, $s2;
cmov_b32 $s1, $c0, $s2, $s1;
// Write back: $s1 to the left element address, $s0 to the right one.
st_global_align(4)_u32 $s1, [$d0];
st_global_align(4)_u32 $s0, [$d1];
ret;
};