39 KiB
39 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 937113 | 937118 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f12eac04180 | 12076635381227990 | 12076635381273678 | 12076635381598795 | 12076635381689318 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 937113 | 937118 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f12eac35100 | 12076635396787522 | 12076635396887551 | 12076635396894271 | 12076635396915901 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f141aca3900 | 0x7f12eac35140 | 12076635396946107 | 12076635396958910 | 12076635397089629 | 12076635397093471 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f141aca3800 | 0x7f12eac35180 | 12076635397153844 | 12076635397172188 | 12076635397417626 | 12076635397502262 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f141aca3700 | 0x7f12eac351c0 | 12076635397530284 | 12076635397541945 | 12076635397788343 | 12076635397870096 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f141aca3600 | 0x7f12eac35200 | 12076635397898148 | 12076635397909782 | 12076635398035381 | 12076635398039221 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f141aca3500 | 0x7f12eac35240 | 12076635398076520 | 12076635398088341 | 12076635398213780 | 12076635398217131 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f141aca3400 | 0x7f12eac35280 | 12076635398270070 | 12076635398281939 | 12076635398406418 | 12076635398409309 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f141aca3300 | 0x7f12eac352c0 | 12076635398437431 | 12076635398448658 | 12076635398696176 | 12076635398741717 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f141aca3a00 | 0x7f12eac35300 | 12076635398764199 | 12076635398777615 | 12076635399025613 | 12076635399071551 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f141aca3900 | 0x7f12eac35340 | 12076635399094343 | 12076635399107533 | 12076635399232652 | 12076635399235516 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f141aca3800 | 0x7f12eac35380 | 12076635399263287 | 12076635399276171 | 12076635399401610 | 12076635399404741 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f141aca3700 | 0x7f12eac353c0 | 12076635399444655 | 12076635399456170 | 12076635399581449 | 12076635399584315 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f141aca3600 | 0x7f12eac35400 | 12076635399611585 | 12076635399623689 | 12076635399870247 | 12076635399893239 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f141aca3500 | 0x7f12eac35440 | 12076635399918546 | 12076635399929606 | 12076635400177284 | 12076635400223223 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f141aca3400 | 0x7f12eac35480 | 12076635400245675 | 12076635400257444 | 12076635400382403 | 12076635400385244 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f141aca3300 | 0x7f12eac354c0 | 12076635400412726 | 12076635400425762 | 12076635400550881 | 12076635400554048 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f141aca3a00 | 0x7f12eac35500 | 12076635400591819 | 12076635400602881 | 12076635400727040 | 12076635400730136 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f141aca3900 | 0x7f12eac35540 | 12076635400756735 | 12076635400768959 | 12076635401016317 | 12076635401041185 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f141aca3800 | 0x7f12eac35580 | 12076635401064799 | 12076635401076317 | 12076635401318875 | 12076635401363364 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f141aca3700 | 0x7f12eac355c0 | 12076635401384904 | 12076635401395994 | 12076635401520153 | 12076635401523091 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f141aca3600 | 0x7f12eac35600 | 12076635401549149 | 12076635401560473 | 12076635401685592 | 12076635401688719 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f141aca3500 | 0x7f12eac35640 | 12076635401727772 | 12076635401738712 | 12076635401862391 | 12076635401865458 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f141aca3400 | 0x7f12eac35680 | 12076635401892097 | 12076635401906710 | 12076635402152788 | 12076635402202364 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f141aca3300 | 0x7f12eac356c0 | 12076635402225738 | 12076635402238868 | 12076635402483186 | 12076635402528471 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f141aca3a00 | 0x7f12eac35700 | 12076635402552285 | 12076635402565585 | 12076635402689584 | 12076635402692556 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f141aca3900 | 0x7f12eac35740 | 12076635402719877 | 12076635402731344 | 12076635402856303 | 12076635402859487 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f141aca3800 | 0x7f12eac35780 | 12076635402897117 | 12076635402908142 | 12076635403032941 | 12076635403036506 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f141aca3700 | 0x7f12eac357c0 | 12076635403063136 | 12076635403075181 | 12076635403325419 | 12076635403348526 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f141aca3600 | 0x7f12eac35800 | 12076635403370758 | 12076635403382378 | 12076635403629736 | 12076635403674102 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f141aca3500 | 0x7f12eac35840 | 12076635403695322 | 12076635403708456 | 12076635403832775 | 12076635403835713 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f141aca3400 | 0x7f12eac35880 | 12076635403861561 | 12076635403873894 | 12076635403999973 | 12076635404017561 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f141aca3300 | 0x7f12eac358c0 | 12076635404049080 | 12076635404060933 | 12076635404185572 | 12076635404188699 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f141aca3a00 | 0x7f12eac35900 | 12076635404214627 | 12076635404226211 | 12076635404472609 | 12076635404497944 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f141aca3900 | 0x7f12eac35940 | 12076635404521208 | 12076635404532769 | 12076635404779487 | 12076635404800427 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f141aca3800 | 0x7f12eac35980 | 12076635404822338 | 12076635404835327 | 12076635404959806 | 12076635404962619 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f141aca3700 | 0x7f12eac359c0 | 12076635404990110 | 12076635405002845 | 12076635405128444 | 12076635405131603 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f141aca3600 | 0x7f12eac35a00 | 12076635405168913 | 12076635405183324 | 12076635405307323 | 12076635405310125 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f141aca3500 | 0x7f12eac35a40 | 12076635405337246 | 12076635405350202 | 12076635405599000 | 12076635405648124 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f141aca3400 | 0x7f12eac35a80 | 12076635405670105 | 12076635405682840 | 12076635405928118 | 12076635405950376 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f141aca3300 | 0x7f12eac35ac0 | 12076635405973099 | 12076635405985877 | 12076635406110996 | 12076635406114031 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f141aca3a00 | 0x7f12eac35b00 | 12076635406142003 | 12076635406154996 | 12076635406281235 | 12076635406284337 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f141aca3900 | 0x7f12eac35b40 | 12076635406320164 | 12076635406332754 | 12076635406457553 | 12076635406460555 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f141aca3800 | 0x7f12eac35b80 | 12076635406486964 | 12076635406499633 | 12076635406758511 | 12076635406779148 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f141aca3700 | 0x7f12eac35bc0 | 12076635406801009 | 12076635406814031 | 12076635407059469 | 12076635407081470 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f141aca3600 | 0x7f12eac35c00 | 12076635407105164 | 12076635407117868 | 12076635407242507 | 12076635407245415 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f141aca3500 | 0x7f12eac35c40 | 12076635407272956 | 12076635407285547 | 12076635407412426 | 12076635407415111 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f141aca3400 | 0x7f12eac35c80 | 12076635407453933 | 12076635407466825 | 12076635407592104 | 12076635407594845 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f141aca3300 | 0x7f12eac35cc0 | 12076635407622326 | 12076635407634984 | 12076635407894822 | 12076635407915882 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f141aca3a00 | 0x7f12eac35d00 | 12076635407936881 | 12076635407950021 | 12076635408198179 | 12076635408219407 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f141aca3900 | 0x7f12eac35d40 | 12076635408241548 | 12076635408253539 | 12076635408378498 | 12076635408381258 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f141aca3800 | 0x7f12eac35d80 | 12076635408408118 | 12076635408420098 | 12076635408548897 | 12076635408551755 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f141aca3700 | 0x7f12eac35dc0 | 12076635408587912 | 12076635408600736 | 12076635408726175 | 12076635408729045 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f141aca3600 | 0x7f12eac35e00 | 12076635408756175 | 12076635408769215 | 12076635409028413 | 12076635409055091 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f141aca3500 | 0x7f12eac35e40 | 12076635409078054 | 12076635409090492 | 12076635409336090 | 12076635409357213 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f141aca3400 | 0x7f12eac35e80 | 12076635409381038 | 12076635409393050 | 12076635409518009 | 12076635409521078 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f141aca3300 | 0x7f12eac35ec0 | 12076635409548990 | 12076635409561048 | 12076635409691127 | 12076635409693990 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f141aca3a00 | 0x7f12eac35f00 | 12076635409730868 | 12076635409743767 | 12076635409869366 | 12076635409872251 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f141aca3900 | 0x7f12eac35f40 | 12076635409898220 | 12076635409911446 | 12076635410171763 | 12076635410196424 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f141aca3800 | 0x7f12eac35f80 | 12076635410219377 | 12076635410232083 | 12076635410475601 | 12076635410496182 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f141aca3700 | 0x7f12eac35fc0 | 12076635410518233 | 12076635410531761 | 12076635410656720 | 12076635410659586 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f141aca3600 | 0x7f12eac36000 | 12076635410686656 | 12076635410698799 | 12076635410835438 | 12076635410838318 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f141aca3500 | 0x7f12eac36040 | 12076635410876219 | 12076635410889358 | 12076635411013837 | 12076635411016830 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f141aca3400 | 0x7f12eac36080 | 12076635411045444 | 12076635411057996 | 12076635411323434 | 12076635411347035 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f141aca3300 | 0x7f12eac360c0 | 12076635411368895 | 12076635411381194 | 12076635411663431 | 12076635411684903 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f141aca3a00 | 0x7f12eac36100 | 12076635411706433 | 12076635411719591 | 12076635411844870 | 12076635411848087 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f141aca3900 | 0x7f12eac36140 | 12076635411875357 | 12076635411888710 | 12076635412030948 | 12076635412034073 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f141aca3800 | 0x7f12eac36180 | 12076635412071552 | 12076635412083908 | 12076635412208867 | 12076635412211873 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f141aca3700 | 0x7f12eac361c0 | 12076635412238493 | 12076635412251267 | 12076635412509185 | 12076635412532029 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f141aca3600 | 0x7f12eac36200 | 12076635412553829 | 12076635412566624 | 12076635412815422 | 12076635412838900 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f141aca3500 | 0x7f12eac36240 | 12076635412861842 | 12076635412874782 | 12076635413000861 | 12076635413015598 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f141aca3400 | 0x7f12eac36280 | 12076635413038771 | 12076635413051260 | 12076635413201019 | 12076635413203899 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f141aca3300 | 0x7f12eac362c0 | 12076635413254533 | 12076635413268058 | 12076635413393977 | 12076635413397338 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f141aca3a00 | 0x7f12eac36300 | 12076635413425220 | 12076635413437177 | 12076635413692695 | 12076635413714188 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f141aca3900 | 0x7f12eac36340 | 12076635413736950 | 12076635413748855 | 12076635413996053 | 12076635414023323 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f141aca3800 | 0x7f12eac36380 | 12076635414046696 | 12076635414059412 | 12076635414185011 | 12076635414188169 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f141aca3700 | 0x7f12eac363c0 | 12076635414216081 | 12076635414228851 | 12076635414386609 | 12076635414389504 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f141aca3600 | 0x7f12eac36400 | 12076635414427425 | 12076635414440049 | 12076635414564528 | 12076635414567485 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f141aca3500 | 0x7f12eac36440 | 12076635414594676 | 12076635414607408 | 12076635414869965 | 12076635414894984 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f141aca3400 | 0x7f12eac36480 | 12076635414916214 | 12076635414928365 | 12076635415172683 | 12076635415199030 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f141aca3300 | 0x7f12eac364c0 | 12076635415220600 | 12076635415233003 | 12076635415358602 | 12076635415361933 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f141aca3a00 | 0x7f12eac36500 | 12076635415388332 | 12076635415401481 | 12076635415566920 | 12076635415570130 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f141aca3900 | 0x7f12eac36540 | 12076635415606828 | 12076635415618919 | 12076635415744198 | 12076635415747270 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f141aca3800 | 0x7f12eac36580 | 12076635415776765 | 12076635415790918 | 12076635416046596 | 12076635416067956 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f141aca3700 | 0x7f12eac365c0 | 12076635416090648 | 12076635416103236 | 12076635416352994 | 12076635416374456 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f141aca3600 | 0x7f12eac36600 | 12076635416397138 | 12076635416410273 | 12076635416534592 | 12076635416537670 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f141aca3500 | 0x7f12eac36640 | 12076635416564570 | 12076635416576672 | 12076635416748990 | 12076635416751969 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f141aca3400 | 0x7f12eac36680 | 12076635416789027 | 12076635416801950 | 12076635416927389 | 12076635416930350 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f141aca3300 | 0x7f12eac366c0 | 12076635416957781 | 12076635416970429 | 12076635417226906 | 12076635417249093 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f141aca3a00 | 0x7f12eac36700 | 12076635417270323 | 12076635417283226 | 12076635417531864 | 12076635417553399 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f141aca3900 | 0x7f12eac36740 | 12076635417575741 | 12076635417587864 | 12076635417713303 | 12076635417716332 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f141aca3800 | 0x7f12eac36780 | 12076635417743282 | 12076635417755862 | 12076635417938421 | 12076635417941381 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f141aca3700 | 0x7f12eac367c0 | 12076635417979311 | 12076635417991540 | 12076635418116979 | 12076635418120384 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f141aca3600 | 0x7f12eac36800 | 12076635418147274 | 12076635418159699 | 12076635418423217 | 12076635418446440 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f141aca3500 | 0x7f12eac36840 | 12076635418471607 | 12076635418484016 | 12076635418732334 | 12076635418753562 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f141aca3400 | 0x7f12eac36880 | 12076635418775743 | 12076635418787854 | 12076635418914253 | 12076635418917236 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f141aca3300 | 0x7f12eac368c0 | 12076635418943816 | 12076635418956812 | 12076635419147851 | 12076635419163013 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f141aca3a00 | 0x7f12eac36900 | 12076635419194602 | 12076635419207370 | 12076635419333769 | 12076635419337057 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f141aca3900 | 0x7f12eac36940 | 12076635419364268 | 12076635419377609 | 12076635419660647 | 12076635419684864 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f141aca3800 | 0x7f12eac36980 | 12076635419706344 | 12076635419718566 | 12076635419964484 | 12076635419988038 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f141aca3700 | 0x7f12eac369c0 | 12076635420018034 | 12076635420031364 | 12076635420157603 | 12076635420160659 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f141aca3600 | 0x7f12eac36a00 | 12076635420188791 | 12076635420201602 | 12076635420411681 | 12076635420432535 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f141aca3500 | 0x7f12eac36a40 | 12076635420463543 | 12076635420476320 | 12076635420603359 | 12076635420606388 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f141aca3400 | 0x7f12eac36a80 | 12076635420633869 | 12076635420647039 | 12076635420953436 | 12076635420974854 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f141aca3300 | 0x7f12eac36ac0 | 12076635420996624 | 12076635421013596 | 12076635421265754 | 12076635421288186 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f141aca3a00 | 0x7f12eac36b00 | 12076635421308895 | 12076635421322073 | 12076635421450552 | 12076635421453774 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f141aca3900 | 0x7f12eac36b40 | 12076635421480304 | 12076635421492792 | 12076635421720630 | 12076635421746299 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f141aca3800 | 0x7f12eac36b80 | 12076635421779290 | 12076635421791990 | 12076635421919189 | 12076635421922366 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f141aca3700 | 0x7f12eac36bc0 | 12076635421949787 | 12076635421962868 | 12076635422291346 | 12076635422314596 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f141aca3600 | 0x7f12eac36c00 | 12076635422336677 | 12076635422349105 | 12076635422596943 | 12076635422617269 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f141aca3500 | 0x7f12eac36c40 | 12076635422639530 | 12076635422651983 | 12076635422779182 | 12076635422782316 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f141aca3400 | 0x7f12eac36c80 | 12076635422809646 | 12076635422822221 | 12076635423065899 | 12076635423091801 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f141aca3300 | 0x7f12eac36cc0 | 12076635423124723 | 12076635423139659 | 12076635423270058 | 12076635423273309 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f141aca3a00 | 0x7f12eac36d00 | 12076635423301031 | 12076635423312777 | 12076635423687974 | 12076635423709731 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f141aca3900 | 0x7f12eac36d40 | 12076635423730770 | 12076635423742694 | 12076635424001732 | 12076635424023525 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f141aca3800 | 0x7f12eac36d80 | 12076635424047569 | 12076635424060451 | 12076635424190530 | 12076635424193631 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f141aca3700 | 0x7f12eac36dc0 | 12076635424219148 | 12076635424231010 | 12076635424509408 | 12076635424534595 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f141aca3600 | 0x7f12eac36e00 | 12076635424565022 | 12076635424577727 | 12076635424713406 | 12076635424716554 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f141aca3500 | 0x7f12eac36e40 | 12076635424744405 | 12076635424757086 | 12076635425204442 | 12076635425230269 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f141aca3400 | 0x7f12eac36e80 | 12076635425252380 | 12076635425264602 | 12076635425524119 | 12076635425549924 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f141aca3300 | 0x7f12eac36ec0 | 12076635425572696 | 12076635425584759 | 12076635425719318 | 12076635425722324 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f141aca3a00 | 0x7f12eac36f00 | 12076635425748804 | 12076635425761078 | 12076635426074995 | 12076635426102401 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f141aca3900 | 0x7f12eac36f40 | 12076635426135062 | 12076635426147634 | 12076635426299153 | 12076635426302303 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f141aca3800 | 0x7f12eac36f80 | 12076635426328512 | 12076635426341713 | 12076635426859309 | 12076635426879818 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f141aca3700 | 0x7f12eac36fc0 | 12076635426901057 | 12076635426913388 | 12076635427212746 | 12076635427240078 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f141aca3600 | 0x7f12eac37000 | 12076635427265745 | 12076635427278665 | 12076635427434024 | 12076635427437244 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f141aca3500 | 0x7f12eac37040 | 12076635427463754 | 12076635427476104 | 12076635427860741 | 12076635427886119 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f141aca3400 | 0x7f12eac37080 | 12076635427916947 | 12076635427930180 | 12076635428104899 | 12076635428108794 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f141aca3300 | 0x7f12eac370c0 | 12076635428135433 | 12076635428147778 | 12076635428758333 | 12076635428784581 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f141aca3a00 | 0x7f12eac37100 | 12076635428805870 | 12076635428818013 | 12076635429175290 | 12076635429199422 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f141aca3900 | 0x7f12eac37140 | 12076635429223247 | 12076635429236089 | 12076635429413208 | 12076635429416767 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f141aca3800 | 0x7f12eac37180 | 12076635429444298 | 12076635429456888 | 12076635429912884 | 12076635429938858 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f141aca3700 | 0x7f12eac371c0 | 12076635429972841 | 12076635429985363 | 12076635430184402 | 12076635430205363 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f141aca3600 | 0x7f12eac37200 | 12076635430227164 | 12076635430240401 | 12076635430944556 | 12076635430970617 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f141aca3500 | 0x7f12eac37240 | 12076635430994101 | 12076635431006155 | 12076635431378312 | 12076635431409674 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f141aca3400 | 0x7f12eac37280 | 12076635431433167 | 12076635431445832 | 12076635431648870 | 12076635431673885 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f141aca3300 | 0x7f12eac372c0 | 12076635431696076 | 12076635431708549 | 12076635432234465 | 12076635432258733 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f141aca3a00 | 0x7f12eac37300 | 12076635432296844 | 12076635432309185 | 12076635432533183 | 12076635432559693 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f141aca3900 | 0x7f12eac37340 | 12076635432582325 | 12076635432595102 | 12076635433394136 | 12076635433420905 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f141aca3800 | 0x7f12eac37380 | 12076635433443767 | 12076635433456215 | 12076635433877812 | 12076635433901298 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f141aca3700 | 0x7f12eac373c0 | 12076635433923951 | 12076635433936851 | 12076635434162610 | 12076635434191508 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f141aca3600 | 0x7f12eac37400 | 12076635434215783 | 12076635434227889 | 12076635434824844 | 12076635434854411 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f141aca3500 | 0x7f12eac37440 | 12076635434890048 | 12076635434903884 | 12076635435203881 | 12076635435232575 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f141aca3400 | 0x7f12eac37480 | 12076635435256760 | 12076635435270281 | 12076635436256033 | 12076635436283199 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f141aca3300 | 0x7f12eac374c0 | 12076635436306232 | 12076635436319232 | 12076635436835228 | 12076635436862476 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f141aca3a00 | 0x7f12eac37500 | 12076635436887413 | 12076635436900028 | 12076635437171865 | 12076635437199734 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f141aca3900 | 0x7f12eac37540 | 12076635437222847 | 12076635437235065 | 12076635437973619 | 12076635437994582 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f141aca3800 | 0x7f12eac37580 | 12076635438036270 | 12076635438049938 | 12076635438394735 | 12076635438417038 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f141aca3700 | 0x7f12eac375c0 | 12076635438442155 | 12076635438454575 | 12076635439629286 | 12076635439659199 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f141aca3600 | 0x7f12eac37600 | 12076635439681911 | 12076635439694245 | 12076635440303680 | 12076635440333854 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f141aca3500 | 0x7f12eac37640 | 12076635440356846 | 12076635440369280 | 12076635440687677 | 12076635440715453 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f141aca3400 | 0x7f12eac37680 | 12076635440738787 | 12076635440751356 | 12076635441629109 | 12076635441659079 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f141aca3300 | 0x7f12eac376c0 | 12076635441694455 | 12076635441707029 | 12076635442116465 | 12076635442145554 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f141aca3a00 | 0x7f12eac37700 | 12076635442166994 | 12076635442179825 | 12076635443731492 | 12076635443782178 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f141aca3900 | 0x7f12eac37740 | 12076635443805832 | 12076635443818496 | 12076635444614331 | 12076635444671001 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f141aca3800 | 0x7f12eac37780 | 12076635444692712 | 12076635444705209 | 12076635445119766 | 12076635445169909 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f141aca3700 | 0x7f12eac377c0 | 12076635445192501 | 12076635445205686 | 12076635446364558 | 12076635446415336 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f141aca3600 | 0x7f12eac37800 | 12076635446449770 | 12076635446462477 | 12076635447247272 | 12076635447297717 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f141aca3500 | 0x7f12eac37840 | 12076635447320820 | 12076635447334311 | 12076635450390770 | 12076635450441415 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f141aca3400 | 0x7f12eac37880 | 12076635450465380 | 12076635450477969 | 12076635452023558 | 12076635452074603 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f141aca3300 | 0x7f12eac378c0 | 12076635452099299 | 12076635452111878 | 12076635452901312 | 12076635452951614 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f141aca3a00 | 0x7f12eac37900 | 12076635452975879 | 12076635452989471 | 12076635455272336 | 12076635455325831 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f141aca3900 | 0x7f12eac37940 | 12076635455360435 | 12076635455373615 | 12076635456908484 | 12076635456958598 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f141aca3800 | 0x7f12eac37980 | 12076635456981360 | 12076635456994724 | 12076635463054681 | 12076635463108808 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f141aca3700 | 0x7f12eac379c0 | 12076635463132151 | 12076635463145081 | 12076635466191299 | 12076635466241976 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f141aca3600 | 0x7f12eac37a00 | 12076635466265570 | 12076635466278339 | 12076635467816888 | 12076635467867189 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 937113 | 937118 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f141aca3500 | 0x7f12eac37a40 | 12076635467890693 | 12076635467903287 | 12076635472429176 | 12076635472482624 |