46 KiB
46 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | SQ_WAVES | SQ_IFETCH | SQ_IFETCH_LEVEL | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 935069 | 935074 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7fb1e8e04180 | 504417 | 504417 | 524288 | 6291456 | 791548 | 101536232 | 12076606776836515 | 12076607020949616 | 12076607021273775 | 12076607021386317 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 935069 | 935074 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7fb1e8e35100 | 27457 | 27457 | 512 | 8192 | 8659 | 1112808 | 12076607036018836 | 12076607036328752 | 12076607036334672 | 12076607036343359 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7fb2f49b3900 | 0x7fb1e8e35140 | 221451 | 221451 | 65536 | 917504 | 137700 | 17627108 | 12076607036410164 | 12076607036634350 | 12076607036769870 | 12076607036773860 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7fb2f49b3800 | 0x7fb1e8e35180 | 398529 | 398529 | 65536 | 1245184 | 176363 | 22595116 | 12076607036850743 | 12076607037034989 | 12076607037288428 | 12076607037361443 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7fb2f49b3700 | 0x7fb1e8e351c0 | 410715 | 410715 | 65536 | 983040 | 136833 | 17475532 | 12076607037393453 | 12076607037585546 | 12076607037846505 | 12076607037914191 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7fb2f49b3600 | 0x7fb1e8e35200 | 226705 | 226705 | 65536 | 1048576 | 153290 | 19537144 | 12076607037942794 | 12076607038138984 | 12076607038278504 | 12076607038282236 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7fb2f49b3500 | 0x7fb1e8e35240 | 228547 | 228547 | 65536 | 983040 | 147995 | 18962320 | 12076607038336266 | 12076607038515783 | 12076607038656262 | 12076607038659818 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7fb2f49b3400 | 0x7fb1e8e35280 | 216161 | 216161 | 65536 | 1048576 | 146752 | 18784452 | 12076607038742652 | 12076607038911941 | 12076607039044420 | 12076607039048191 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7fb2f49b3300 | 0x7fb1e8e352c0 | 407219 | 407219 | 65536 | 1572864 | 245751 | 31439136 | 12076607039102291 | 12076607039289699 | 12076607039549378 | 12076607039593976 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7fb2f49b3a00 | 0x7fb1e8e35300 | 402961 | 402961 | 65536 | 1179648 | 166989 | 21330680 | 12076607039617790 | 12076607039813377 | 12076607040069536 | 12076607040112089 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7fb2f49b3900 | 0x7fb1e8e35340 | 211699 | 211699 | 65536 | 1114112 | 161801 | 20744520 | 12076607040145602 | 12076607040328415 | 12076607040457535 | 12076607040461169 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7fb2f49b3800 | 0x7fb1e8e35380 | 228737 | 228737 | 65536 | 1114112 | 157285 | 20107324 | 12076607040513937 | 12076607040691774 | 12076607040832413 | 12076607040835916 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7fb2f49b3700 | 0x7fb1e8e353c0 | 217083 | 217083 | 65536 | 1179648 | 164032 | 20966992 | 12076607040903351 | 12076607041083932 | 12076607041216731 | 12076607041220842 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7fb2f49b3600 | 0x7fb1e8e35400 | 406313 | 406313 | 65536 | 1835008 | 252183 | 32259532 | 12076607041274622 | 12076607041448410 | 12076607041705849 | 12076607041774261 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7fb2f49b3500 | 0x7fb1e8e35440 | 404715 | 404715 | 65536 | 1310720 | 179910 | 23054132 | 12076607041801262 | 12076607041986328 | 12076607042243607 | 12076607042312442 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7fb2f49b3400 | 0x7fb1e8e35480 | 216817 | 216817 | 65536 | 1245184 | 174001 | 22306216 | 12076607042340044 | 12076607042526486 | 12076607042659445 | 12076607042663235 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7fb2f49b3300 | 0x7fb1e8e354c0 | 219347 | 219347 | 65536 | 1310720 | 181451 | 23167172 | 12076607042717035 | 12076607042886484 | 12076607043020084 | 12076607043024036 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7fb2f49b3a00 | 0x7fb1e8e35500 | 216545 | 216545 | 65536 | 1310720 | 180617 | 23118392 | 12076607043091932 | 12076607043261843 | 12076607043394642 | 12076607043398342 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7fb2f49b3900 | 0x7fb1e8e35540 | 410411 | 410411 | 65536 | 2097152 | 298957 | 38165200 | 12076607043453104 | 12076607043629521 | 12076607043891600 | 12076607043936203 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7fb2f49b3800 | 0x7fb1e8e35580 | 405265 | 405265 | 65536 | 1441792 | 197254 | 25187912 | 12076607043960438 | 12076607044156879 | 12076607044415758 | 12076607044457943 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7fb2f49b3700 | 0x7fb1e8e355c0 | 211955 | 211955 | 65536 | 1376256 | 190093 | 24361152 | 12076607044494992 | 12076607044662797 | 12076607044791757 | 12076607044795531 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7fb2f49b3600 | 0x7fb1e8e35600 | 224401 | 224401 | 65536 | 1507328 | 211513 | 27052060 | 12076607044848650 | 12076607045029676 | 12076607045167755 | 12076607045172192 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7fb2f49b3500 | 0x7fb1e8e35640 | 213291 | 213291 | 65536 | 1441792 | 206408 | 26511396 | 12076607045238255 | 12076607045408394 | 12076607045538153 | 12076607045541819 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7fb2f49b3400 | 0x7fb1e8e35680 | 406521 | 406521 | 65536 | 2359296 | 322321 | 41245052 | 12076607045594057 | 12076607045769673 | 12076607046029831 | 12076607046098796 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7fb2f49b3300 | 0x7fb1e8e356c0 | 402387 | 402387 | 65536 | 1572864 | 210221 | 26926876 | 12076607046130294 | 12076607046318790 | 12076607046576069 | 12076607046643980 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7fb2f49b3a00 | 0x7fb1e8e35700 | 213697 | 213697 | 65536 | 1507328 | 217132 | 27733596 | 12076607046675619 | 12076607046861028 | 12076607046992387 | 12076607046996125 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7fb2f49b3900 | 0x7fb1e8e35740 | 229571 | 229571 | 65536 | 1703936 | 233010 | 29842576 | 12076607047049554 | 12076607047231586 | 12076607047372866 | 12076607047376693 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7fb2f49b3800 | 0x7fb1e8e35780 | 221441 | 221441 | 65536 | 1572864 | 215600 | 27520968 | 12076607047442275 | 12076607047610785 | 12076607047746944 | 12076607047750738 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7fb2f49b3700 | 0x7fb1e8e357c0 | 397083 | 397083 | 65536 | 2621440 | 359612 | 46041044 | 12076607047804589 | 12076607047973503 | 12076607048228062 | 12076607048272499 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7fb2f49b3600 | 0x7fb1e8e35800 | 401329 | 401329 | 65536 | 1703936 | 230733 | 29537124 | 12076607048299870 | 12076607048484861 | 12076607048741500 | 12076607048782758 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7fb2f49b3500 | 0x7fb1e8e35840 | 220467 | 220467 | 65536 | 1638400 | 229834 | 29380464 | 12076607048815619 | 12076607048987739 | 12076607049123099 | 12076607049127129 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7fb2f49b3400 | 0x7fb1e8e35880 | 229225 | 229225 | 65536 | 1900544 | 266452 | 34181504 | 12076607049180618 | 12076607049356698 | 12076607049498297 | 12076607049502086 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7fb2f49b3300 | 0x7fb1e8e358c0 | 213827 | 213827 | 65536 | 1703936 | 240960 | 30802172 | 12076607049568109 | 12076607049740696 | 12076607049871896 | 12076607049875561 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7fb2f49b3a00 | 0x7fb1e8e35900 | 406265 | 406265 | 65536 | 2883584 | 391865 | 50146776 | 12076607049928539 | 12076607050118295 | 12076607050379573 | 12076607050448426 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7fb2f49b3900 | 0x7fb1e8e35940 | 399859 | 399859 | 65536 | 1835008 | 244685 | 31280692 | 12076607050474846 | 12076607050659252 | 12076607050915251 | 12076607050982089 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7fb2f49b3800 | 0x7fb1e8e35980 | 221417 | 221417 | 65536 | 1769472 | 277053 | 35437688 | 12076607051013768 | 12076607051199890 | 12076607051336209 | 12076607051339925 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7fb2f49b3700 | 0x7fb1e8e359c0 | 222395 | 222395 | 65536 | 2097152 | 309591 | 39541216 | 12076607051393314 | 12076607051562449 | 12076607051699568 | 12076607051703210 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7fb2f49b3600 | 0x7fb1e8e35a00 | 211073 | 211073 | 65536 | 1835008 | 253693 | 32484200 | 12076607051769283 | 12076607051935407 | 12076607052065166 | 12076607052068881 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7fb2f49b3500 | 0x7fb1e8e35a40 | 400595 | 400595 | 65536 | 3145728 | 419612 | 53657848 | 12076607052120667 | 12076607052299565 | 12076607052557164 | 12076607052598215 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7fb2f49b3400 | 0x7fb1e8e35a80 | 396953 | 396953 | 65536 | 1966080 | 272534 | 34812732 | 12076607052631397 | 12076607052799243 | 12076607053054122 | 12076607053123302 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7fb2f49b3300 | 0x7fb1e8e35ac0 | 221611 | 221611 | 65536 | 1900544 | 278506 | 35717872 | 12076607053150412 | 12076607053348841 | 12076607053484521 | 12076607053488431 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7fb2f49b3a00 | 0x7fb1e8e35b00 | 227921 | 227921 | 65536 | 2293760 | 307076 | 39319028 | 12076607053540548 | 12076607053727240 | 12076607053868839 | 12076607053872195 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7fb2f49b3900 | 0x7fb1e8e35b40 | 217787 | 217787 | 65536 | 1966080 | 267715 | 34185840 | 12076607053937567 | 12076607054107718 | 12076607054241317 | 12076607054244868 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7fb2f49b3800 | 0x7fb1e8e35b80 | 419897 | 419897 | 65536 | 3407872 | 482823 | 61784604 | 12076607054297807 | 12076607054473156 | 12076607054744035 | 12076607054810621 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7fb2f49b3700 | 0x7fb1e8e35bc0 | 402603 | 402603 | 65536 | 2097152 | 276300 | 35440948 | 12076607054837240 | 12076607055024194 | 12076607055282113 | 12076607055349473 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7fb2f49b3600 | 0x7fb1e8e35c00 | 228313 | 228313 | 65536 | 2031616 | 277326 | 35474084 | 12076607055376002 | 12076607055561152 | 12076607055702271 | 12076607055705706 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7fb2f49b3500 | 0x7fb1e8e35c40 | 235331 | 235331 | 65536 | 2490368 | 344616 | 44111616 | 12076607055758384 | 12076607055925150 | 12076607056071550 | 12076607056075353 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7fb2f49b3400 | 0x7fb1e8e35c80 | 215553 | 215553 | 65536 | 2097152 | 285404 | 36484628 | 12076607056141606 | 12076607056310589 | 12076607056441308 | 12076607056444911 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7fb2f49b3300 | 0x7fb1e8e35cc0 | 421987 | 421987 | 65536 | 3670016 | 495514 | 63310996 | 12076607056499983 | 12076607056676827 | 12076607056948986 | 12076607057018127 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7fb2f49b3a00 | 0x7fb1e8e35d00 | 397225 | 397225 | 65536 | 2228224 | 300078 | 38488544 | 12076607057044556 | 12076607057231865 | 12076607057487384 | 12076607057554595 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7fb2f49b3900 | 0x7fb1e8e35d40 | 214459 | 214459 | 65536 | 2162688 | 314738 | 40158468 | 12076607057578339 | 12076607057760663 | 12076607057892022 | 12076607057895589 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7fb2f49b3800 | 0x7fb1e8e35d80 | 232217 | 232217 | 65536 | 2686976 | 375703 | 48088720 | 12076607057950000 | 12076607058125141 | 12076607058269461 | 12076607058273332 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7fb2f49b3700 | 0x7fb1e8e35dc0 | 219619 | 219619 | 65536 | 2228224 | 313771 | 40142644 | 12076607058340476 | 12076607058507540 | 12076607058642899 | 12076607058646576 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7fb2f49b3600 | 0x7fb1e8e35e00 | 417929 | 417929 | 65536 | 3932160 | 534532 | 68482416 | 12076607058700897 | 12076607058872018 | 12076607059141937 | 12076607059210385 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7fb2f49b3500 | 0x7fb1e8e35e40 | 400307 | 400307 | 65536 | 2359296 | 309817 | 39559812 | 12076607059235020 | 12076607059420336 | 12076607059677935 | 12076607059745179 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7fb2f49b3400 | 0x7fb1e8e35e80 | 217825 | 217825 | 65536 | 2293760 | 319640 | 40936172 | 12076607059769354 | 12076607059950734 | 12076607060083373 | 12076607060087326 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7fb2f49b3300 | 0x7fb1e8e35ec0 | 230483 | 230483 | 65536 | 2883584 | 406531 | 51929100 | 12076607060141447 | 12076607060321932 | 12076607060465132 | 12076607060468775 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7fb2f49b3a00 | 0x7fb1e8e35f00 | 220913 | 220913 | 65536 | 2359296 | 326631 | 41693704 | 12076607060535089 | 12076607060698571 | 12076607060834890 | 12076607060838583 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7fb2f49b3900 | 0x7fb1e8e35f40 | 416051 | 416051 | 65536 | 4194304 | 617087 | 78847628 | 12076607060890480 | 12076607061063209 | 12076607061332008 | 12076607061400869 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7fb2f49b3800 | 0x7fb1e8e35f80 | 400145 | 400145 | 65536 | 2490368 | 327972 | 41982188 | 12076607061425675 | 12076607061609287 | 12076607061866726 | 12076607061933781 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7fb2f49b3700 | 0x7fb1e8e35fc0 | 224731 | 224731 | 65536 | 2424832 | 379311 | 48552392 | 12076607061957685 | 12076607062146725 | 12076607062284964 | 12076607062288951 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7fb2f49b3600 | 0x7fb1e8e36000 | 226561 | 226561 | 65536 | 3080192 | 442921 | 56866692 | 12076607062341028 | 12076607062513123 | 12076607062654403 | 12076607062658108 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7fb2f49b3500 | 0x7fb1e8e36040 | 217851 | 217851 | 65536 | 2490368 | 335219 | 42934564 | 12076607062724642 | 12076607062889602 | 12076607063023841 | 12076607063027876 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7fb2f49b3400 | 0x7fb1e8e36080 | 417505 | 417505 | 65536 | 4456448 | 603706 | 77498408 | 12076607063081515 | 12076607063256960 | 12076607063527199 | 12076607063594460 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7fb2f49b3300 | 0x7fb1e8e360c0 | 399331 | 399331 | 65536 | 2621440 | 344644 | 44144628 | 12076607063618685 | 12076607063806878 | 12076607064063517 | 12076607064106312 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7fb2f49b3a00 | 0x7fb1e8e36100 | 212497 | 212497 | 65536 | 2555904 | 364875 | 46724548 | 12076607064137640 | 12076607064315356 | 12076607064446235 | 12076607064450041 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7fb2f49b3900 | 0x7fb1e8e36140 | 230755 | 230755 | 65536 | 3276800 | 462248 | 59455836 | 12076607064507318 | 12076607064669914 | 12076607064813274 | 12076607064817124 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7fb2f49b3800 | 0x7fb1e8e36180 | 211697 | 211697 | 65536 | 2621440 | 363062 | 46410020 | 12076607064882636 | 12076607065053113 | 12076607065183512 | 12076607065187603 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7fb2f49b3700 | 0x7fb1e8e361c0 | 415971 | 415971 | 65536 | 4718592 | 619363 | 79527584 | 12076607065239670 | 12076607065414071 | 12076607065682390 | 12076607065730924 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7fb2f49b3600 | 0x7fb1e8e36200 | 399409 | 399409 | 65536 | 2752512 | 365471 | 46914260 | 12076607065754918 | 12076607065937429 | 12076607066194868 | 12076607066246503 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7fb2f49b3500 | 0x7fb1e8e36240 | 215395 | 215395 | 65536 | 2686976 | 384627 | 49205924 | 12076607066271589 | 12076607066456627 | 12076607066589106 | 12076607066592897 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7fb2f49b3400 | 0x7fb1e8e36280 | 241657 | 241657 | 65536 | 3473408 | 542279 | 69310404 | 12076607066646276 | 12076607066816785 | 12076607066967985 | 12076607066971792 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7fb2f49b3300 | 0x7fb1e8e362c0 | 224123 | 224123 | 65536 | 2752512 | 397755 | 50902000 | 12076607067062130 | 12076607067227984 | 12076607067366383 | 12076607067370233 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7fb2f49b3a00 | 0x7fb1e8e36300 | 416721 | 416721 | 65536 | 4980736 | 670509 | 85731704 | 12076607067423181 | 12076607067598862 | 12076607067868461 | 12076607067890330 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7fb2f49b3900 | 0x7fb1e8e36340 | 398979 | 398979 | 65536 | 2883584 | 380981 | 48708128 | 12076607067940624 | 12076607068110540 | 12076607068367339 | 12076607068417040 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7fb2f49b3800 | 0x7fb1e8e36380 | 222009 | 222009 | 65536 | 2818048 | 405701 | 51831776 | 12076607068441786 | 12076607068626218 | 12076607068763657 | 12076607068767502 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7fb2f49b3700 | 0x7fb1e8e363c0 | 250283 | 250283 | 65536 | 3670016 | 503137 | 64681772 | 12076607068819809 | 12076607068991496 | 12076607069147816 | 12076607069151877 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7fb2f49b3600 | 0x7fb1e8e36400 | 217937 | 217937 | 65536 | 2883584 | 391651 | 50102172 | 12076607069219864 | 12076607069389735 | 12076607069524134 | 12076607069528077 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7fb2f49b3500 | 0x7fb1e8e36440 | 412995 | 412995 | 65536 | 5177344 | 681882 | 87255116 | 12076607069580394 | 12076607069755013 | 12076607070021892 | 12076607070071918 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7fb2f49b3400 | 0x7fb1e8e36480 | 451409 | 451409 | 65536 | 3014656 | 403380 | 51732012 | 12076607070097476 | 12076607070281571 | 12076607070573730 | 12076607070622873 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7fb2f49b3300 | 0x7fb1e8e364c0 | 217907 | 217907 | 65536 | 2949120 | 423873 | 54142876 | 12076607070646778 | 12076607070838369 | 12076607070971968 | 12076607070975980 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7fb2f49b3a00 | 0x7fb1e8e36500 | 264337 | 264337 | 65536 | 3866624 | 533438 | 67881272 | 12076607071032014 | 12076607071208607 | 12076607071374846 | 12076607071379180 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7fb2f49b3900 | 0x7fb1e8e36540 | 213235 | 213235 | 65536 | 3014656 | 396179 | 50686140 | 12076607071444261 | 12076607071610526 | 12076607071741885 | 12076607071745912 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7fb2f49b3800 | 0x7fb1e8e36580 | 414657 | 414657 | 65536 | 5439488 | 704212 | 90138824 | 12076607071797729 | 12076607071969884 | 12076607072238683 | 12076607072288441 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7fb2f49b3700 | 0x7fb1e8e365c0 | 455699 | 455699 | 65536 | 3145728 | 416870 | 53478980 | 12076607072312977 | 12076607072499482 | 12076607072793721 | 12076607072842212 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7fb2f49b3600 | 0x7fb1e8e36600 | 219369 | 219369 | 65536 | 3080192 | 507763 | 64933636 | 12076607072865866 | 12076607073053560 | 12076607073188599 | 12076607073192814 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7fb2f49b3500 | 0x7fb1e8e36640 | 277547 | 277547 | 65536 | 4063232 | 553684 | 70680060 | 12076607073246313 | 12076607073417878 | 12076607073592757 | 12076607073596585 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7fb2f49b3400 | 0x7fb1e8e36680 | 223001 | 223001 | 65536 | 3145728 | 456216 | 58399568 | 12076607073663760 | 12076607073828436 | 12076607073966516 | 12076607073970330 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7fb2f49b3300 | 0x7fb1e8e366c0 | 413939 | 413939 | 65536 | 5701632 | 829430 | 106122232 | 12076607074030302 | 12076607074209235 | 12076607074477234 | 12076607074526254 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7fb2f49b3a00 | 0x7fb1e8e36700 | 403385 | 403385 | 65536 | 3276800 | 429855 | 54875156 | 12076607074550008 | 12076607074734033 | 12076607074994511 | 12076607075021405 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7fb2f49b3900 | 0x7fb1e8e36740 | 225675 | 225675 | 65536 | 3211264 | 488349 | 62521156 | 12076607075064275 | 12076607075231791 | 12076607075371630 | 12076607075375804 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7fb2f49b3800 | 0x7fb1e8e36780 | 291617 | 291617 | 65536 | 4259840 | 586782 | 74985140 | 12076607075427470 | 12076607075601229 | 12076607075784428 | 12076607075788272 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7fb2f49b3700 | 0x7fb1e8e367c0 | 225755 | 225755 | 65536 | 3276800 | 499862 | 63952216 | 12076607075856068 | 12076607076026827 | 12076607076166827 | 12076607076170813 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7fb2f49b3600 | 0x7fb1e8e36800 | 412089 | 412089 | 65536 | 7733248 | 977658 | 125097504 | 12076607076223321 | 12076607076399626 | 12076607076667145 | 12076607076719043 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7fb2f49b3500 | 0x7fb1e8e36840 | 407651 | 407651 | 65536 | 3407872 | 444297 | 56593724 | 12076607076745262 | 12076607076932583 | 12076607077195462 | 12076607077244190 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7fb2f49b3400 | 0x7fb1e8e36880 | 226969 | 226969 | 65536 | 3342336 | 458648 | 58735012 | 12076607077268305 | 12076607077456421 | 12076607077597221 | 12076607077601154 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7fb2f49b3300 | 0x7fb1e8e368c0 | 301475 | 301475 | 65536 | 4456448 | 635716 | 81143032 | 12076607077658561 | 12076607077822020 | 12076607078013859 | 12076607078060408 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7fb2f49b3a00 | 0x7fb1e8e36900 | 229737 | 229737 | 65536 | 3538944 | 531910 | 68167600 | 12076607078098489 | 12076607078264578 | 12076607078405377 | 12076607078409077 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7fb2f49b3900 | 0x7fb1e8e36940 | 435539 | 435539 | 65536 | 8978432 | 1128815 | 144500616 | 12076607078466043 | 12076607078630496 | 12076607078914335 | 12076607078935496 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7fb2f49b3800 | 0x7fb1e8e36980 | 394617 | 394617 | 65536 | 3670016 | 514021 | 66048348 | 12076607078982634 | 12076607079152734 | 12076607079408413 | 12076607079457617 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7fb2f49b3700 | 0x7fb1e8e369c0 | 227075 | 227075 | 65536 | 3604480 | 524153 | 67172096 | 12076607079483415 | 12076607079665852 | 12076607079806652 | 12076607079810473 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7fb2f49b3600 | 0x7fb1e8e36a00 | 329097 | 329097 | 65536 | 4849664 | 692690 | 87980460 | 12076607079864043 | 12076607080040891 | 12076607080251610 | 12076607080300435 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7fb2f49b3500 | 0x7fb1e8e36a40 | 226403 | 226403 | 65536 | 3801088 | 581253 | 74372408 | 12076607080338455 | 12076607080503929 | 12076607080644408 | 12076607080648081 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7fb2f49b3400 | 0x7fb1e8e36a80 | 470537 | 470537 | 65536 | 14548992 | 1828637 | 234034324 | 12076607080700419 | 12076607080878167 | 12076607081185526 | 12076607081234983 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7fb2f49b3300 | 0x7fb1e8e36ac0 | 404651 | 404651 | 65536 | 3932160 | 518104 | 66211720 | 12076607081261102 | 12076607081443605 | 12076607081705684 | 12076607081752906 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7fb2f49b3a00 | 0x7fb1e8e36b00 | 218833 | 218833 | 65536 | 3866624 | 618650 | 79262516 | 12076607081776971 | 12076607081958803 | 12076607082095282 | 12076607082099411 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7fb2f49b3900 | 0x7fb1e8e36b40 | 356915 | 356915 | 65536 | 5242880 | 719398 | 92075292 | 12076607082150867 | 12076607082327281 | 12076607082556080 | 12076607082603138 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7fb2f49b3800 | 0x7fb1e8e36b80 | 222913 | 222913 | 65536 | 4063232 | 627426 | 80319960 | 12076607082639516 | 12076607082803919 | 12076607082942479 | 12076607082946397 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7fb2f49b3700 | 0x7fb1e8e36bc0 | 503139 | 503139 | 65536 | 10027008 | 1262411 | 161220076 | 12076607082998554 | 12076607083181038 | 12076607083510156 | 12076607083559106 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7fb2f49b3600 | 0x7fb1e8e36c00 | 406481 | 406481 | 65536 | 4194304 | 545664 | 69736164 | 12076607083581979 | 12076607083767479 | 12076607084031479 | 12076607084082600 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7fb2f49b3500 | 0x7fb1e8e36c40 | 221899 | 221899 | 65536 | 4128768 | 622734 | 79652544 | 12076607084104861 | 12076607084305877 | 12076607084443476 | 12076607084446487 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7fb2f49b3400 | 0x7fb1e8e36c80 | 381041 | 381041 | 65536 | 5636096 | 770775 | 98247000 | 12076607084501569 | 12076607084677556 | 12076607084922995 | 12076607084969299 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7fb2f49b3300 | 0x7fb1e8e36cc0 | 226619 | 226619 | 65536 | 4587520 | 655899 | 83961808 | 12076607085013682 | 12076607085180914 | 12076607085321394 | 12076607085324680 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7fb2f49b3a00 | 0x7fb1e8e36d00 | 575985 | 575985 | 65536 | 11075584 | 1394189 | 178374672 | 12076607085378210 | 12076607085546673 | 12076607085925232 | 12076607085971684 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7fb2f49b3900 | 0x7fb1e8e36d40 | 401963 | 401963 | 65536 | 4718592 | 669814 | 85825632 | 12076607085995318 | 12076607086186031 | 12076607086446350 | 12076607086466625 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7fb2f49b3800 | 0x7fb1e8e36d80 | 223681 | 223681 | 65536 | 4653056 | 667946 | 85594900 | 12076607086512831 | 12076607086678350 | 12076607086817389 | 12076607086820583 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7fb2f49b3700 | 0x7fb1e8e36dc0 | 434675 | 434675 | 65536 | 6422528 | 894480 | 114787936 | 12076607086873562 | 12076607087072588 | 12076607087352747 | 12076607087401173 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7fb2f49b3600 | 0x7fb1e8e36e00 | 222969 | 222969 | 65536 | 5111808 | 791119 | 101342588 | 12076607087445996 | 12076607087616747 | 12076607087755786 | 12076607087758949 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7fb2f49b3500 | 0x7fb1e8e36e40 | 647523 | 647523 | 65536 | 15007744 | 1881597 | 241002168 | 12076607087810795 | 12076607087984265 | 12076607088411144 | 12076607088458590 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7fb2f49b3400 | 0x7fb1e8e36e80 | 409081 | 409081 | 65536 | 5242880 | 687313 | 88316524 | 12076607088483737 | 12076607088676423 | 12076607088941863 | 12076607088988656 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7fb2f49b3300 | 0x7fb1e8e36ec0 | 219755 | 219755 | 65536 | 5177344 | 789959 | 101051380 | 12076607089016728 | 12076607089211302 | 12076607089347781 | 12076607089351190 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7fb2f49b3a00 | 0x7fb1e8e36f00 | 488401 | 488401 | 65536 | 7208960 | 979036 | 126381744 | 12076607089404860 | 12076607089575941 | 12076607089891620 | 12076607089939795 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7fb2f49b3900 | 0x7fb1e8e36f40 | 244835 | 244835 | 65536 | 6160384 | 944851 | 120972172 | 12076607089986332 | 12076607090161059 | 12076607090314658 | 12076607090317918 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7fb2f49b3800 | 0x7fb1e8e36f80 | 784929 | 784929 | 65536 | 17104896 | 2148466 | 275140452 | 12076607090370406 | 12076607090554978 | 12076607091073216 | 12076607091095254 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7fb2f49b3700 | 0x7fb1e8e36fc0 | 456899 | 456899 | 65536 | 6291456 | 835568 | 107190536 | 12076607091150217 | 12076607091319295 | 12076607091617534 | 12076607091664734 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7fb2f49b3600 | 0x7fb1e8e37000 | 245729 | 245729 | 65536 | 6225920 | 915795 | 117326864 | 12076607091693597 | 12076607091877534 | 12076607092032253 | 12076607092035654 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7fb2f49b3500 | 0x7fb1e8e37040 | 593875 | 593875 | 65536 | 8781824 | 1249565 | 160133432 | 12076607092090175 | 12076607092276092 | 12076607092662171 | 12076607092708766 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7fb2f49b3400 | 0x7fb1e8e37080 | 276121 | 276121 | 65536 | 7208960 | 1008435 | 128927104 | 12076607092748720 | 12076607092914170 | 12076607093089530 | 12076607093092900 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7fb2f49b3300 | 0x7fb1e8e370c0 | 942539 | 942539 | 65536 | 19202048 | 2414294 | 308160104 | 12076607093146069 | 12076607093323929 | 12076607093946647 | 12076607093994998 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7fb2f49b3a00 | 0x7fb1e8e37100 | 505193 | 505193 | 65536 | 7340032 | 959851 | 122951072 | 12076607094019965 | 12076607094212086 | 12076607094542965 | 12076607094590246 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7fb2f49b3900 | 0x7fb1e8e37140 | 280955 | 280955 | 65536 | 7274496 | 1035974 | 132703404 | 12076607094614210 | 12076607094796245 | 12076607094974324 | 12076607094977506 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7fb2f49b3800 | 0x7fb1e8e37180 | 700225 | 700225 | 65536 | 10354688 | 1451513 | 185974588 | 12076607095031627 | 12076607095211123 | 12076607095668562 | 12076607095716130 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7fb2f49b3700 | 0x7fb1e8e371c0 | 370371 | 370371 | 65536 | 8257536 | 1151861 | 147391804 | 12076607095756805 | 12076607095923761 | 12076607096161360 | 12076607096209638 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7fb2f49b3600 | 0x7fb1e8e37200 | 1065625 | 1065625 | 65536 | 21299200 | 2669811 | 341662804 | 12076607096234704 | 12076607096420240 | 12076607097124077 | 12076607097171597 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7fb2f49b3500 | 0x7fb1e8e37240 | 567019 | 567019 | 65536 | 8388608 | 1192134 | 152811500 | 12076607097204899 | 12076607097392717 | 12076607097765835 | 12076607097812710 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7fb2f49b3400 | 0x7fb1e8e37280 | 374217 | 374217 | 65536 | 8323072 | 1289301 | 164974828 | 12076607097836945 | 12076607098034635 | 12076607098275114 | 12076607098329020 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7fb2f49b3300 | 0x7fb1e8e372c0 | 806699 | 806699 | 65536 | 11927552 | 1610806 | 206248708 | 12076607098355369 | 12076607098555593 | 12076607099083911 | 12076607099132585 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7fb2f49b3a00 | 0x7fb1e8e37300 | 349865 | 349865 | 65536 | 9306112 | 1445438 | 185100516 | 12076607099174683 | 12076607099343591 | 12076607099568710 | 12076607099616745 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7fb2f49b3900 | 0x7fb1e8e37340 | 1208963 | 1208963 | 65536 | 29163520 | 3652378 | 467598068 | 12076607099646741 | 12076607099823589 | 12076607100625027 | 12076607100675295 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7fb2f49b3800 | 0x7fb1e8e37380 | 642609 | 642609 | 65536 | 9437184 | 1259387 | 161220340 | 12076607100700642 | 12076607100885986 | 12076607101308065 | 12076607101377501 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7fb2f49b3700 | 0x7fb1e8e373c0 | 352675 | 352675 | 65536 | 9371648 | 1362401 | 174437700 | 12076607101401405 | 12076607101586944 | 12076607101812863 | 12076607101853486 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7fb2f49b3600 | 0x7fb1e8e37400 | 909297 | 909297 | 65536 | 13500416 | 1939392 | 249470212 | 12076607101889804 | 12076607102070942 | 12076607102668540 | 12076607102736529 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7fb2f49b3500 | 0x7fb1e8e37440 | 419643 | 419643 | 65536 | 11403264 | 1810727 | 231780600 | 12076607102773227 | 12076607102951099 | 12076607103221499 | 12076607103288866 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7fb2f49b3400 | 0x7fb1e8e37480 | 1486913 | 1486913 | 65536 | 33357824 | 4176847 | 534462840 | 12076607103313472 | 12076607103498618 | 12076607104484535 | 12076607104551705 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7fb2f49b3300 | 0x7fb1e8e374c0 | 776779 | 776779 | 65536 | 11534336 | 1559715 | 199237440 | 12076607104575750 | 12076607104769654 | 12076607105284692 | 12076607105351593 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7fb2f49b3a00 | 0x7fb1e8e37500 | 426753 | 426753 | 65536 | 11534336 | 1851403 | 236927948 | 12076607105381649 | 12076607105566611 | 12076607105840691 | 12076607105906605 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7fb2f49b3900 | 0x7fb1e8e37540 | 1125395 | 1125395 | 65536 | 19267584 | 2430276 | 311028800 | 12076607105932844 | 12076607106126290 | 12076607106865007 | 12076607106933055 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7fb2f49b3800 | 0x7fb1e8e37580 | 486113 | 486113 | 65536 | 13500416 | 1989643 | 254748480 | 12076607106973300 | 12076607107145487 | 12076607107461486 | 12076607107527841 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7fb2f49b3700 | 0x7fb1e8e375c0 | 1768347 | 1768347 | 65536 | 37552128 | 4698100 | 601656016 | 12076607107554941 | 12076607107742125 | 12076607108914921 | 12076607108982587 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7fb2f49b3600 | 0x7fb1e8e37600 | 916145 | 916145 | 65536 | 13631488 | 1837594 | 235009216 | 12076607109018424 | 12076607109204200 | 12076607109814118 | 12076607109881229 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7fb2f49b3500 | 0x7fb1e8e37640 | 492811 | 492811 | 65536 | 13631488 | 2078014 | 266048836 | 12076607109909411 | 12076607110110597 | 12076607110430276 | 12076607110499398 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7fb2f49b3400 | 0x7fb1e8e37680 | 1336065 | 1336065 | 65536 | 21692416 | 2740602 | 350269840 | 12076607110534754 | 12076607110704516 | 12076607111582753 | 12076607111651411 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7fb2f49b3300 | 0x7fb1e8e376c0 | 632747 | 632747 | 65536 | 17694720 | 2788850 | 356809268 | 12076607111700342 | 12076607111867392 | 12076607112280991 | 12076607112347516 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7fb2f49b3a00 | 0x7fb1e8e37700 | 2332393 | 2332393 | 65536 | 57475072 | 7193674 | 920847108 | 12076607112378243 | 12076607112564030 | 12076607114112985 | 12076607114179414 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7fb2f49b3900 | 0x7fb1e8e37740 | 1200323 | 1200323 | 65536 | 19660800 | 2471365 | 316217244 | 12076607114210191 | 12076607114395704 | 12076607115190902 | 12076607115257670 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7fb2f49b3800 | 0x7fb1e8e37780 | 640169 | 640169 | 65536 | 20381696 | 2574270 | 329482260 | 12076607115287555 | 12076607115473301 | 12076607115888660 | 12076607115954926 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7fb2f49b3700 | 0x7fb1e8e377c0 | 1757027 | 1757027 | 65536 | 28704768 | 3613734 | 462492340 | 12076607115982508 | 12076607116170899 | 12076607117329615 | 12076607117396298 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7fb2f49b3600 | 0x7fb1e8e37800 | 1200825 | 1200825 | 65536 | 36634624 | 4627052 | 592393124 | 12076607117434739 | 12076607117604014 | 12076607118390892 | 12076607118457622 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7fb2f49b3500 | 0x7fb1e8e37840 | 4583811 | 4583811 | 65536 | 105840640 | 13238877 | 1694936080 | 12076607118490763 | 12076607118680971 | 12076607121732962 | 12076607121800290 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7fb2f49b3400 | 0x7fb1e8e37880 | 2324345 | 2324345 | 65536 | 38535168 | 4833306 | 618039328 | 12076607121831808 | 12076607122027681 | 12076607123570556 | 12076607123639781 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7fb2f49b3300 | 0x7fb1e8e378c0 | 1202627 | 1202627 | 65536 | 39124992 | 4909655 | 628517764 | 12076607123663065 | 12076607123857915 | 12076607124647353 | 12076607124713499 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7fb2f49b3a00 | 0x7fb1e8e37900 | 3442929 | 3442929 | 65536 | 55771136 | 7010935 | 897488352 | 12076607124738455 | 12076607124929752 | 12076607127212465 | 12076607127280584 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7fb2f49b3900 | 0x7fb1e8e37940 | 2322507 | 2322507 | 65536 | 72548352 | 9109453 | 1166161620 | 12076607127325978 | 12076607127497744 | 12076607129033259 | 12076607129100299 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7fb2f49b3800 | 0x7fb1e8e37980 | 9086257 | 9086257 | 65536 | 210698240 | 26342168 | 3372581544 | 12076607129130234 | 12076607129314698 | 12076607135372600 | 12076607135440141 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7fb2f49b3700 | 0x7fb1e8e379c0 | 4572891 | 4572891 | 65536 | 76283904 | 9533686 | 1221835664 | 12076607135472702 | 12076607135649719 | 12076607138693070 | 12076607138760458 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7fb2f49b3600 | 0x7fb1e8e37a00 | 2328209 | 2328209 | 65536 | 77398016 | 9689987 | 1240750856 | 12076607138789231 | 12076607138985709 | 12076607140524904 | 12076607140593778 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 935069 | 935074 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7fb2f49b3500 | 0x7fb1e8e37a40 | 6811723 | 6811723 | 65536 | 110821376 | 13888539 | 1777979164 | 12076607140622982 | 12076607140811943 | 12076607145340089 | 12076607145411670 |