55 KiB
55 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_CPC_STAT_BUSY | CPC_CPC_STAT_IDLE | CPC_CPC_TCIU_BUSY | CPC_CPC_TCIU_IDLE | CPC_CPC_STAT_STALL | CPC_UTCL1_STALL_ON_TRANSLATION | CPC_CPC_UTCL2IU_BUSY | CPC_CPC_UTCL2IU_IDLE | CPC_CPC_UTCL2IU_STALL | CPC_ME1_BUSY_FOR_PACKET_DECODE | CPC_ME1_DC0_SPI_BUSY | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 868078 | 868083 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f6d9ea04180 | 4014416 | 3802798 | 524288 | 501801 | 501801 | 501801 | 0 | 712 | 503497 | 10906 | 2411 | 2659 | 497544 | 0 | 17567 | 478093 | 12075272861084152 | 12075279053995252 | 12075279054317649 | 12075273110476365 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 868078 | 868083 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f6d9ea35100 | 223232 | 76342 | 512 | 27903 | 27903 | 27903 | 0 | 664 | 27117 | 10999 | 2504 | 2414 | 23380 | 0 | 21415 | 697 | 12075273125360972 | 12075279069200263 | 12075279069206983 | 12075273125740708 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f6eceb23900 | 0x7f6d9ea35140 | 1741728 | 1576552 | 65536 | 217715 | 217715 | 217715 | 0 | 719 | 216050 | 10980 | 2531 | 2449 | 219989 | 0 | 21757 | 190896 | 12075273125788967 | 12075279069270663 | 12075279069401861 | 12075273126271604 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f6eceb23800 | 0x7f6d9ea35180 | 3183824 | 3022120 | 65536 | 397977 | 397977 | 397977 | 0 | 719 | 393420 | 12233 | 2785 | 2444 | 385826 | 0 | 23946 | 353484 | 12075273126346023 | 12075279069462821 | 12075279069708258 | 12075273126940948 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f6eceb23700 | 0x7f6d9ea351c0 | 3230752 | 3058737 | 65536 | 403843 | 403843 | 403843 | 0 | 721 | 393992 | 12176 | 2410 | 2846 | 389456 | 0 | 31926 | 372255 | 12075273126977055 | 12075279069843777 | 12075279070090815 | 12075273127600584 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f6eceb23600 | 0x7f6d9ea35200 | 1733264 | 1570333 | 65536 | 216657 | 216657 | 216657 | 0 | 707 | 211208 | 10752 | 2531 | 2441 | 212917 | 0 | 21883 | 194344 | 12075273127633405 | 12075279070227773 | 12075279070353052 | 12075273128050249 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f6eceb23500 | 0x7f6d9ea35240 | 1745056 | 1573307 | 65536 | 218131 | 218131 | 218131 | 0 | 723 | 223798 | 10612 | 2537 | 2456 | 210478 | 0 | 20257 | 194700 | 12075273128104991 | 12075279070395772 | 12075279070521211 | 12075273128517998 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f6eceb23400 | 0x7f6d9ea35280 | 1744784 | 1579496 | 65536 | 218097 | 218097 | 218097 | 0 | 718 | 222261 | 10536 | 2796 | 2680 | 208358 | 0 | 23138 | 186020 | 12075273128595112 | 12075279070598810 | 12075279070722489 | 12075273128979566 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f6eceb23300 | 0x7f6d9ea352c0 | 3617184 | 3000648 | 65536 | 452147 | 452147 | 391809 | 60338 | 721 | 402168 | 11213 | 2541 | 2444 | 387706 | 0 | 31909 | 372899 | 12075273129038846 | 12075279070767448 | 12075279071013686 | 12075273129653388 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f6eceb23a00 | 0x7f6d9ea35300 | 3218640 | 3039594 | 65536 | 402329 | 402329 | 402329 | 0 | 1148 | 457450 | 11701 | 2627 | 2920 | 387814 | 0 | 30749 | 359847 | 12075273129679326 | 12075279071146005 | 12075279071392562 | 12075273130275304 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f6eceb23900 | 0x7f6d9ea35340 | 1771488 | 1604558 | 65536 | 221435 | 221435 | 221435 | 0 | 803 | 225169 | 10935 | 2539 | 2443 | 208571 | 0 | 20292 | 181128 | 12075273130304999 | 12075279071527441 | 12075279071652080 | 12075273130732042 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f6eceb23800 | 0x7f6d9ea35380 | 1759312 | 1587301 | 65536 | 219913 | 219913 | 219913 | 0 | 691 | 216502 | 11193 | 2539 | 2405 | 214793 | 0 | 21971 | 190760 | 12075273130785672 | 12075279071698959 | 12075279071823918 | 12075273131204871 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f6eceb23700 | 0x7f6d9ea353c0 | 1748256 | 1571901 | 65536 | 218531 | 218531 | 218531 | 0 | 749 | 216919 | 11247 | 2540 | 2430 | 218208 | 0 | 22546 | 183540 | 12075273131267848 | 12075279071883277 | 12075279072007756 | 12075273131668352 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f6eceb23600 | 0x7f6d9ea35400 | 3146320 | 2985472 | 65536 | 393289 | 393289 | 393289 | 0 | 691 | 403478 | 11150 | 2798 | 2385 | 394213 | 0 | 31099 | 370803 | 12075273131720729 | 12075279072055916 | 12075279072309993 | 12075273132316837 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f6eceb23500 | 0x7f6d9ea35440 | 3197408 | 3036616 | 65536 | 399675 | 399675 | 399675 | 0 | 749 | 392774 | 11666 | 2588 | 2437 | 383033 | 0 | 33771 | 364873 | 12075273132350369 | 12075279072416392 | 12075279072661030 | 12075273132958289 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f6eceb23400 | 0x7f6d9ea35480 | 1731856 | 1565588 | 65536 | 216481 | 216481 | 216481 | 0 | 691 | 215158 | 11227 | 2909 | 2439 | 208863 | 0 | 22929 | 187484 | 12075273132988766 | 12075279072762309 | 12075279072887108 | 12075273133426709 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f6eceb23300 | 0x7f6d9ea354c0 | 1731232 | 1563267 | 65536 | 216403 | 216403 | 216403 | 0 | 1101 | 288714 | 10662 | 2548 | 2979 | 217587 | 0 | 23854 | 185176 | 12075273133472104 | 12075279072929827 | 12075279073054626 | 12075273133897013 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f6eceb23a00 | 0x7f6d9ea35500 | 1721680 | 1557430 | 65536 | 215209 | 215209 | 215209 | 0 | 791 | 216853 | 10705 | 2538 | 2440 | 207966 | 0 | 21584 | 184537 | 12075273133959359 | 12075279073107266 | 12075279073231744 | 12075273134369692 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f6eceb23900 | 0x7f6d9ea35540 | 3178528 | 3009702 | 65536 | 397315 | 397315 | 397315 | 0 | 693 | 396324 | 11609 | 2916 | 2717 | 402273 | 0 | 33245 | 375463 | 12075273134420626 | 12075279073276864 | 12075279073564381 | 12075273134999562 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f6eceb23800 | 0x7f6d9ea35580 | 3128016 | 2958963 | 65536 | 391001 | 391001 | 391001 | 0 | 747 | 397193 | 11018 | 2957 | 2439 | 392663 | 0 | 29897 | 362661 | 12075273135036050 | 12075279073665500 | 12075279073916218 | 12075273135638209 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f6eceb23700 | 0x7f6d9ea355c0 | 1715488 | 1537652 | 65536 | 214435 | 214435 | 214435 | 0 | 690 | 214007 | 10208 | 3210 | 2438 | 213728 | 0 | 23447 | 181077 | 12075273135669477 | 12075279074021497 | 12075279074146455 | 12075273136088886 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f6eceb23600 | 0x7f6d9ea35600 | 1727632 | 1553981 | 65536 | 215953 | 215953 | 215953 | 0 | 749 | 212095 | 11070 | 2531 | 2400 | 216470 | 0 | 23532 | 185695 | 12075273136144229 | 12075279074189495 | 12075279074315094 | 12075273136556826 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f6eceb23500 | 0x7f6d9ea35640 | 1692448 | 1523203 | 65536 | 211555 | 211555 | 211555 | 0 | 691 | 219246 | 10860 | 2532 | 2451 | 207579 | 0 | 24445 | 184875 | 12075273136617799 | 12075279074368853 | 12075279074493332 | 12075273137017261 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f6eceb23400 | 0x7f6d9ea35680 | 3121488 | 2950377 | 65536 | 390185 | 390185 | 390185 | 0 | 749 | 398310 | 11082 | 2550 | 2444 | 385834 | 0 | 26235 | 361305 | 12075273137070921 | 12075279074535572 | 12075279074779889 | 12075273137667770 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f6eceb23300 | 0x7f6d9ea356c0 | 3155232 | 2997721 | 65536 | 394403 | 394403 | 394403 | 0 | 689 | 405408 | 11369 | 2805 | 2435 | 384811 | 0 | 33116 | 356861 | 12075273137697866 | 12075279074879248 | 12075279075127726 | 12075273138285969 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f6eceb23a00 | 0x7f6d9ea35700 | 1787408 | 1622246 | 65536 | 223425 | 223425 | 223425 | 0 | 748 | 210623 | 10673 | 2540 | 2447 | 207471 | 0 | 23212 | 182023 | 12075273138314121 | 12075279075256365 | 12075279075381004 | 12075273138744040 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f6eceb23900 | 0x7f6d9ea35740 | 1734304 | 1565776 | 65536 | 216787 | 216787 | 216787 | 0 | 691 | 228262 | 10939 | 2550 | 2444 | 215178 | 0 | 24058 | 182272 | 12075273138794373 | 12075279075423083 | 12075279075548042 | 12075273139220976 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f6eceb23800 | 0x7f6d9ea35780 | 1708496 | 1543981 | 65536 | 213561 | 213561 | 213561 | 0 | 749 | 215878 | 10743 | 2542 | 2439 | 210263 | 0 | 19844 | 188028 | 12075273139285275 | 12075279075606281 | 12075279075730920 | 12075273139680339 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f6eceb23700 | 0x7f6d9ea357c0 | 3126752 | 2961179 | 65536 | 390843 | 390843 | 390843 | 0 | 693 | 399764 | 11684 | 2917 | 2453 | 392129 | 0 | 33789 | 359208 | 12075273139731905 | 12075279075773320 | 12075279076020357 | 12075273140329125 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f6eceb23600 | 0x7f6d9ea35800 | 3200976 | 3032907 | 65536 | 400121 | 400121 | 400121 | 0 | 747 | 397705 | 11581 | 2557 | 2437 | 384457 | 0 | 23638 | 358407 | 12075273140360143 | 12075279076127876 | 12075279076373634 | 12075273140970527 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f6eceb23500 | 0x7f6d9ea35840 | 1733344 | 1567740 | 65536 | 216667 | 216667 | 216667 | 0 | 689 | 215312 | 10990 | 2861 | 2455 | 211039 | 0 | 23299 | 178727 | 12075273141007906 | 12075279076484993 | 12075279076609792 | 12075273141433367 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f6eceb23400 | 0x7f6d9ea35880 | 1754000 | 1582749 | 65536 | 219249 | 219249 | 219249 | 0 | 749 | 211959 | 10808 | 2541 | 2439 | 217527 | 0 | 24520 | 186623 | 12075273141484762 | 12075279076653151 | 12075279076779230 | 12075273141910033 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f6eceb23300 | 0x7f6d9ea358c0 | 1687136 | 1527659 | 65536 | 210891 | 210891 | 210891 | 0 | 690 | 206207 | 10378 | 2532 | 2358 | 209368 | 0 | 23669 | 184399 | 12075273141971186 | 12075279076831709 | 12075279076956508 | 12075273142379785 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f6eceb23a00 | 0x7f6d9ea35900 | 3150608 | 2980853 | 65536 | 393825 | 393825 | 393825 | 0 | 749 | 404927 | 11827 | 2603 | 2442 | 391196 | 0 | 32067 | 359853 | 12075273142432283 | 12075279076998748 | 12075279077246905 | 12075273143031005 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f6eceb23900 | 0x7f6d9ea35940 | 3131552 | 2968961 | 65536 | 391443 | 391443 | 391443 | 0 | 691 | 392494 | 11358 | 2537 | 2614 | 388584 | 0 | 29668 | 355615 | 12075273143062915 | 12075279077343865 | 12075279077588982 | 12075273143675643 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f6eceb23800 | 0x7f6d9ea35980 | 1685584 | 1516378 | 65536 | 210697 | 210697 | 210697 | 0 | 749 | 212623 | 10818 | 2929 | 2698 | 206788 | 0 | 22335 | 182073 | 12075273143709636 | 12075279077679381 | 12075279077804020 | 12075273144116643 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f6eceb23700 | 0x7f6d9ea359c0 | 1709856 | 1543771 | 65536 | 213731 | 213731 | 213731 | 0 | 691 | 226638 | 11112 | 2537 | 2440 | 219334 | 0 | 25153 | 181713 | 12075273144167968 | 12075279077847540 | 12075279077973138 | 12075273144576457 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f6eceb23600 | 0x7f6d9ea35a00 | 1739856 | 1569098 | 65536 | 217481 | 217481 | 217481 | 0 | 749 | 216015 | 10896 | 2530 | 2438 | 203848 | 0 | 21952 | 179351 | 12075273144636709 | 12075279078029938 | 12075279078154097 | 12075273145050237 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f6eceb23500 | 0x7f6d9ea35a40 | 3155104 | 2986478 | 65536 | 394387 | 394387 | 394387 | 0 | 691 | 399550 | 11926 | 2554 | 2822 | 387928 | 0 | 29029 | 364321 | 12075273145103907 | 12075279078197456 | 12075279078446414 | 12075273145705866 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f6eceb23400 | 0x7f6d9ea35a80 | 3185872 | 3022813 | 65536 | 398233 | 398233 | 398233 | 0 | 749 | 401311 | 11073 | 2547 | 2441 | 389277 | 0 | 33313 | 360876 | 12075273145736543 | 12075279078510413 | 12075279078758251 | 12075273146358729 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f6eceb23300 | 0x7f6d9ea35ac0 | 1800544 | 1635154 | 65536 | 225067 | 225067 | 225067 | 0 | 690 | 209639 | 11054 | 2540 | 2620 | 212962 | 0 | 23635 | 185492 | 12075273146390768 | 12075279078816170 | 12075279078940649 | 12075273146815708 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f6eceb23a00 | 0x7f6d9ea35b00 | 1743312 | 1559477 | 65536 | 217913 | 217913 | 217913 | 0 | 751 | 219061 | 11250 | 2725 | 2809 | 210205 | 0 | 27076 | 182249 | 12075273146868246 | 12075279078982729 | 12075279079108647 | 12075273147293546 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f6eceb23900 | 0x7f6d9ea35b40 | 1724256 | 1553411 | 65536 | 215531 | 215531 | 215531 | 0 | 690 | 221751 | 10689 | 2538 | 2435 | 210371 | 0 | 22830 | 180237 | 12075273147356893 | 12075279079168007 | 12075279079293126 | 12075273147756626 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f6eceb23800 | 0x7f6d9ea35b80 | 3288976 | 3121857 | 65536 | 411121 | 411121 | 411121 | 0 | 751 | 421892 | 11563 | 2923 | 2809 | 410117 | 0 | 33993 | 375476 | 12075273147807952 | 12075279079335685 | 12075279079591363 | 12075273148431480 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f6eceb23700 | 0x7f6d9ea35bc0 | 3146272 | 2981026 | 65536 | 393283 | 393283 | 393283 | 0 | 691 | 397070 | 11034 | 2547 | 2845 | 395489 | 0 | 28787 | 364865 | 12075273148462167 | 12075279079646242 | 12075279079890080 | 12075273149045191 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f6eceb23600 | 0x7f6d9ea35c00 | 1726224 | 1551138 | 65536 | 215777 | 215777 | 215777 | 0 | 749 | 214518 | 10528 | 2536 | 2435 | 211699 | 0 | 20329 | 181447 | 12075273149074826 | 12075279079941599 | 12075279080065918 | 12075273149503803 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f6eceb23500 | 0x7f6d9ea35c40 | 1746016 | 1575703 | 65536 | 218251 | 218251 | 218251 | 0 | 691 | 221262 | 11441 | 2538 | 2908 | 219458 | 0 | 28129 | 193573 | 12075273149555108 | 12075279080109918 | 12075279080236157 | 12075273149967955 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f6eceb23400 | 0x7f6d9ea35c80 | 1729552 | 1559917 | 65536 | 216193 | 216193 | 216193 | 0 | 748 | 213880 | 11235 | 2533 | 2370 | 209196 | 0 | 20458 | 187381 | 12075273150037074 | 12075279080288156 | 12075279080413115 | 12075273150435434 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f6eceb23300 | 0x7f6d9ea35cc0 | 3346592 | 3176476 | 65536 | 418323 | 418323 | 418323 | 0 | 690 | 411287 | 12147 | 2542 | 2454 | 409936 | 0 | 33353 | 369312 | 12075273150485847 | 12075279080457594 | 12075279080710872 | 12075273151101181 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f6eceb23a00 | 0x7f6d9ea35d00 | 3169104 | 3007010 | 65536 | 396137 | 396137 | 396137 | 0 | 749 | 394783 | 11299 | 2597 | 2441 | 386933 | 0 | 32102 | 356705 | 12075273151132900 | 12075279080772791 | 12075279081016469 | 12075273151737002 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f6eceb23900 | 0x7f6d9ea35d40 | 1715936 | 1549917 | 65536 | 214491 | 214491 | 214491 | 0 | 691 | 219270 | 11010 | 2452 | 2455 | 207599 | 0 | 23490 | 183097 | 12075273151770334 | 12075279081082388 | 12075279081207347 | 12075273152197348 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f6eceb23800 | 0x7f6d9ea35d80 | 1759632 | 1589201 | 65536 | 219953 | 219953 | 219953 | 0 | 747 | 227712 | 11762 | 2540 | 2433 | 214133 | 0 | 25576 | 190592 | 12075273152251679 | 12075279081248947 | 12075279081377265 | 12075273152661931 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f6eceb23700 | 0x7f6d9ea35dc0 | 1750432 | 1579271 | 65536 | 218803 | 218803 | 218803 | 0 | 690 | 220719 | 11331 | 2530 | 2452 | 213538 | 0 | 22220 | 181501 | 12075273152723245 | 12075279081429585 | 12075279081554224 | 12075273153126354 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f6eceb23600 | 0x7f6d9ea35e00 | 3248144 | 3073888 | 65536 | 406017 | 406017 | 406017 | 0 | 749 | 406638 | 12153 | 2540 | 2544 | 390350 | 0 | 31816 | 367808 | 12075273153181066 | 12075279081599343 | 12075279081855981 | 12075273153766113 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f6eceb23500 | 0x7f6d9ea35e40 | 3204640 | 3038805 | 65536 | 400579 | 400579 | 400579 | 0 | 691 | 396718 | 11997 | 2451 | 2436 | 393282 | 0 | 34083 | 367175 | 12075273153794866 | 12075279081912780 | 12075279082197258 | 12075273154390853 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f6eceb23400 | 0x7f6d9ea35e80 | 1779856 | 1616491 | 65536 | 222481 | 222481 | 222481 | 0 | 749 | 216887 | 10714 | 2443 | 2440 | 204430 | 0 | 24230 | 193099 | 12075273154421971 | 12075279082267017 | 12075279082392456 | 12075273154847211 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f6eceb23300 | 0x7f6d9ea35ec0 | 1811040 | 1637987 | 65536 | 226379 | 226379 | 226379 | 0 | 690 | 213103 | 11571 | 3132 | 2429 | 223833 | 0 | 28744 | 194244 | 12075273154900801 | 12075279082435815 | 12075279082563494 | 12075273155321833 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f6eceb23a00 | 0x7f6d9ea35f00 | 1808912 | 1639485 | 65536 | 226113 | 226113 | 226113 | 0 | 749 | 218919 | 11027 | 2571 | 2708 | 220018 | 0 | 20806 | 191691 | 12075273155386664 | 12075279082617093 | 12075279082741892 | 12075273155798128 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f6eceb23900 | 0x7f6d9ea35f40 | 3330976 | 3159881 | 65536 | 416371 | 416371 | 416371 | 0 | 690 | 401503 | 12344 | 2465 | 2723 | 395555 | 0 | 34022 | 369588 | 12075273155849404 | 12075279082784612 | 12075279083039009 | 12075273156477922 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f6eceb23800 | 0x7f6d9ea35f80 | 3181712 | 3021585 | 65536 | 397713 | 397713 | 397713 | 0 | 733 | 388558 | 11992 | 2538 | 2439 | 394359 | 0 | 33674 | 359711 | 12075273156509620 | 12075279083097569 | 12075279083342526 | 12075273157117520 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f6eceb23700 | 0x7f6d9ea35fc0 | 1816992 | 1633110 | 65536 | 227123 | 227123 | 227123 | 0 | 690 | 214023 | 10633 | 2531 | 2437 | 216481 | 0 | 25348 | 179195 | 12075273157147466 | 12075279083400926 | 12075279083525885 | 12075273157576483 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f6eceb23600 | 0x7f6d9ea36000 | 1820944 | 1637366 | 65536 | 227617 | 227617 | 227617 | 0 | 749 | 219383 | 11691 | 2529 | 2441 | 224021 | 0 | 28873 | 193717 | 12075273157631225 | 12075279083568604 | 12075279083703963 | 12075273158049852 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f6eceb23500 | 0x7f6d9ea36040 | 1784288 | 1615068 | 65536 | 223035 | 223035 | 223035 | 0 | 679 | 214210 | 11325 | 2539 | 2406 | 208264 | 0 | 21976 | 178317 | 12075273158112839 | 12075279083755962 | 12075279083880121 | 12075273158523783 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f6eceb23400 | 0x7f6d9ea36080 | 3359568 | 3180225 | 65536 | 419945 | 419945 | 419945 | 0 | 749 | 409935 | 12593 | 2542 | 2441 | 403213 | 0 | 35209 | 370828 | 12075273158577302 | 12075279083923001 | 12075279084174198 | 12075273159195190 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f6eceb23300 | 0x7f6d9ea360c0 | 3147104 | 2984600 | 65536 | 393387 | 393387 | 393387 | 0 | 693 | 396876 | 12444 | 2534 | 2437 | 393121 | 0 | 29688 | 364355 | 12075273159225347 | 12075279084236758 | 12075279084480275 | 12075273159830581 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f6eceb23a00 | 0x7f6d9ea36100 | 2329104 | 1657040 | 65536 | 291137 | 291137 | 228891 | 62246 | 749 | 205783 | 11172 | 2533 | 2899 | 276747 | 0 | 24076 | 188815 | 12075273159865055 | 12075279084539635 | 12075279084663794 | 12075273160347121 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f6eceb23900 | 0x7f6d9ea36140 | 1863648 | 1678910 | 65536 | 232955 | 232955 | 232955 | 0 | 690 | 231351 | 11320 | 2530 | 2396 | 230042 | 0 | 27219 | 199801 | 12075273160400981 | 12075279084705873 | 12075279084847952 | 12075273160817305 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f6eceb23800 | 0x7f6d9ea36180 | 1749776 | 1581285 | 65536 | 218721 | 218721 | 218721 | 0 | 749 | 225246 | 11351 | 2548 | 2897 | 207397 | 0 | 24294 | 185679 | 12075273160879069 | 12075279084899631 | 12075279085025070 | 12075273161288220 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f6eceb23700 | 0x7f6d9ea361c0 | 3331104 | 3159378 | 65536 | 416387 | 416387 | 416387 | 0 | 691 | 415782 | 12254 | 2540 | 2423 | 401767 | 0 | 35519 | 367680 | 12075273161342881 | 12075279085068910 | 12075279085322667 | 12075273161953826 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f6eceb23600 | 0x7f6d9ea36200 | 3153424 | 2991053 | 65536 | 394177 | 394177 | 394177 | 0 | 751 | 397436 | 11881 | 2531 | 2433 | 377709 | 0 | 34369 | 357229 | 12075273161982269 | 12075279085374507 | 12075279085617704 | 12075273162602562 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f6eceb23500 | 0x7f6d9ea36240 | 1776096 | 1602990 | 65536 | 222011 | 222011 | 222011 | 0 | 690 | 216463 | 11363 | 2548 | 2437 | 205753 | 0 | 25117 | 188143 | 12075273162634772 | 12075279085676744 | 12075279085801703 | 12075273163071273 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f6eceb23400 | 0x7f6d9ea36280 | 1928976 | 1765325 | 65536 | 241121 | 241121 | 241121 | 0 | 747 | 239641 | 11359 | 2726 | 2448 | 236822 | 0 | 28997 | 207651 | 12075273163123941 | 12075279085845862 | 12075279085994341 | 12075273163554541 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f6eceb23300 | 0x7f6d9ea362c0 | 1805088 | 1636365 | 65536 | 225635 | 225635 | 225635 | 0 | 691 | 225318 | 11087 | 2540 | 2433 | 210389 | 0 | 21776 | 179557 | 12075273163641603 | 12075279086068580 | 12075279086194339 | 12075273164020016 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f6eceb23a00 | 0x7f6d9ea36300 | 3382928 | 3183066 | 65536 | 422865 | 422865 | 422865 | 0 | 749 | 420087 | 12694 | 2444 | 2815 | 404839 | 0 | 34964 | 366672 | 12075273164072603 | 12075279086237698 | 12075279086496736 | 12075273164694549 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f6eceb23900 | 0x7f6d9ea36340 | 3135520 | 2970202 | 65536 | 391939 | 391939 | 391939 | 0 | 690 | 396143 | 11531 | 2952 | 2443 | 386155 | 0 | 28986 | 351709 | 12075273164723593 | 12075279086551295 | 12075279086795453 | 12075273165333837 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f6eceb23800 | 0x7f6d9ea36380 | 1769424 | 1605799 | 65536 | 221177 | 221177 | 221177 | 0 | 749 | 225822 | 10899 | 2539 | 2437 | 210601 | 0 | 23810 | 190207 | 12075273165364414 | 12075279086848732 | 12075279086973691 | 12075273165792028 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f6eceb23700 | 0x7f6d9ea363c0 | 2037664 | 1869144 | 65536 | 254707 | 254707 | 254707 | 0 | 689 | 250376 | 11441 | 2537 | 2433 | 248949 | 0 | 30696 | 217289 | 12075273165845297 | 12075279087019931 | 12075279087177209 | 12075273166301465 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f6eceb23600 | 0x7f6d9ea36400 | 1753616 | 1580710 | 65536 | 219201 | 219201 | 219201 | 0 | 1099 | 277467 | 11476 | 2539 | 2432 | 205566 | 0 | 22475 | 186893 | 12075273166363009 | 12075279087230649 | 12075279087355928 | 12075273166770126 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f6eceb23500 | 0x7f6d9ea36440 | 3322592 | 3113905 | 65536 | 415323 | 415323 | 415323 | 0 | 803 | 411673 | 12576 | 2532 | 2886 | 396880 | 0 | 34469 | 369068 | 12075273166823174 | 12075279087398647 | 12075279087655765 | 12075273167444509 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f6eceb23400 | 0x7f6d9ea36480 | 3184912 | 3027386 | 65536 | 398113 | 398113 | 398113 | 0 | 690 | 394159 | 11428 | 2963 | 2432 | 390158 | 0 | 28810 | 355361 | 12075273167474384 | 12075279087714484 | 12075279087962642 | 12075273168059632 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f6eceb23300 | 0x7f6d9ea364c0 | 1738336 | 1562661 | 65536 | 217291 | 217291 | 217291 | 0 | 749 | 224358 | 11257 | 2541 | 2438 | 214064 | 0 | 24147 | 191468 | 12075273168091591 | 12075279088026161 | 12075279088151600 | 12075273168514066 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f6eceb23a00 | 0x7f6d9ea36500 | 2104720 | 1945620 | 65536 | 263089 | 263089 | 263089 | 0 | 691 | 262478 | 11490 | 2438 | 2449 | 259069 | 0 | 29698 | 229429 | 12075273168566874 | 12075279088195919 | 12075279088360398 | 12075273169007784 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f6eceb23900 | 0x7f6d9ea36540 | 1766240 | 1589099 | 65536 | 220779 | 220779 | 220779 | 0 | 749 | 216198 | 11202 | 3206 | 2452 | 222026 | 0 | 22702 | 188923 | 12075273169065321 | 12075279088412397 | 12075279088537836 | 12075273169469251 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f6eceb23800 | 0x7f6d9ea36580 | 3243152 | 3082953 | 65536 | 405393 | 405393 | 405393 | 0 | 692 | 401157 | 12175 | 2446 | 2681 | 398389 | 0 | 37183 | 369304 | 12075273169519955 | 12075279088579596 | 12075279088834473 | 12075273170126402 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f6eceb23700 | 0x7f6d9ea365c0 | 3646624 | 3024396 | 65536 | 455827 | 455827 | 394745 | 61082 | 749 | 404295 | 11507 | 3020 | 2454 | 396448 | 0 | 31160 | 365223 | 12075273170154274 | 12075279088894313 | 12075279089145670 | 12075273170807478 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f6eceb23600 | 0x7f6d9ea36600 | 1803152 | 1625883 | 65536 | 225393 | 225393 | 225393 | 0 | 690 | 213959 | 11082 | 2530 | 2448 | 218294 | 0 | 22673 | 187632 | 12075273170836161 | 12075279089217830 | 12075279089342788 | 12075273171276038 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f6eceb23500 | 0x7f6d9ea36640 | 2207712 | 2043467 | 65536 | 275963 | 275963 | 275963 | 0 | 749 | 276591 | 11543 | 2531 | 2435 | 272627 | 0 | 30753 | 239861 | 12075273171329578 | 12075279089385668 | 12075279089558466 | 12075273171784443 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f6eceb23400 | 0x7f6d9ea36680 | 1805840 | 1638156 | 65536 | 225729 | 225729 | 225729 | 0 | 690 | 224935 | 10599 | 2538 | 2414 | 206576 | 0 | 25573 | 188993 | 12075273171849233 | 12075279089613026 | 12075279089738784 | 12075273172270376 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f6eceb23300 | 0x7f6d9ea366c0 | 3202848 | 3020306 | 65536 | 400355 | 400355 | 400355 | 0 | 749 | 407559 | 12376 | 2537 | 2420 | 392906 | 0 | 36032 | 360932 | 12075273172322373 | 12075279089781504 | 12075279090036062 | 12075273172903523 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f6eceb23a00 | 0x7f6d9ea36700 | 3187664 | 3012077 | 65536 | 398457 | 398457 | 398457 | 0 | 689 | 397808 | 12114 | 2537 | 2428 | 390794 | 0 | 32572 | 356117 | 12075273172933238 | 12075279090092861 | 12075279090340379 | 12075273173529826 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f6eceb23900 | 0x7f6d9ea36740 | 1823968 | 1642979 | 65536 | 227995 | 227995 | 227995 | 0 | 747 | 229248 | 10929 | 2529 | 2444 | 215650 | 0 | 25701 | 187385 | 12075273173557808 | 12075279090402138 | 12075279090528697 | 12075273173987316 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f6eceb23800 | 0x7f6d9ea36780 | 2332112 | 2151600 | 65536 | 291513 | 291513 | 291513 | 0 | 691 | 288062 | 11683 | 2538 | 2440 | 285158 | 0 | 32561 | 252595 | 12075273174040986 | 12075279090570616 | 12075279090752375 | 12075273174501462 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f6eceb23700 | 0x7f6d9ea367c0 | 1833120 | 1665008 | 65536 | 229139 | 229139 | 229139 | 0 | 749 | 218487 | 11022 | 2414 | 2438 | 217912 | 0 | 22024 | 188389 | 12075273174563667 | 12075279090805014 | 12075279090930773 | 12075273174974040 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f6eceb23600 | 0x7f6d9ea36800 | 3290768 | 3128858 | 65536 | 411345 | 411345 | 411345 | 0 | 691 | 407134 | 12421 | 2541 | 2434 | 407692 | 0 | 39322 | 370006 | 12075273175036446 | 12075279090973173 | 12075279091236370 | 12075273175649134 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f6eceb23500 | 0x7f6d9ea36840 | 3190624 | 3000664 | 65536 | 398827 | 398827 | 398827 | 0 | 751 | 387125 | 11222 | 2531 | 2422 | 394424 | 0 | 30557 | 363983 | 12075273175679120 | 12075279091297169 | 12075279091544207 | 12075273176284565 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f6eceb23400 | 0x7f6d9ea36880 | 1749584 | 1575701 | 65536 | 218697 | 218697 | 218697 | 0 | 690 | 225071 | 11080 | 2676 | 2872 | 211958 | 0 | 24067 | 182379 | 12075273176315743 | 12075279091605166 | 12075279091730445 | 12075273176739771 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f6eceb23300 | 0x7f6d9ea368c0 | 2420384 | 2255580 | 65536 | 302547 | 302547 | 302547 | 0 | 747 | 301665 | 11871 | 2533 | 2437 | 298721 | 0 | 32546 | 266277 | 12075273176792118 | 12075279091771405 | 12075279091961643 | 12075273177281427 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f6eceb23a00 | 0x7f6d9ea36900 | 1806096 | 1632674 | 65536 | 225761 | 225761 | 225761 | 0 | 691 | 214758 | 11183 | 2541 | 2448 | 220374 | 0 | 23949 | 191412 | 12075273177363359 | 12075279092018922 | 12075279092145961 | 12075273177757882 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f6eceb23900 | 0x7f6d9ea36940 | 3489248 | 3322837 | 65536 | 436155 | 436155 | 436155 | 0 | 749 | 435703 | 13063 | 2532 | 2628 | 432050 | 0 | 40818 | 392015 | 12075273177808747 | 12075279092190921 | 12075279092504358 | 12075273178448626 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f6eceb23800 | 0x7f6d9ea36980 | 3176016 | 3009258 | 65536 | 397001 | 397001 | 397001 | 0 | 691 | 402054 | 11842 | 3154 | 2448 | 391118 | 0 | 30630 | 356356 | 12075273178477630 | 12075279092560997 | 12075279092811715 | 12075273179102501 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f6eceb23700 | 0x7f6d9ea369c0 | 1834784 | 1650363 | 65536 | 229347 | 229347 | 229347 | 0 | 749 | 226734 | 11148 | 2669 | 2779 | 220795 | 0 | 26147 | 179557 | 12075273179131685 | 12075279092871874 | 12075279092996833 | 12075273179574518 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f6eceb23600 | 0x7f6d9ea36a00 | 2644880 | 2474510 | 65536 | 330609 | 330609 | 330609 | 0 | 691 | 327430 | 12185 | 2531 | 2437 | 326265 | 0 | 34484 | 291643 | 12075273179627386 | 12075279093041312 | 12075279093250910 | 12075273180191084 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f6eceb23500 | 0x7f6d9ea36a40 | 1823264 | 1642058 | 65536 | 227907 | 227907 | 227907 | 0 | 749 | 222703 | 11277 | 2540 | 2427 | 211739 | 0 | 27656 | 189223 | 12075273180235787 | 12075279093324350 | 12075279093453309 | 12075273180652130 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f6eceb23400 | 0x7f6d9ea36a80 | 4138896 | 3616948 | 65536 | 517361 | 517361 | 454828 | 62533 | 690 | 469671 | 11818 | 2961 | 2431 | 466807 | 0 | 42291 | 422885 | 12075273180704367 | 12075279093495228 | 12075279093801465 | 12075273181387847 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f6eceb23300 | 0x7f6d9ea36ac0 | 3243488 | 3049188 | 65536 | 405435 | 405435 | 405435 | 0 | 737 | 401210 | 12103 | 2532 | 2446 | 401176 | 0 | 31994 | 355811 | 12075273181418194 | 12075279093858745 | 12075279094108662 | 12075273182036352 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f6eceb23a00 | 0x7f6d9ea36b00 | 1798992 | 1625440 | 65536 | 224873 | 224873 | 224873 | 0 | 690 | 222479 | 11847 | 2549 | 2438 | 221808 | 0 | 26171 | 182683 | 12075273182065687 | 12075279094172022 | 12075279094299860 | 12075273182492700 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f6eceb23900 | 0x7f6d9ea36b40 | 2865056 | 2699453 | 65536 | 358131 | 358131 | 358131 | 0 | 749 | 355294 | 12677 | 2772 | 2771 | 354787 | 0 | 36604 | 315399 | 12075273182544897 | 12075279094342580 | 12075279094568818 | 12075273183095410 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f6eceb23800 | 0x7f6d9ea36b80 | 1740048 | 1564847 | 65536 | 217505 | 217505 | 217505 | 0 | 690 | 227607 | 11200 | 2443 | 2793 | 218653 | 0 | 25941 | 189452 | 12075273183136988 | 12075279094638577 | 12075279094764816 | 12075273183554283 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f6eceb23700 | 0x7f6d9ea36bc0 | 4385568 | 3887012 | 65536 | 548195 | 548195 | 485686 | 62509 | 747 | 503873 | 11529 | 2958 | 2440 | 499894 | 0 | 45427 | 456525 | 12075273183607842 | 12075279094808015 | 12075279095136652 | 12075273184311670 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f6eceb23600 | 0x7f6d9ea36c00 | 3178768 | 2995467 | 65536 | 397345 | 397345 | 397345 | 0 | 691 | 399726 | 12333 | 2530 | 2434 | 397604 | 0 | 34332 | 355144 | 12075273184342738 | 12075279095194892 | 12075279095441609 | 12075273184952962 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f6eceb23500 | 0x7f6d9ea36c40 | 1845792 | 1663406 | 65536 | 230723 | 230723 | 230723 | 0 | 749 | 219759 | 11599 | 2787 | 2443 | 224739 | 0 | 27061 | 188356 | 12075273184986715 | 12075279095501609 | 12075279095630087 | 12075273185435659 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f6eceb23400 | 0x7f6d9ea36c80 | 3076432 | 2901600 | 65536 | 384553 | 384553 | 384553 | 0 | 690 | 380871 | 12821 | 2546 | 2918 | 379960 | 0 | 38105 | 340081 | 12075273185487976 | 12075279095672487 | 12075279095915365 | 12075273186078333 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f6eceb23300 | 0x7f6d9ea36cc0 | 1786976 | 1604628 | 65536 | 223371 | 223371 | 223371 | 0 | 749 | 223727 | 11349 | 2538 | 2427 | 216283 | 0 | 27277 | 187880 | 12075273186125100 | 12075279095985764 | 12075279096115203 | 12075273186538358 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f6eceb23a00 | 0x7f6d9ea36d00 | 4595792 | 4436952 | 65536 | 574473 | 574473 | 574473 | 0 | 962 | 610551 | 13570 | 2548 | 2519 | 568599 | 0 | 49511 | 519463 | 12075273186589693 | 12075279096156322 | 12075279096556798 | 12075273187323356 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f6eceb23900 | 0x7f6d9ea36d40 | 3220256 | 3046215 | 65536 | 402531 | 402531 | 402531 | 0 | 805 | 411015 | 12206 | 2537 | 2773 | 401249 | 0 | 41341 | 363564 | 12075273187353232 | 12075279096616638 | 12075279096874395 | 12075273187976400 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f6eceb23800 | 0x7f6d9ea36d80 | 1793104 | 1624087 | 65536 | 224137 | 224137 | 224137 | 0 | 690 | 224519 | 11444 | 2561 | 2397 | 222993 | 0 | 29006 | 190360 | 12075273188012707 | 12075279096934715 | 12075279097065274 | 12075273188445161 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f6eceb23700 | 0x7f6d9ea36dc0 | 3843744 | 3305534 | 65536 | 480467 | 480467 | 417519 | 62948 | 734 | 431878 | 12684 | 2538 | 2444 | 430266 | 0 | 40666 | 387985 | 12075273188497067 | 12075279097108313 | 12075279097385590 | 12075273189156573 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f6eceb23600 | 0x7f6d9ea36e00 | 1763600 | 1581242 | 65536 | 220449 | 220449 | 220449 | 0 | 676 | 217453 | 11190 | 2540 | 2432 | 212646 | 0 | 26224 | 194593 | 12075273189199282 | 12075279097458550 | 12075279097593748 | 12075273189610857 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f6eceb23500 | 0x7f6d9ea36e40 | 5161952 | 4995716 | 65536 | 645243 | 645243 | 645243 | 0 | 749 | 642390 | 11585 | 3064 | 2443 | 639923 | 0 | 52090 | 588239 | 12075273189663004 | 12075279097636788 | 12075279098058704 | 12075273190432584 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f6eceb23400 | 0x7f6d9ea36e80 | 3206288 | 3035837 | 65536 | 400785 | 400785 | 400785 | 0 | 690 | 415039 | 12170 | 2548 | 2446 | 397776 | 0 | 34517 | 362208 | 12075273190463601 | 12075279098116463 | 12075279098381741 | 12075273191084645 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f6eceb23300 | 0x7f6d9ea36ec0 | 1837152 | 1665883 | 65536 | 229643 | 229643 | 229643 | 0 | 749 | 225030 | 11232 | 2540 | 2737 | 222069 | 0 | 26639 | 190543 | 12075273191115453 | 12075279098441740 | 12075279098576139 | 12075273191552855 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f6eceb23a00 | 0x7f6d9ea36f00 | 3905488 | 3730779 | 65536 | 488185 | 488185 | 488185 | 0 | 690 | 488159 | 13096 | 2533 | 2428 | 483042 | 0 | 43531 | 438591 | 12075273191605313 | 12075279098618538 | 12075279098933255 | 12075273192270559 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f6eceb23900 | 0x7f6d9ea36f40 | 1947360 | 1784912 | 65536 | 243419 | 243419 | 243419 | 0 | 749 | 240543 | 11329 | 2530 | 2436 | 240274 | 0 | 28491 | 208807 | 12075273192312417 | 12075279099005415 | 12075279099156773 | 12075273192743958 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f6eceb23800 | 0x7f6d9ea36f80 | 6274384 | 6115598 | 65536 | 784297 | 784297 | 784297 | 0 | 962 | 809655 | 14965 | 2884 | 2434 | 781820 | 0 | 60612 | 719023 | 12075273192797608 | 12075279099199973 | 12075279099715488 | 12075273193674107 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f6eceb23700 | 0x7f6d9ea36fc0 | 3636384 | 3439655 | 65536 | 454547 | 454547 | 454547 | 0 | 801 | 449882 | 12538 | 2539 | 2437 | 452697 | 0 | 39411 | 412708 | 12075273193703822 | 12075279099772767 | 12075279100071164 | 12075273194362486 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f6eceb23600 | 0x7f6d9ea37000 | 1979728 | 1813989 | 65536 | 247465 | 247465 | 247465 | 0 | 691 | 243094 | 11439 | 2529 | 2446 | 241704 | 0 | 29627 | 212031 | 12075273194394395 | 12075279100135484 | 12075279100289562 | 12075273194835194 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f6eceb23500 | 0x7f6d9ea37040 | 4747232 | 4583459 | 65536 | 593403 | 593403 | 593403 | 0 | 748 | 591704 | 13680 | 2545 | 2441 | 589733 | 0 | 49396 | 538725 | 12075273194888183 | 12075279100332122 | 12075279100716918 | 12075273195628438 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f6eceb23400 | 0x7f6d9ea37080 | 2242192 | 2075058 | 65536 | 280273 | 280273 | 280273 | 0 | 690 | 274575 | 11648 | 2683 | 2433 | 272605 | 0 | 31589 | 243705 | 12075273195669985 | 12075279100789397 | 12075279100964756 | 12075273196129098 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f6eceb23300 | 0x7f6d9ea370c0 | 7402016 | 7234848 | 65536 | 925251 | 925251 | 925251 | 0 | 748 | 923160 | 16395 | 2539 | 2453 | 921041 | 0 | 15392 | 850531 | 12075273196181095 | 12075279101007155 | 12075279101615949 | 12075273197141299 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f6eceb23a00 | 0x7f6d9ea37100 | 4006160 | 3844930 | 65536 | 500769 | 500769 | 500769 | 0 | 689 | 503272 | 13889 | 2539 | 2433 | 495557 | 0 | 44586 | 449921 | 12075273197172317 | 12075279101678829 | 12075279102007306 | 12075273197862970 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f6eceb23900 | 0x7f6d9ea37140 | 2253088 | 2089839 | 65536 | 281635 | 281635 | 281635 | 0 | 749 | 280654 | 12276 | 3035 | 2358 | 278728 | 0 | 30503 | 245109 | 12075273197891483 | 12075279102069385 | 12075279102247623 | 12075273198376244 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f6eceb23800 | 0x7f6d9ea37180 | 5580624 | 5415291 | 65536 | 697577 | 697577 | 697577 | 0 | 690 | 696823 | 14911 | 2549 | 2448 | 695062 | 0 | 55315 | 638603 | 12075273198429082 | 12075279102290023 | 12075279102745059 | 12075273199234649 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f6eceb23700 | 0x7f6d9ea371c0 | 2511072 | 2350683 | 65536 | 313883 | 313883 | 313883 | 0 | 1101 | 366810 | 12247 | 2548 | 2443 | 309603 | 0 | 32602 | 275867 | 12075273199277138 | 12075279102818818 | 12075279103017376 | 12075273199818033 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f6eceb23600 | 0x7f6d9ea37200 | 8601168 | 8361251 | 65536 | 1075145 | 1075145 | 1012629 | 62516 | 1099 | 1074364 | 17306 | 2886 | 2407 | 1060991 | 0 | 77061 | 982423 | 12075273199845674 | 12075279103078655 | 12075279103780408 | 12075273200905103 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f6eceb23500 | 0x7f6d9ea37240 | 4549216 | 4375349 | 65536 | 568651 | 568651 | 568651 | 0 | 749 | 567526 | 14143 | 2542 | 2446 | 563984 | 0 | 48188 | 514481 | 12075273200936221 | 12075279103845048 | 12075279104216564 | 12075273201674823 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f6eceb23400 | 0x7f6d9ea37280 | 2545232 | 2389839 | 65536 | 318153 | 318153 | 318153 | 0 | 962 | 370367 | 11979 | 2542 | 2434 | 315084 | 0 | 33258 | 279563 | 12075273201703957 | 12075279104280564 | 12075279104483122 | 12075273202267985 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f6eceb23300 | 0x7f6d9ea372c0 | 6625248 | 6258517 | 65536 | 828155 | 828155 | 766622 | 61533 | 803 | 802225 | 15747 | 2540 | 2392 | 799894 | 0 | 61429 | 735981 | 12075273202300115 | 12075279104538801 | 12075279105063756 | 12075273203192814 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f6eceb23a00 | 0x7f6d9ea37300 | 2808912 | 2635712 | 65536 | 351113 | 351113 | 351113 | 0 | 691 | 347894 | 12470 | 2538 | 2434 | 346628 | 0 | 35410 | 309675 | 12075273203234732 | 12075279105135755 | 12075279105359753 | 12075273203812555 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f6eceb23900 | 0x7f6d9ea37340 | 9663712 | 9492340 | 65536 | 1207963 | 1207963 | 1207963 | 0 | 747 | 1205640 | 18297 | 2540 | 2774 | 1203440 | 0 | 16526 | 1117336 | 12075273203842140 | 12075279105420553 | 12075279106216865 | 12075273205024668 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f6eceb23800 | 0x7f6d9ea37380 | 5114256 | 4939756 | 65536 | 639281 | 639281 | 639281 | 0 | 691 | 639974 | 14608 | 2539 | 2441 | 633213 | 0 | 16434 | 580597 | 12075273205054573 | 12075279106279744 | 12075279106722300 | 12075273205835514 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f6eceb23700 | 0x7f6d9ea373c0 | 2837472 | 2671382 | 65536 | 354683 | 354683 | 354683 | 0 | 733 | 350543 | 12645 | 2539 | 2446 | 349376 | 0 | 35606 | 313247 | 12075273205868997 | 12075279106784859 | 12075279107009657 | 12075273206460836 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f6eceb23600 | 0x7f6d9ea37400 | 7287056 | 7121740 | 65536 | 910881 | 910881 | 910881 | 0 | 690 | 908095 | 16305 | 2970 | 2442 | 906068 | 0 | 68433 | 836111 | 12075273206492164 | 12075279107070457 | 12075279107665491 | 12075273207446037 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f6eceb23500 | 0x7f6d9ea37440 | 3357664 | 3188359 | 65536 | 419707 | 419707 | 419707 | 0 | 749 | 417326 | 13069 | 2998 | 2412 | 413306 | 0 | 40251 | 373173 | 12075273207487804 | 12075279107735570 | 12075279108003408 | 12075273208118056 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f6eceb23400 | 0x7f6d9ea37480 | 11901776 | 11737665 | 65536 | 1487721 | 1487721 | 1425189 | 62532 | 690 | 1486063 | 20177 | 2532 | 2824 | 1483710 | 0 | 31438 | 1380169 | 12075273208154363 | 12075279108065487 | 12075279109048677 | 12075273209499282 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f6eceb23300 | 0x7f6d9ea374c0 | 6448416 | 6066701 | 65536 | 806051 | 806051 | 742700 | 63351 | 1099 | 803500 | 15499 | 2532 | 2893 | 800609 | 0 | 62406 | 712269 | 12075273209530520 | 12075279109110277 | 12075279109625952 | 12075273210438347 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f6eceb23a00 | 0x7f6d9ea37500 | 3387216 | 3217622 | 65536 | 423401 | 423401 | 423401 | 0 | 801 | 423650 | 13204 | 2534 | 2448 | 420254 | 0 | 41053 | 377451 | 12075273210468453 | 12075279109688991 | 12075279109959069 | 12075273211109464 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f6eceb23900 | 0x7f6d9ea37540 | 9016032 | 8857234 | 65536 | 1127003 | 1127003 | 1127003 | 0 | 961 | 1129880 | 17707 | 2413 | 2436 | 1122210 | 0 | 16333 | 1041009 | 12075273211140191 | 12075279110024668 | 12075279110761301 | 12075273212245445 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f6eceb23800 | 0x7f6d9ea37580 | 3908944 | 3743285 | 65536 | 488617 | 488617 | 488617 | 0 | 803 | 490649 | 11661 | 2951 | 2438 | 485888 | 0 | 45627 | 438875 | 12075273212292242 | 12075279110840500 | 12075279111154577 | 12075273212956025 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f6eceb23700 | 0x7f6d9ea375c0 | 14152736 | 13987840 | 65536 | 1769091 | 1769091 | 1769091 | 0 | 689 | 1765656 | 22058 | 2539 | 2434 | 1763636 | 0 | 117960 | 1644233 | 12075273212989537 | 12075279111222256 | 12075279112391845 | 12075273214527996 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f6eceb23600 | 0x7f6d9ea37600 | 7368528 | 7202136 | 65536 | 921065 | 921065 | 921065 | 0 | 733 | 918422 | 16053 | 2547 | 2445 | 917297 | 0 | 66608 | 846412 | 12075273214565426 | 12075279112453124 | 12075279113062719 | 12075273215528355 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f6eceb23500 | 0x7f6d9ea37640 | 3933408 | 3772664 | 65536 | 491675 | 491675 | 491675 | 0 | 691 | 492750 | 13498 | 2442 | 2439 | 489631 | 0 | 45495 | 442085 | 12075273215557499 | 12075279113125118 | 12075279113445115 | 12075273216232082 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f6eceb23400 | 0x7f6d9ea37680 | 10701136 | 10535205 | 65536 | 1337641 | 1337641 | 1337641 | 0 | 1101 | 1335906 | 19066 | 2530 | 2437 | 1332737 | 0 | 93123 | 1238593 | 12075273216262539 | 12075279113507834 | 12075279114383986 | 12075273217498536 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f6eceb23300 | 0x7f6d9ea376c0 | 5290720 | 4853986 | 65536 | 661339 | 661339 | 598332 | 63007 | 802 | 631641 | 14541 | 2529 | 2438 | 622776 | 0 | 53716 | 569983 | 12075273217539512 | 12075279114458065 | 12075279114868141 | 12075273218334820 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f6eceb23a00 | 0x7f6d9ea37700 | 18668112 | 18498457 | 65536 | 2333513 | 2333513 | 2270119 | 63394 | 693 | 2329540 | 25518 | 2537 | 2453 | 2328105 | 0 | 153055 | 2176379 | 12075273218366018 | 12075279114932140 | 12075279116481085 | 12075273220279423 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f6eceb23900 | 0x7f6d9ea37740 | 9618720 | 9446872 | 65536 | 1202339 | 1202339 | 1202339 | 0 | 749 | 1199287 | 18054 | 2532 | 2937 | 1197469 | 0 | 84927 | 1110137 | 12075273220314388 | 12075279116593185 | 12075279117385498 | 12075273221473201 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f6eceb23800 | 0x7f6d9ea37780 | 5141328 | 4981067 | 65536 | 642665 | 642665 | 642665 | 0 | 961 | 674360 | 14343 | 2683 | 2447 | 636335 | 0 | 16453 | 582397 | 12075273221510851 | 12075279117497176 | 12075279117910613 | 12075273222276854 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f6eceb23700 | 0x7f6d9ea377c0 | 14065696 | 13901634 | 65536 | 1758211 | 1758211 | 1758211 | 0 | 802 | 1757481 | 22093 | 3122 | 2434 | 1753516 | 0 | 118685 | 1634401 | 12075273222306920 | 12075279118019892 | 12075279119177162 | 12075273223827766 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f6eceb23600 | 0x7f6d9ea37800 | 9600848 | 9431505 | 65536 | 1200105 | 1200105 | 1137102 | 63003 | 690 | 1196175 | 18239 | 2540 | 2462 | 1194744 | 0 | 83982 | 1109393 | 12075273223867620 | 12075279119294281 | 12075279120079075 | 12075273225025211 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f6eceb23500 | 0x7f6d9ea37840 | 36687776 | 36510444 | 65536 | 4585971 | 4585971 | 4585971 | 0 | 748 | 4582664 | 41415 | 2899 | 2437 | 4580305 | 0 | 211791 | 4296767 | 12075273225063392 | 12075279120183234 | 12075279123232648 | 12075273228471275 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f6eceb23400 | 0x7f6d9ea37880 | 18610896 | 18439488 | 65536 | 2326361 | 2326361 | 2326361 | 0 | 676 | 2320677 | 19193 | 3390 | 2709 | 2320073 | 0 | 151158 | 2167673 | 12075273228500779 | 12075279123339048 | 12075279124878075 | 12075273230414936 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f6eceb23300 | 0x7f6d9ea378c0 | 9636256 | 9470886 | 65536 | 1204531 | 1204531 | 1204531 | 0 | 747 | 1203113 | 18413 | 2539 | 2758 | 1200248 | 0 | 83844 | 1112377 | 12075273230456263 | 12075279124984474 | 12075279125773267 | 12075273231609235 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f6eceb23a00 | 0x7f6d9ea37900 | 27553424 | 27389855 | 65536 | 3444177 | 3444177 | 3444177 | 0 | 691 | 3441734 | 33321 | 2534 | 3076 | 3441322 | 0 | 216394 | 3222181 | 12075273231646224 | 12075279125885106 | 12075279128166207 | 12075273234281060 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f6eceb23900 | 0x7f6d9ea37940 | 18594912 | 18423667 | 65536 | 2324363 | 2324363 | 2261842 | 62521 | 747 | 2322416 | 25801 | 2534 | 2432 | 2318406 | 0 | 79437 | 2166607 | 12075273234336353 | 12075279128291006 | 12075279129826034 | 12075273236219802 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f6eceb23800 | 0x7f6d9ea37980 | 72700112 | 72529373 | 65536 | 9087513 | 9087513 | 9024276 | 63237 | 690 | 9084751 | 70462 | 2557 | 2972 | 9084450 | 0 | 472081 | 8537391 | 12075273236251361 | 12075279129933233 | 12075279135986942 | 12075273242674756 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f6eceb23700 | 0x7f6d9ea379c0 | 36583008 | 36421833 | 65536 | 4572875 | 4572875 | 4572875 | 0 | 1101 | 4571369 | 41460 | 3067 | 2838 | 4566976 | 0 | 283814 | 4284801 | 12075273242716183 | 12075279136096381 | 12075279139136356 | 12075273246121281 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f6eceb23600 | 0x7f6d9ea37a00 | 18622352 | 18463532 | 65536 | 2327793 | 2327793 | 2327793 | 0 | 803 | 2328360 | 25781 | 2539 | 2458 | 2325188 | 0 | 150478 | 2172365 | 12075273246159852 | 12075279139244035 | 12075279140782262 | 12075273248057909 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 868078 | 868083 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f6eceb23500 | 0x7f6d9ea37a40 | 54508128 | 54339436 | 65536 | 6813515 | 6813515 | 6813515 | 0 | 690 | 6810543 | 55415 | 2531 | 2809 | 6807933 | 0 | 413353 | 6395095 | 12075273248095819 | 12075279140888662 | 12075279145415344 | 12075273252983010 |