53 KiB
53 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_CPC_STAT_BUSY | CPC_CPC_STAT_IDLE | CPC_CPC_TCIU_BUSY | CPC_CPC_TCIU_IDLE | CPC_CPC_STAT_STALL | CPC_UTCL1_STALL_ON_TRANSLATION | CPC_CPC_UTCL2IU_BUSY | CPC_CPC_UTCL2IU_IDLE | CPC_CPC_UTCL2IU_STALL | CPC_ME1_BUSY_FOR_PACKET_DECODE | CPC_ME1_DC0_SPI_BUSY | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 160404 | 160404 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7fa4dea04280 | 3054296 | 2965383 | 524288 | 381786 | 381786 | 381786 | 0 | 302 | 381283 | 8404 | 518 | 1217 | 380421 | 0 | 8701 | 369195 | 16455596020581 | 16459814018450 | 16459814257808 | 16455745747285 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 160404 | 160404 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7fa4dea23f80 | 267264 | 159891 | 512 | 33407 | 33407 | 33407 | 0 | 302 | 32864 | 11021 | 515 | 1348 | 32461 | 0 | 29902 | 496 | 16455750923166 | 16459819051039 | 16459819064319 | 16455751060640 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7fa4e1915380 | 0x7fa4dea23fc0 | 1319368 | 1215750 | 65536 | 164920 | 164920 | 164920 | 0 | 302 | 164718 | 9883 | 1995 | 2856 | 161253 | 0 | 14318 | 148256 | 16455751096469 | 16459819137118 | 16459819229437 | 16455751438626 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7fa4e1915200 | 0x7fa4dea24000 | 2489752 | 2379046 | 65536 | 311218 | 311218 | 311218 | 0 | 302 | 307656 | 11070 | 1988 | 2776 | 305679 | 0 | 23304 | 282357 | 16455751475174 | 16459819264477 | 16459819445275 | 16455751854050 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7fa4e1915080 | 0x7fa4dea24040 | 2469336 | 2369608 | 65536 | 308666 | 308666 | 308666 | 0 | 302 | 311430 | 11212 | 1984 | 2854 | 306219 | 0 | 25159 | 282507 | 16455751887389 | 16459819471035 | 16459819650713 | 16455752260654 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7fa4df17df00 | 0x7fa4dea24080 | 1323944 | 1218548 | 65536 | 165492 | 165492 | 165492 | 0 | 302 | 164586 | 10023 | 1998 | 2765 | 162934 | 0 | 13812 | 149631 | 16455752292943 | 16459819675993 | 16459819769592 | 16455752574662 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7fa4df17dd80 | 0x7fa4dea240c0 | 1317936 | 1212442 | 65536 | 164741 | 164741 | 164741 | 0 | 302 | 164490 | 9682 | 1993 | 2872 | 160757 | 0 | 14503 | 148317 | 16455752607511 | 16459819795351 | 16459819887190 | 16455752884450 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7fa4df17dc00 | 0x7fa4dea24100 | 1312856 | 1212873 | 65536 | 164106 | 164106 | 164106 | 0 | 302 | 163192 | 9986 | 2023 | 2782 | 160795 | 0 | 14269 | 145183 | 16455752929538 | 16459819927030 | 16459820019029 | 16455753193628 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7fa4df17da80 | 0x7fa4dea24140 | 2473128 | 2368675 | 65536 | 309140 | 309140 | 309140 | 0 | 302 | 309598 | 11293 | 1984 | 3102 | 308107 | 0 | 23994 | 282600 | 16455753225227 | 16459820052469 | 16459820231827 | 16455753592963 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7fa4df17d900 | 0x7fa4dea24180 | 2468464 | 2363247 | 65536 | 308557 | 308557 | 308557 | 0 | 302 | 310648 | 10897 | 1999 | 2856 | 303659 | 0 | 23464 | 283744 | 16455753624442 | 16459820274067 | 16459820454065 | 16455754007727 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7fa4df17d780 | 0x7fa4dea241c0 | 1323328 | 1219401 | 65536 | 165415 | 165415 | 165415 | 0 | 302 | 164270 | 10099 | 2000 | 2872 | 161555 | 0 | 13902 | 146548 | 16455754039826 | 16459820482705 | 16459820575984 | 16455754352924 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7fa4df17d600 | 0x7fa4dea24200 | 1332256 | 1227054 | 65536 | 166531 | 166531 | 166531 | 0 | 302 | 164304 | 9919 | 1995 | 2870 | 161729 | 0 | 14747 | 147355 | 16455754385552 | 16459820608143 | 16459820701422 | 16455754668651 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7fa4e1915480 | 0x7fa4dea24240 | 1312112 | 1206560 | 65536 | 164013 | 164013 | 164013 | 0 | 302 | 163256 | 9919 | 2000 | 2862 | 160085 | 0 | 15267 | 147399 | 16455754710170 | 16459820745582 | 16459820839181 | 16455754981889 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7fa4e1915300 | 0x7fa4dea24280 | 2480504 | 2379791 | 65536 | 310062 | 310062 | 310062 | 0 | 302 | 308304 | 11324 | 2129 | 2854 | 306595 | 0 | 24884 | 282739 | 16455755016558 | 16459820871501 | 16459821050859 | 16455755380344 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7fa4e1915180 | 0x7fa4dea242c0 | 2462736 | 2356547 | 65536 | 307841 | 307841 | 307841 | 0 | 302 | 305976 | 10794 | 1995 | 3563 | 303584 | 0 | 23454 | 283988 | 16455755413363 | 16459821083179 | 16459821263977 | 16455755750410 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7fa4e1915000 | 0x7fa4dea24300 | 1313352 | 1214777 | 65536 | 164168 | 164168 | 164168 | 0 | 302 | 164008 | 9994 | 2003 | 2866 | 160013 | 0 | 15081 | 148687 | 16455755783578 | 16459821306856 | 16459821400775 | 16455756031679 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7fa4df17de80 | 0x7fa4dea24340 | 1312360 | 1209599 | 65536 | 164044 | 164044 | 164044 | 0 | 302 | 163574 | 10005 | 1992 | 2871 | 159639 | 0 | 14973 | 146142 | 16455756063728 | 16459821431655 | 16459821525254 | 16455756319398 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7fa4df17dd00 | 0x7fa4dea24380 | 1303488 | 1203745 | 65536 | 162935 | 162935 | 162935 | 0 | 302 | 162441 | 9789 | 1985 | 3348 | 159632 | 0 | 15221 | 145411 | 16455756359276 | 16459821558694 | 16459821652293 | 16455756602627 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7fa4df17db80 | 0x7fa4dea243c0 | 2472760 | 2367442 | 65536 | 309094 | 309094 | 309094 | 0 | 302 | 308963 | 11266 | 1985 | 2728 | 306896 | 0 | 24611 | 281824 | 16455756636326 | 16459821691653 | 16459821870851 | 16455756971193 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7fa4df17da00 | 0x7fa4dea24400 | 2445032 | 2341714 | 65536 | 305628 | 305628 | 305628 | 0 | 302 | 307625 | 10898 | 1985 | 3275 | 301991 | 0 | 22414 | 283687 | 16455757003461 | 16459821900130 | 16459822079809 | 16455757339179 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7fa4df17d880 | 0x7fa4dea24440 | 1316024 | 1210007 | 65536 | 164502 | 164502 | 164502 | 0 | 302 | 163087 | 9709 | 2450 | 2951 | 160039 | 0 | 15364 | 147073 | 16455757371837 | 16459822111808 | 16459822204927 | 16455757627187 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7fa4df17d700 | 0x7fa4dea24480 | 1308648 | 1208056 | 65536 | 163580 | 163580 | 163580 | 0 | 302 | 162705 | 9984 | 1991 | 3459 | 161393 | 0 | 14495 | 146670 | 16455757660616 | 16459822234207 | 16459822327966 | 16455757910867 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7fa4e1915580 | 0x7fa4dea244c0 | 1314040 | 1209377 | 65536 | 164254 | 164254 | 164254 | 0 | 302 | 161513 | 10164 | 2311 | 2886 | 159722 | 0 | 15040 | 146331 | 16455757951565 | 16459822362846 | 16459822454525 | 16455758198185 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7fa4e1915400 | 0x7fa4dea24500 | 2461808 | 2359696 | 65536 | 307725 | 307725 | 307725 | 0 | 302 | 306681 | 11332 | 1988 | 2879 | 305571 | 0 | 24570 | 280073 | 16455758233164 | 16459822482685 | 16459822663003 | 16455758573501 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7fa4e1915280 | 0x7fa4dea24540 | 2445408 | 2346821 | 65536 | 305675 | 305675 | 305675 | 0 | 302 | 305411 | 10994 | 1984 | 2850 | 305172 | 0 | 23174 | 283305 | 16455758606970 | 16459822687963 | 16459822868761 | 16455758946597 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7fa4e1915100 | 0x7fa4dea24580 | 1304416 | 1206732 | 65536 | 163051 | 163051 | 163051 | 0 | 302 | 162320 | 9879 | 1983 | 2864 | 159065 | 0 | 15465 | 145448 | 16455758979475 | 16459822898360 | 16459822991320 | 16455759231126 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7fa4df17df80 | 0x7fa4dea245c0 | 1292624 | 1192112 | 65536 | 161577 | 161577 | 161577 | 0 | 302 | 161738 | 10023 | 2327 | 2862 | 159710 | 0 | 14882 | 144440 | 16455759263864 | 16459823035799 | 16459823128758 | 16455759514515 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7fa4df17de00 | 0x7fa4dea24600 | 1299504 | 1195350 | 65536 | 162437 | 162437 | 162437 | 0 | 302 | 162706 | 9967 | 2001 | 3543 | 159614 | 0 | 15804 | 144282 | 16455759553613 | 16459823162198 | 16459823255317 | 16455759801724 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7fa4df17dc80 | 0x7fa4dea24640 | 2433968 | 2330320 | 65536 | 304245 | 304245 | 304245 | 0 | 302 | 303350 | 10988 | 1983 | 3405 | 300231 | 0 | 24696 | 280416 | 16455759835042 | 16459823287637 | 16459823467635 | 16455760179709 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7fa4df17db00 | 0x7fa4dea24680 | 2458592 | 2349905 | 65536 | 307323 | 307323 | 307323 | 0 | 302 | 306006 | 10884 | 1983 | 2875 | 304208 | 0 | 23040 | 282682 | 16455760212128 | 16459823496275 | 16459823676913 | 16455760549035 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7fa4df17d980 | 0x7fa4dea246c0 | 1306368 | 1206741 | 65536 | 163295 | 163295 | 163295 | 0 | 302 | 162758 | 9781 | 1980 | 2871 | 159761 | 0 | 15689 | 145267 | 16455760581394 | 16459823701712 | 16459823795632 | 16455760831274 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7fa4df17d800 | 0x7fa4dea24700 | 1312664 | 1207040 | 65536 | 164082 | 164082 | 164082 | 0 | 302 | 163488 | 9940 | 1990 | 2876 | 160507 | 0 | 14596 | 145853 | 16455760863973 | 16459823837071 | 16459823931150 | 16455761112193 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7fa4df17d680 | 0x7fa4dea24740 | 1321872 | 1221256 | 65536 | 165233 | 165233 | 165233 | 0 | 302 | 162147 | 9969 | 1996 | 2867 | 159560 | 0 | 14896 | 146899 | 16455761151432 | 16459823964430 | 16459824057869 | 16455761400102 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7fa4e1915500 | 0x7fa4dea24780 | 2464696 | 2361632 | 65536 | 308086 | 308086 | 308086 | 0 | 302 | 305139 | 11036 | 1983 | 2868 | 302387 | 0 | 25078 | 277529 | 16455761434921 | 16459824088429 | 16459824268107 | 16455761769668 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7fa4e1915380 | 0x7fa4dea247c0 | 2461392 | 2360425 | 65536 | 307673 | 307673 | 307673 | 0 | 302 | 305427 | 11076 | 1992 | 2885 | 302946 | 0 | 22532 | 279743 | 16455761802247 | 16459824295307 | 16459824476265 | 16455762140324 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7fa4e1915200 | 0x7fa4dea24800 | 1305624 | 1202597 | 65536 | 163202 | 163202 | 163202 | 0 | 302 | 163011 | 9848 | 2116 | 2880 | 159231 | 0 | 14593 | 145780 | 16455762173892 | 16459824504105 | 16459824598024 | 16455762426873 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7fa4e1915080 | 0x7fa4dea24840 | 1312352 | 1206067 | 65536 | 164043 | 164043 | 164043 | 0 | 302 | 162703 | 9864 | 1993 | 3147 | 159734 | 0 | 14886 | 146334 | 16455762460281 | 16459824627943 | 16459824723142 | 16455762709792 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7fa4df17df00 | 0x7fa4dea24880 | 1302088 | 1203236 | 65536 | 162760 | 162760 | 162760 | 0 | 302 | 161701 | 9943 | 1993 | 2869 | 160224 | 0 | 15067 | 145543 | 16455762749290 | 16459824757382 | 16459824850181 | 16455762993401 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7fa4df17dd80 | 0x7fa4dea248c0 | 2447448 | 2340620 | 65536 | 305930 | 305930 | 305930 | 0 | 302 | 306203 | 10999 | 2416 | 2857 | 304190 | 0 | 24364 | 280881 | 16455763027530 | 16459824879781 | 16459825060899 | 16455763361757 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7fa4df17dc00 | 0x7fa4dea24900 | 2435088 | 2331196 | 65536 | 304385 | 304385 | 304385 | 0 | 302 | 302129 | 10817 | 1984 | 2855 | 301237 | 0 | 22689 | 280140 | 16455763394095 | 16459825085859 | 16459825265377 | 16455763730143 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7fa4df17da80 | 0x7fa4dea24940 | 1303768 | 1197820 | 65536 | 162970 | 162970 | 162970 | 0 | 302 | 162394 | 9810 | 1993 | 2863 | 159358 | 0 | 14254 | 145251 | 16455763762461 | 16459825308256 | 16459825402656 | 16455764015832 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7fa4df17d900 | 0x7fa4dea24980 | 1314168 | 1209506 | 65536 | 164270 | 164270 | 164270 | 0 | 302 | 162606 | 10031 | 1987 | 2875 | 160011 | 0 | 15001 | 145418 | 16455764047910 | 16459825432095 | 16459825526654 | 16455764297721 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7fa4df17d780 | 0x7fa4dea249c0 | 1304744 | 1204371 | 65536 | 163092 | 163092 | 163092 | 0 | 302 | 162100 | 9832 | 1988 | 2858 | 159243 | 0 | 14453 | 145734 | 16455764339509 | 16459825560894 | 16459825655293 | 16455764579600 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7fa4df17d600 | 0x7fa4dea24a00 | 2454656 | 2342887 | 65536 | 306831 | 306831 | 306831 | 0 | 302 | 302128 | 11151 | 1988 | 2856 | 303326 | 0 | 23478 | 279679 | 16455764613389 | 16459825694973 | 16459825874011 | 16455764949186 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7fa4e1915480 | 0x7fa4dea24a40 | 2423448 | 2319468 | 65536 | 302930 | 302930 | 302930 | 0 | 302 | 301962 | 10721 | 2444 | 2775 | 299256 | 0 | 20799 | 278985 | 16455764982385 | 16459825906011 | 16459826086489 | 16455765316392 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7fa4e1915300 | 0x7fa4dea24a80 | 1296960 | 1194187 | 65536 | 162119 | 162119 | 162119 | 0 | 302 | 160548 | 9766 | 1982 | 2861 | 158093 | 0 | 14973 | 144909 | 16455765348480 | 16459826124088 | 16459826216567 | 16455765598171 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7fa4e1915180 | 0x7fa4dea24ac0 | 1299568 | 1196131 | 65536 | 162445 | 162445 | 162445 | 0 | 302 | 161718 | 10124 | 2002 | 2864 | 158747 | 0 | 15905 | 144007 | 16455765631050 | 16459826246807 | 16459826340406 | 16455765880130 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7fa4e1915000 | 0x7fa4dea24b00 | 1290392 | 1193104 | 65536 | 161298 | 161298 | 161298 | 0 | 302 | 161640 | 9771 | 2780 | 2850 | 158150 | 0 | 14711 | 145019 | 16455765920139 | 16459826376246 | 16459826468245 | 16455766159269 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7fa4df17de80 | 0x7fa4dea24b40 | 2422784 | 2315632 | 65536 | 302847 | 302847 | 302847 | 0 | 302 | 301018 | 10861 | 1984 | 2858 | 300004 | 0 | 22382 | 278312 | 16455766192818 | 16459826507925 | 16459826687443 | 16455766536935 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7fa4df17dd00 | 0x7fa4dea24b80 | 2434008 | 2326432 | 65536 | 304250 | 304250 | 304250 | 0 | 302 | 301824 | 10836 | 1982 | 2854 | 299344 | 0 | 22937 | 276973 | 16455766569704 | 16459826714482 | 16459826893201 | 16455766906861 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7fa4df17db80 | 0x7fa4dea24bc0 | 1305496 | 1205247 | 65536 | 163186 | 163186 | 163186 | 0 | 302 | 162598 | 9852 | 2602 | 2856 | 160212 | 0 | 14997 | 147179 | 16455766939259 | 16459826918480 | 16459827012720 | 16455767189110 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7fa4df17da00 | 0x7fa4dea24c00 | 1307864 | 1200590 | 65536 | 163482 | 163482 | 163482 | 0 | 302 | 162718 | 10124 | 2002 | 2874 | 160292 | 0 | 16732 | 144298 | 16455767221458 | 16459827039599 | 16459827135438 | 16455767473149 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7fa4df17d880 | 0x7fa4dea24c40 | 1302200 | 1197947 | 65536 | 162774 | 162774 | 162774 | 0 | 302 | 161680 | 10010 | 1984 | 2868 | 159446 | 0 | 14897 | 144270 | 16455767512877 | 16459827168718 | 16459827262317 | 16455767758038 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7fa4df17d700 | 0x7fa4dea24c80 | 2425960 | 2318236 | 65536 | 303244 | 303244 | 303244 | 0 | 302 | 300174 | 10933 | 1984 | 2867 | 297837 | 0 | 23743 | 277455 | 16455767792436 | 16459827288077 | 16459827466955 | 16455768124864 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7fa4e1915580 | 0x7fa4dea24cc0 | 2419872 | 2315057 | 65536 | 302483 | 302483 | 302483 | 0 | 302 | 302910 | 10812 | 1988 | 2861 | 299245 | 0 | 22219 | 278343 | 16455768158882 | 16459827491755 | 16459827673193 | 16455768495029 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7fa4e1915400 | 0x7fa4dea24d00 | 1302664 | 1203867 | 65536 | 162832 | 162832 | 162832 | 0 | 302 | 163166 | 9878 | 1978 | 3219 | 159399 | 0 | 15354 | 145529 | 16455768527638 | 16459827698153 | 16459827793192 | 16455768782078 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7fa4e1915280 | 0x7fa4dea24d40 | 1351264 | 1247322 | 65536 | 168907 | 168907 | 168907 | 0 | 302 | 168605 | 9912 | 1985 | 2861 | 167423 | 0 | 16211 | 151208 | 16455768814917 | 16459827820711 | 16459827920070 | 16455769068187 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7fa4e1915100 | 0x7fa4dea24d80 | 1297224 | 1195508 | 65536 | 162152 | 162152 | 162152 | 0 | 302 | 161547 | 9831 | 1979 | 2821 | 159531 | 0 | 15350 | 145397 | 16455769108436 | 16459827952390 | 16459828045829 | 16455769357336 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7fa4df17df80 | 0x7fa4dea24dc0 | 2413576 | 2308979 | 65536 | 301696 | 301696 | 301696 | 0 | 302 | 299849 | 10964 | 1983 | 2856 | 298478 | 0 | 22947 | 275716 | 16455769391345 | 16459828071589 | 16459828250947 | 16455769726052 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7fa4df17de00 | 0x7fa4dea24e00 | 2413176 | 2306522 | 65536 | 301646 | 301646 | 301646 | 0 | 302 | 299161 | 10963 | 1985 | 2858 | 296594 | 0 | 22449 | 275842 | 16455769758191 | 16459828277507 | 16459828458145 | 16455770091858 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7fa4df17dc80 | 0x7fa4dea24e40 | 1317816 | 1214625 | 65536 | 164726 | 164726 | 164726 | 0 | 302 | 162915 | 9976 | 2005 | 2860 | 160442 | 0 | 15371 | 145323 | 16455770124497 | 16459828485185 | 16459828580704 | 16455770380247 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7fa4df17db00 | 0x7fa4dea24e80 | 1400440 | 1299378 | 65536 | 175054 | 175054 | 175054 | 0 | 302 | 176321 | 10107 | 1984 | 2853 | 174437 | 0 | 16173 | 157632 | 16455770412186 | 16459828607744 | 16459828711903 | 16455770675815 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7fa4df17d980 | 0x7fa4dea24ec0 | 1284472 | 1182639 | 65536 | 160558 | 160558 | 160558 | 0 | 302 | 159819 | 9939 | 1978 | 2862 | 157820 | 0 | 15099 | 143263 | 16455770715324 | 16459828744542 | 16459828836861 | 16455770963884 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7fa4df17d800 | 0x7fa4dea24f00 | 2424240 | 2317645 | 65536 | 303029 | 303029 | 303029 | 0 | 302 | 303919 | 11024 | 1987 | 2856 | 301015 | 0 | 23976 | 276661 | 16455770995883 | 16459828861341 | 16459829041499 | 16455771338540 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7fa4df17d680 | 0x7fa4dea24f40 | 2413496 | 2307512 | 65536 | 301686 | 301686 | 301686 | 0 | 302 | 298577 | 11047 | 1991 | 2859 | 300012 | 0 | 21751 | 276544 | 16455771371319 | 16459829068219 | 16459829248537 | 16455771708076 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7fa4e1915500 | 0x7fa4dea24f80 | 1299552 | 1192694 | 65536 | 162443 | 162443 | 162443 | 0 | 302 | 161757 | 10159 | 1996 | 2868 | 158141 | 0 | 15318 | 144438 | 16455771740454 | 16459829274617 | 16459829367896 | 16455771989885 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7fa4e1915380 | 0x7fa4dea24fc0 | 1455320 | 1353288 | 65536 | 181914 | 181914 | 181914 | 0 | 302 | 182203 | 10235 | 1983 | 3394 | 179322 | 0 | 16944 | 163221 | 16455772022183 | 16459829393976 | 16459829500855 | 16455772284413 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7fa4e1915200 | 0x7fa4dea25000 | 1300752 | 1199343 | 65536 | 162593 | 162593 | 162593 | 0 | 302 | 160367 | 10103 | 2000 | 3502 | 158767 | 0 | 15825 | 143569 | 16455772338001 | 16459829533494 | 16459829628213 | 16455772571612 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7fa4e1915080 | 0x7fa4dea25040 | 2401688 | 2298207 | 65536 | 300210 | 300210 | 300210 | 0 | 302 | 301019 | 11026 | 1994 | 3430 | 295942 | 0 | 23478 | 276562 | 16455772605281 | 16459829654613 | 16459829834771 | 16455772944978 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7fa4df17df00 | 0x7fa4dea25080 | 2409880 | 2309181 | 65536 | 301234 | 301234 | 301234 | 0 | 302 | 303561 | 10869 | 2261 | 2812 | 298081 | 0 | 21518 | 276823 | 16455772977147 | 16459829860371 | 16459830040369 | 16455773312504 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7fa4df17dd80 | 0x7fa4dea250c0 | 1307168 | 1203521 | 65536 | 163395 | 163395 | 163395 | 0 | 302 | 162519 | 9774 | 1993 | 2857 | 159883 | 0 | 15315 | 144811 | 16455773344713 | 16459830065169 | 16459830160048 | 16455773601653 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7fa4df17dc00 | 0x7fa4dea25100 | 1539272 | 1428310 | 65536 | 192408 | 192408 | 192408 | 0 | 302 | 191485 | 10299 | 1983 | 2857 | 188579 | 0 | 18186 | 171533 | 16455773633441 | 16459830185648 | 16459830301167 | 16455773906211 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7fa4df17da80 | 0x7fa4dea25140 | 1288856 | 1189423 | 65536 | 161106 | 161106 | 161106 | 0 | 302 | 160987 | 9945 | 1981 | 2856 | 157602 | 0 | 15029 | 143949 | 16455773946199 | 16459830346926 | 16459830440525 | 16455774198710 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7fa4df17d900 | 0x7fa4dea25180 | 2426416 | 2322037 | 65536 | 303301 | 303301 | 303301 | 0 | 302 | 299491 | 10993 | 1981 | 2854 | 295678 | 0 | 22811 | 276059 | 16455774230768 | 16459830465805 | 16459830646123 | 16455774581785 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7fa4df17d780 | 0x7fa4dea251c0 | 2419416 | 2316863 | 65536 | 302426 | 302426 | 302426 | 0 | 302 | 297511 | 10650 | 2477 | 2855 | 295688 | 0 | 21807 | 275113 | 16455774614574 | 16459830669963 | 16459830849801 | 16455774952101 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7fa4df17d600 | 0x7fa4dea25200 | 1307472 | 1202961 | 65536 | 163433 | 163433 | 163433 | 0 | 302 | 163397 | 10039 | 1980 | 2848 | 159101 | 0 | 15978 | 145846 | 16455774985629 | 16459830873801 | 16459830968520 | 16455775234160 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7fa4e1915480 | 0x7fa4dea25240 | 1622168 | 1510392 | 65536 | 202770 | 202770 | 202770 | 0 | 302 | 202187 | 10397 | 2045 | 2856 | 198587 | 0 | 18036 | 180606 | 16455775267148 | 16459830993640 | 16459831113479 | 16455775544848 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7fa4e1915300 | 0x7fa4dea25280 | 1294056 | 1192499 | 65536 | 161756 | 161756 | 161756 | 0 | 302 | 161091 | 9826 | 1997 | 2867 | 158447 | 0 | 15654 | 144068 | 16455775585786 | 16459831145318 | 16459831238917 | 16455775826147 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7fa4e1915180 | 0x7fa4dea252c0 | 2405520 | 2302777 | 65536 | 300689 | 300689 | 300689 | 0 | 302 | 300287 | 11198 | 1986 | 2858 | 298643 | 0 | 23729 | 277899 | 16455775858676 | 16459831263237 | 16459831443555 | 16455776199193 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7fa4e1915000 | 0x7fa4dea25300 | 2400232 | 2295419 | 65536 | 300028 | 300028 | 300028 | 0 | 302 | 300209 | 10766 | 2084 | 2857 | 294602 | 0 | 21371 | 275268 | 16455776232571 | 16459831467555 | 16459831647073 | 16455776567138 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7fa4df17de80 | 0x7fa4dea25340 | 1300288 | 1196068 | 65536 | 162535 | 162535 | 162535 | 0 | 302 | 162454 | 10034 | 1999 | 2867 | 159071 | 0 | 15814 | 144589 | 16455776599807 | 16459831672833 | 16459831767552 | 16455776848878 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7fa4df17dd00 | 0x7fa4dea25380 | 1685304 | 1580856 | 65536 | 210662 | 210662 | 210662 | 0 | 302 | 210483 | 10410 | 1984 | 2845 | 207881 | 0 | 19002 | 190060 | 16455776880936 | 16459831791872 | 16459831918911 | 16455777168065 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7fa4df17db80 | 0x7fa4dea253c0 | 1288624 | 1186903 | 65536 | 161077 | 161077 | 161077 | 0 | 302 | 160660 | 9839 | 2551 | 2860 | 159591 | 0 | 14635 | 143609 | 16455777207664 | 16459831950910 | 16459832045149 | 16455777461254 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7fa4df17da00 | 0x7fa4dea25400 | 2381520 | 2279362 | 65536 | 297689 | 297689 | 297689 | 0 | 302 | 298143 | 10990 | 1983 | 2858 | 293125 | 0 | 22636 | 274286 | 16455777493653 | 16459832068989 | 16459832247707 | 16455777828510 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7fa4df17d880 | 0x7fa4dea25440 | 2399040 | 2297487 | 65536 | 299879 | 299879 | 299879 | 0 | 302 | 298960 | 10821 | 2452 | 2864 | 295113 | 0 | 21943 | 275142 | 16455777861029 | 16459832271707 | 16459832451865 | 16455778197966 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7fa4df17d700 | 0x7fa4dea25480 | 1286400 | 1186146 | 65536 | 160799 | 160799 | 160799 | 0 | 302 | 162335 | 9995 | 2009 | 2873 | 160086 | 0 | 16156 | 143712 | 16455778230494 | 16459832476985 | 16459832571544 | 16455778486934 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7fa4e1915580 | 0x7fa4dea254c0 | 1757424 | 1647049 | 65536 | 219677 | 219677 | 219677 | 0 | 302 | 220088 | 10626 | 1982 | 2877 | 216242 | 0 | 19822 | 196694 | 16455778520023 | 16459832596184 | 16459832726903 | 16455778814072 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7fa4e1915400 | 0x7fa4dea25500 | 1294568 | 1186472 | 65536 | 161820 | 161820 | 161820 | 0 | 302 | 160234 | 9890 | 1981 | 3698 | 158003 | 0 | 15308 | 144336 | 16455778854500 | 16459832759222 | 16459832852181 | 16455779108470 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7fa4e1915280 | 0x7fa4dea25540 | 2392888 | 2287887 | 65536 | 299110 | 299110 | 299110 | 0 | 302 | 298112 | 10867 | 1973 | 2862 | 294777 | 0 | 22679 | 274102 | 16455779141859 | 16459832879701 | 16459833058739 | 16455779475536 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7fa4e1915100 | 0x7fa4dea25580 | 2401992 | 2289811 | 65536 | 300248 | 300248 | 300248 | 0 | 302 | 296774 | 10738 | 1987 | 2853 | 295442 | 0 | 22225 | 275361 | 16455779508625 | 16459833082899 | 16459833262897 | 16455779844452 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7fa4df17df80 | 0x7fa4dea255c0 | 1305656 | 1201517 | 65536 | 163206 | 163206 | 163206 | 0 | 302 | 161242 | 9983 | 2001 | 2857 | 159450 | 0 | 15761 | 143699 | 16455779877421 | 16459833288017 | 16459833383536 | 16455780127861 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7fa4df17de00 | 0x7fa4dea25600 | 1835888 | 1728820 | 65536 | 229485 | 229485 | 229485 | 0 | 302 | 230330 | 10630 | 1993 | 2854 | 226969 | 0 | 20133 | 206619 | 16455780160520 | 16459833407376 | 16459833546414 | 16455780451169 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7fa4df17dc80 | 0x7fa4dea25640 | 1286440 | 1184750 | 65536 | 160804 | 160804 | 160804 | 0 | 302 | 161344 | 9928 | 1988 | 2872 | 157846 | 0 | 15832 | 143531 | 16455780491367 | 16459833580014 | 16459833674733 | 16455780733488 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7fa4df17db00 | 0x7fa4dea25680 | 2383424 | 2279919 | 65536 | 297927 | 297927 | 297927 | 0 | 302 | 296358 | 10745 | 1978 | 2859 | 293936 | 0 | 20764 | 273545 | 16455780765587 | 16459833700973 | 16459833882891 | 16455781104664 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7fa4df17d980 | 0x7fa4dea256c0 | 2388856 | 2286605 | 65536 | 298606 | 298606 | 298606 | 0 | 302 | 300358 | 10742 | 1983 | 2860 | 295300 | 0 | 21804 | 274850 | 16455781137012 | 16459833908331 | 16459834089929 | 16455781476229 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7fa4df17d800 | 0x7fa4dea25700 | 1313456 | 1215418 | 65536 | 164181 | 164181 | 164181 | 0 | 302 | 165292 | 10225 | 2012 | 3659 | 160574 | 0 | 15454 | 145307 | 16455781508568 | 16459834116329 | 16459834213608 | 16455781760528 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7fa4df17d680 | 0x7fa4dea25740 | 1919368 | 1807963 | 65536 | 239920 | 239920 | 239920 | 0 | 302 | 237985 | 10670 | 1978 | 2859 | 235837 | 0 | 20212 | 216222 | 16455781792887 | 16459834239048 | 16459834384486 | 16455782092975 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7fa4e1915500 | 0x7fa4dea25780 | 1291672 | 1186562 | 65536 | 161458 | 161458 | 161458 | 0 | 302 | 160940 | 9958 | 1986 | 2850 | 158079 | 0 | 15259 | 144740 | 16455782134114 | 16459834418246 | 16459834514245 | 16455782388634 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7fa4e1915380 | 0x7fa4dea257c0 | 2388120 | 2281798 | 65536 | 298514 | 298514 | 298514 | 0 | 302 | 297125 | 10718 | 1993 | 2856 | 293906 | 0 | 20863 | 274592 | 16455782421403 | 16459834539525 | 16459834719203 | 16455782757750 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7fa4e1915200 | 0x7fa4dea25800 | 2390256 | 2281848 | 65536 | 298781 | 298781 | 298781 | 0 | 302 | 297372 | 10626 | 1987 | 2854 | 296749 | 0 | 20412 | 275088 | 16455782790769 | 16459834742243 | 16459834922081 | 16455783127606 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7fa4e1915080 | 0x7fa4dea25840 | 1367216 | 1260259 | 65536 | 170901 | 170901 | 170901 | 0 | 302 | 170601 | 10015 | 1981 | 2841 | 168434 | 0 | 16533 | 150533 | 16455783159834 | 16459834947360 | 16459835047839 | 16455783418554 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7fa4df17df00 | 0x7fa4dea25880 | 2073824 | 1962299 | 65536 | 259227 | 259227 | 259227 | 0 | 302 | 258964 | 10861 | 1990 | 2847 | 256744 | 0 | 21179 | 234585 | 16455783452133 | 16459835074079 | 16459835229598 | 16455783768351 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7fa4df17dd80 | 0x7fa4dea258c0 | 1287496 | 1186190 | 65536 | 160936 | 160936 | 160936 | 0 | 302 | 161393 | 10076 | 2008 | 2865 | 157458 | 0 | 16069 | 142943 | 16455783808239 | 16459835264157 | 16459835358876 | 16455784057380 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7fa4df17dc00 | 0x7fa4dea25900 | 2378112 | 2272096 | 65536 | 297263 | 297263 | 297263 | 0 | 302 | 296072 | 10630 | 1980 | 2856 | 292857 | 0 | 20523 | 273942 | 16455784089709 | 16459835383836 | 16459835562394 | 16455784427076 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7fa4df17da80 | 0x7fa4dea25940 | 2373424 | 2264706 | 65536 | 296677 | 296677 | 296677 | 0 | 302 | 296052 | 10851 | 1990 | 2859 | 293568 | 0 | 20830 | 272748 | 16455784459774 | 16459835587674 | 16459835767352 | 16455784797701 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7fa4df17d900 | 0x7fa4dea25980 | 1426448 | 1320448 | 65536 | 178305 | 178305 | 178305 | 0 | 302 | 179040 | 10084 | 1979 | 2864 | 175601 | 0 | 17178 | 159309 | 16455784829610 | 16459835793432 | 16459835900471 | 16455785091690 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7fa4df17d780 | 0x7fa4dea259c0 | 2243128 | 2131517 | 65536 | 280390 | 280390 | 280390 | 0 | 302 | 280672 | 11034 | 2010 | 2883 | 276790 | 0 | 22466 | 256112 | 16455785123559 | 16459835926871 | 16459836097749 | 16455785449056 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7fa4df17d600 | 0x7fa4dea25a00 | 1298512 | 1198584 | 65536 | 162313 | 162313 | 162313 | 0 | 302 | 161284 | 9990 | 2159 | 2868 | 157347 | 0 | 16198 | 144054 | 16455785489515 | 16459836131189 | 16459836226868 | 16455785733205 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7fa4e1915480 | 0x7fa4dea25a40 | 2380240 | 2267768 | 65536 | 297529 | 297529 | 297529 | 0 | 302 | 295128 | 10694 | 1982 | 2861 | 293122 | 0 | 21619 | 273587 | 16455785766304 | 16459836253427 | 16459836433426 | 16455786102831 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7fa4e1915300 | 0x7fa4dea25a80 | 2372944 | 2273074 | 65536 | 296617 | 296617 | 296617 | 0 | 302 | 294446 | 10900 | 1989 | 2849 | 293088 | 0 | 21322 | 271797 | 16455786136420 | 16459836459185 | 16459836639824 | 16455786478156 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7fa4e1915180 | 0x7fa4dea25ac0 | 1492576 | 1383501 | 65536 | 186571 | 186571 | 186571 | 0 | 302 | 186776 | 10247 | 1980 | 2858 | 183631 | 0 | 17719 | 166727 | 16455786511615 | 16459836664303 | 16459836773902 | 16455786777795 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7fa4e1915000 | 0x7fa4dea25b00 | 2402512 | 2292175 | 65536 | 300313 | 300313 | 300313 | 0 | 302 | 299105 | 11231 | 2000 | 2854 | 297369 | 0 | 23652 | 274190 | 16455786811294 | 16459836797742 | 16459836979020 | 16455787150531 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7fa4df17de80 | 0x7fa4dea25b40 | 1374040 | 1270875 | 65536 | 171754 | 171754 | 171754 | 0 | 302 | 171424 | 10216 | 1978 | 2848 | 167823 | 0 | 16860 | 151743 | 16455787190419 | 16459837011500 | 16459837113419 | 16455787456439 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7fa4df17dd00 | 0x7fa4dea25b80 | 2398768 | 2277363 | 65536 | 299845 | 299845 | 299845 | 0 | 302 | 298103 | 10556 | 1987 | 2773 | 294772 | 0 | 20837 | 276952 | 16455787490437 | 16459837138859 | 16459837321577 | 16455787827884 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7fa4df17db80 | 0x7fa4dea25bc0 | 2379488 | 2255075 | 65536 | 297435 | 297435 | 297435 | 0 | 302 | 298599 | 10571 | 2001 | 2861 | 295739 | 0 | 20052 | 275208 | 16455787861403 | 16459837347497 | 16459837531175 | 16455788200670 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7fa4df17da00 | 0x7fa4dea25c00 | 1652944 | 1538836 | 65536 | 206617 | 206617 | 206617 | 0 | 302 | 205530 | 10316 | 1983 | 2855 | 203192 | 0 | 18539 | 185814 | 16455788232639 | 16459837558534 | 16459837680773 | 16455788513508 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7fa4df17d880 | 0x7fa4dea25c40 | 2716912 | 2608658 | 65536 | 339613 | 339613 | 339613 | 0 | 302 | 339967 | 11294 | 1983 | 3553 | 338157 | 0 | 25564 | 311658 | 16455788546017 | 16459837706533 | 16459837913251 | 16455788930282 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7fa4df17d700 | 0x7fa4dea25c80 | 1465744 | 1363230 | 65536 | 183217 | 183217 | 183217 | 0 | 302 | 183306 | 10315 | 1978 | 2859 | 180632 | 0 | 17747 | 163958 | 16455788949911 | 16459837962370 | 16459838073729 | 16455789221371 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7fa4e1915580 | 0x7fa4dea25cc0 | 2365360 | 2249828 | 65536 | 295669 | 295669 | 295669 | 0 | 302 | 295351 | 10619 | 1986 | 2862 | 295260 | 0 | 20792 | 274338 | 16455789256059 | 16459838101889 | 16459838286687 | 16455789595896 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7fa4e1915400 | 0x7fa4dea25d00 | 2394888 | 2277860 | 65536 | 299360 | 299360 | 299360 | 0 | 302 | 301421 | 10732 | 1992 | 2869 | 295174 | 0 | 20994 | 274687 | 16455789628565 | 16459838311807 | 16459838497405 | 16455789967012 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7fa4e1915280 | 0x7fa4dea25d40 | 1803392 | 1692900 | 65536 | 225423 | 225423 | 225423 | 0 | 302 | 225331 | 10622 | 1988 | 3610 | 222616 | 0 | 19616 | 203319 | 16455790000191 | 16459838522845 | 16459838657564 | 16455790301549 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7fa4e1915100 | 0x7fa4dea25d80 | 3046136 | 2933967 | 65536 | 380766 | 380766 | 380766 | 0 | 302 | 380945 | 11632 | 2009 | 2871 | 378348 | 0 | 27845 | 350455 | 16455790334318 | 16459838681723 | 16459838913721 | 16455790768711 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7fa4df17df80 | 0x7fa4dea25dc0 | 1689024 | 1577596 | 65536 | 211127 | 211127 | 211127 | 0 | 302 | 210601 | 10552 | 1978 | 3565 | 208607 | 0 | 19194 | 189381 | 16455790787631 | 16459838983800 | 16459839110039 | 16455791072370 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7fa4df17de00 | 0x7fa4dea25e00 | 2452920 | 2338387 | 65536 | 306614 | 306614 | 306614 | 0 | 302 | 303981 | 11163 | 1982 | 2857 | 303985 | 0 | 23959 | 281436 | 16455791106538 | 16459839134519 | 16459839329077 | 16455791495573 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7fa4df17dc80 | 0x7fa4dea25e40 | 2493536 | 2350810 | 65536 | 311691 | 311691 | 311691 | 0 | 302 | 313129 | 10884 | 1984 | 2855 | 307657 | 0 | 21901 | 285776 | 16455791506873 | 16459839354517 | 16459839549715 | 16455791913897 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7fa4df17db00 | 0x7fa4dea25e80 | 2127720 | 2020824 | 65536 | 265964 | 265964 | 265964 | 0 | 302 | 265921 | 10866 | 1984 | 2856 | 263032 | 0 | 21800 | 241530 | 16455791924787 | 16459839573554 | 16459839734193 | 16455792256194 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7fa4df17d980 | 0x7fa4dea25ec0 | 3693904 | 3582511 | 65536 | 461737 | 461737 | 461737 | 0 | 302 | 460857 | 12055 | 2753 | 3484 | 459384 | 0 | 31588 | 426959 | 16455792288193 | 16459839759153 | 16459840042350 | 16455792770324 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7fa4df17d800 | 0x7fa4dea25f00 | 1945672 | 1845665 | 65536 | 243208 | 243208 | 243208 | 0 | 302 | 242954 | 10685 | 1979 | 2856 | 240760 | 0 | 20816 | 219698 | 16455792789354 | 16459840112589 | 16459840262348 | 16455793095822 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7fa4df17d680 | 0x7fa4dea25f40 | 2679216 | 2562304 | 65536 | 334901 | 334901 | 334901 | 0 | 302 | 332929 | 11200 | 2302 | 2858 | 330635 | 0 | 23385 | 310213 | 16455793130421 | 16459840288107 | 16459840496745 | 16455793512646 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7fa4e1915500 | 0x7fa4dea25f80 | 2659976 | 2539669 | 65536 | 332496 | 332496 | 332496 | 0 | 302 | 328280 | 11344 | 2348 | 2850 | 327963 | 0 | 24068 | 306442 | 16455793527455 | 16459840558025 | 16459840764743 | 16455793910461 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7fa4e1915380 | 0x7fa4dea25fc0 | 2456952 | 2348100 | 65536 | 307118 | 307118 | 307118 | 0 | 302 | 306137 | 11219 | 1989 | 3234 | 304212 | 0 | 23876 | 280418 | 16455793926980 | 16459840826502 | 16459841012420 | 16455794273027 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7fa4e1915200 | 0x7fa4dea26000 | 4339496 | 4228742 | 65536 | 542436 | 542436 | 542436 | 0 | 302 | 544108 | 12972 | 2279 | 2884 | 539838 | 0 | 35914 | 505008 | 16455794307655 | 16459841037700 | 16459841372256 | 16455794845565 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7fa4e1915080 | 0x7fa4dea26040 | 2184424 | 2074572 | 65536 | 273052 | 273052 | 273052 | 0 | 302 | 271359 | 10923 | 1998 | 2879 | 269204 | 0 | 22264 | 248023 | 16455794864144 | 16459841417216 | 16459841584894 | 16455795201471 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7fa4df17df00 | 0x7fa4dea26080 | 2947416 | 2834049 | 65536 | 368426 | 368426 | 368426 | 0 | 302 | 366126 | 11494 | 1995 | 2858 | 363568 | 0 | 25990 | 336478 | 16455795236310 | 16459841610014 | 16459841838812 | 16455795658563 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7fa4df17dd80 | 0x7fa4dea260c0 | 2923400 | 2816323 | 65536 | 365424 | 365424 | 365424 | 0 | 302 | 365277 | 11889 | 1980 | 2847 | 361478 | 0 | 29178 | 333718 | 16455795669763 | 16459841873212 | 16459842103289 | 16455796106026 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7fa4df17dc00 | 0x7fa4dea26100 | 2780648 | 2671271 | 65536 | 347580 | 347580 | 347580 | 0 | 302 | 346393 | 11581 | 2002 | 2866 | 344702 | 0 | 25700 | 318886 | 16455796117396 | 16459842148409 | 16459842359927 | 16455796534340 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7fa4df17da80 | 0x7fa4dea26140 | 4998192 | 4878999 | 65536 | 624773 | 624773 | 624773 | 0 | 302 | 623941 | 13172 | 1979 | 3344 | 621441 | 0 | 40093 | 581740 | 16455796545099 | 16459842398966 | 16459842784882 | 16455797137856 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7fa4df17d900 | 0x7fa4dea26180 | 2432192 | 2321616 | 65536 | 304023 | 304023 | 304023 | 0 | 302 | 303469 | 11259 | 1985 | 2860 | 300575 | 0 | 23889 | 277806 | 16455797156716 | 16459842828402 | 16459843014480 | 16455797501032 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7fa4df17d780 | 0x7fa4dea261c0 | 3174160 | 3069908 | 65536 | 396769 | 396769 | 396769 | 0 | 302 | 398077 | 11804 | 1985 | 3542 | 393868 | 0 | 27786 | 366242 | 16455797534991 | 16459843039440 | 16459843288877 | 16455797956385 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7fa4df17d600 | 0x7fa4dea26200 | 3131216 | 3022543 | 65536 | 391401 | 391401 | 391401 | 0 | 302 | 390713 | 11547 | 1985 | 2854 | 388958 | 0 | 27950 | 362846 | 16455797971014 | 16459843351117 | 16459843599434 | 16455798431677 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7fa4e1915480 | 0x7fa4dea26240 | 3100528 | 2990806 | 65536 | 387565 | 387565 | 387565 | 0 | 302 | 387865 | 11639 | 2764 | 2863 | 386044 | 0 | 27863 | 357762 | 16455798442746 | 16459843661514 | 16459843898951 | 16455798894989 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7fa4e1915300 | 0x7fa4dea26280 | 5747248 | 5634568 | 65536 | 718405 | 718405 | 718405 | 0 | 302 | 718669 | 14144 | 1989 | 2849 | 717066 | 0 | 45015 | 671238 | 16455798905498 | 16459843960711 | 16459844415106 | 16455799564403 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7fa4e1915180 | 0x7fa4dea262c0 | 2916576 | 2804807 | 65536 | 364571 | 364571 | 364571 | 0 | 302 | 363952 | 11555 | 1991 | 2852 | 360506 | 0 | 26611 | 335118 | 16455799583402 | 16459844484865 | 16459844710943 | 16455800006936 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7fa4e1915000 | 0x7fa4dea26300 | 3754320 | 3653751 | 65536 | 469289 | 469289 | 469289 | 0 | 302 | 468313 | 12192 | 1974 | 3076 | 465341 | 0 | 32235 | 435591 | 16455800017685 | 16459844772863 | 16459845070780 | 16455800527876 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7fa4df17de80 | 0x7fa4dea26340 | 3780312 | 3677623 | 65536 | 472538 | 472538 | 472538 | 0 | 302 | 471486 | 12289 | 1983 | 2857 | 468151 | 0 | 32146 | 437240 | 16455800538425 | 16459845111739 | 16459845410296 | 16455801043406 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7fa4df17dd00 | 0x7fa4dea26380 | 3867328 | 3755881 | 65536 | 483415 | 483415 | 483415 | 0 | 302 | 484215 | 12290 | 1979 | 2854 | 480712 | 0 | 33514 | 447552 | 16455801053976 | 16459845448696 | 16459845745013 | 16455801555576 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7fa4df17db80 | 0x7fa4dea263c0 | 7038864 | 6928585 | 65536 | 879857 | 879857 | 879857 | 0 | 302 | 880408 | 15075 | 2364 | 2863 | 877849 | 0 | 52888 | 823542 | 16455801566496 | 16459845787893 | 16459846345647 | 16455802340196 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7fa4df17da00 | 0x7fa4dea26400 | 3420136 | 3306909 | 65536 | 427516 | 427516 | 427516 | 0 | 302 | 426665 | 11889 | 1987 | 2868 | 423858 | 0 | 29963 | 393627 | 16455802359925 | 16459846419726 | 16459846683244 | 16455802831697 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7fa4df17d880 | 0x7fa4dea26440 | 4387064 | 4284369 | 65536 | 548382 | 548382 | 548382 | 0 | 302 | 548706 | 12730 | 1977 | 2855 | 547464 | 0 | 35893 | 512573 | 16455802842607 | 16459846747883 | 16459847098759 | 16455803405205 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7fa4df17d700 | 0x7fa4dea26480 | 4420232 | 4316073 | 65536 | 552528 | 552528 | 552528 | 0 | 302 | 550357 | 12746 | 1983 | 2850 | 548734 | 0 | 35631 | 513345 | 16455803416105 | 16459847172679 | 16459847523075 | 16455803975473 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7fa4e1915580 | 0x7fa4dea264c0 | 4505280 | 4392775 | 65536 | 563159 | 563159 | 563159 | 0 | 302 | 563312 | 13121 | 1988 | 2860 | 560110 | 0 | 36992 | 524418 | 16455803986203 | 16459847586275 | 16459847933631 | 16455804539921 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7fa4e1915400 | 0x7fa4dea26500 | 8328112 | 8221147 | 65536 | 1041013 | 1041013 | 1041013 | 0 | 302 | 1041397 | 16179 | 1994 | 2863 | 1039515 | 0 | 61225 | 978130 | 16455804550891 | 16459847995231 | 16459848656664 | 16455805412838 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7fa4e1915280 | 0x7fa4dea26540 | 5770040 | 5659672 | 65536 | 721254 | 721254 | 721254 | 0 | 302 | 721414 | 14162 | 1994 | 2862 | 718518 | 0 | 44774 | 674116 | 16455805431707 | 16459848727703 | 16459849174899 | 16455806053823 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7fa4e1915100 | 0x7fa4dea26580 | 5789080 | 5671472 | 65536 | 723634 | 723634 | 723634 | 0 | 302 | 723413 | 13942 | 2140 | 2887 | 720116 | 0 | 45522 | 675148 | 16455806069412 | 16459849236978 | 16459849702094 | 16455806744846 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7fa4df17df80 | 0x7fa4dea265c0 | 5779840 | 5665403 | 65536 | 722479 | 722479 | 722479 | 0 | 302 | 723012 | 14099 | 1985 | 2803 | 719455 | 0 | 45239 | 674724 | 16455806755986 | 16459849767533 | 16459850230248 | 16455807428240 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7fa4df17de00 | 0x7fa4dea26600 | 5815432 | 5696511 | 65536 | 726928 | 726928 | 726928 | 0 | 302 | 724965 | 13809 | 1978 | 3397 | 723415 | 0 | 45253 | 678914 | 16455807438810 | 16459850296328 | 16459850749123 | 16455808121763 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7fa4df17dc80 | 0x7fa4dea26640 | 10937192 | 10813152 | 65536 | 1367148 | 1367148 | 1367148 | 0 | 302 | 1368697 | 18219 | 2246 | 2858 | 1365600 | 0 | 77946 | 1288492 | 16455808132613 | 16459850814882 | 16459851685274 | 16455809225601 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7fa4df17db00 | 0x7fa4dea26680 | 10964392 | 10847231 | 65536 | 1370548 | 1370548 | 1370548 | 0 | 302 | 1370271 | 18344 | 1987 | 2860 | 1367847 | 0 | 77513 | 1289984 | 16455809244270 | 16459851757913 | 16459852611665 | 16455810335458 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7fa4df17d980 | 0x7fa4dea266c0 | 10992872 | 10863103 | 65536 | 1374108 | 1374108 | 1374108 | 0 | 302 | 1374472 | 18270 | 1990 | 2864 | 1371593 | 0 | 77740 | 1294475 | 16455810346737 | 16459852679664 | 16459853565255 | 16455811479784 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7fa4df17d800 | 0x7fa4dea26700 | 10966968 | 10847229 | 65536 | 1370870 | 1370870 | 1370870 | 0 | 302 | 1370743 | 18248 | 2458 | 2860 | 1367996 | 0 | 77467 | 1290818 | 16455811491423 | 16459853631814 | 16459854512286 | 16455812616200 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7fa4df17d680 | 0x7fa4dea26740 | 11011488 | 10884514 | 65536 | 1376435 | 1376435 | 1376435 | 0 | 302 | 1375492 | 18166 | 1990 | 2860 | 1372950 | 0 | 77321 | 1295109 | 16455812627040 | 16459854589245 | 16459855450036 | 16455813729097 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7fa4e1915500 | 0x7fa4dea26780 | 21315392 | 21186254 | 65536 | 2664423 | 2664423 | 2664423 | 0 | 302 | 2662927 | 26888 | 1984 | 2853 | 2661083 | 0 | 142564 | 2516378 | 16455813740147 | 16459855516916 | 16459857217059 | 16455815696001 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7fa4e1915380 | 0x7fa4dea267c0 | 21363016 | 21232803 | 65536 | 2670376 | 2670376 | 2670376 | 0 | 302 | 2668444 | 26984 | 1988 | 2868 | 2666482 | 0 | 142637 | 2525415 | 16455815715451 | 16459857313218 | 16459858979761 | 16455817626487 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7fa4e1915200 | 0x7fa4dea26800 | 21349320 | 21239724 | 65536 | 2668664 | 2668664 | 2668664 | 0 | 302 | 2670123 | 26928 | 1985 | 2857 | 2666445 | 0 | 144071 | 2524469 | 16455817637907 | 16459859072080 | 16459860798303 | 16455819622720 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7fa4e1915080 | 0x7fa4dea26840 | 21367280 | 21231303 | 65536 | 2670909 | 2670909 | 2670909 | 0 | 302 | 2670788 | 26949 | 1984 | 2862 | 2667713 | 0 | 143628 | 2523988 | 16455819634510 | 16459860911902 | 16459862627405 | 16455821603674 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7fa4df17df00 | 0x7fa4dea26880 | 21376560 | 21264518 | 65536 | 2672069 | 2672069 | 2672069 | 0 | 302 | 2672852 | 27184 | 1991 | 2772 | 2673975 | 0 | 143359 | 2526542 | 16455821614774 | 16459862726444 | 16459864400827 | 16455823540860 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 160404 | 160404 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7fa4df17dd80 | 0x7fa4dea268c0 | 42063856 | 41926181 | 65536 | 5257981 | 5257981 | 5257981 | 0 | 302 | 5257452 | 44432 | 1981 | 2867 | 5255318 | 0 | 273863 | 4980459 | 16455823552009 | 16459864489946 | 16459867849753 | 16455827171680 |