58 KiB
58 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | SPI_CSN_WINDOW_VALID | SPI_CSN_BUSY | GRBM_SPI_BUSY | SPI_CSN_NUM_THREADGROUPS | SPI_CSN_WAVE | SPI_RA_REQ_NO_ALLOC | SPI_RA_REQ_NO_ALLOC_CSN | SPI_RA_RES_STALL_CSN | SPI_RA_TMP_STALL_CSN | SPI_RA_WAVE_SIMD_FULL_CSN | SPI_RA_VGPR_SIMD_FULL_CSN | SPI_RA_SGPR_SIMD_FULL_CSN | SPI_RA_LDS_CU_FULL_CSN | SPI_RA_BAR_CU_FULL_CSN | SPI_RA_TGLIM_CU_FULL_CSN | SPI_RA_WVLIM_STALL_CSN | SPI_SWC_CSC_WR | SPI_VWC_CSC_WR | SPI_RA_BULKY_CU_FULL_CSN | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 157596 | 157596 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f3dcc204280 | 3029720 | 2939484 | 524288 | 378714 | 378714 | 3028009 | 2945891 | 370029 | 131072 | 524288 | 208770 | 534570 | 194898 | 0 | 9353793 | 0 | 0 | 0 | 0 | 0 | 0 | 1572864 | 524288 | 0 | 16379423217189 | 16385586870073 | 16385587107675 | 16379569394275 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 157596 | 157596 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f3dcc223f80 | 201600 | 101231 | 512 | 25199 | 25199 | 201592 | 109278 | 14871 | 128 | 512 | 6 | 518 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1024 | 512 | 0 | 16379574609650 | 16385591946586 | 16385591956026 | 16379574748737 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f3dea9f1380 | 0x7f3dcc223fc0 | 1315896 | 1206067 | 65536 | 164486 | 164486 | 1315888 | 1214126 | 153755 | 16384 | 65536 | 221429 | 216568 | 70636 | 0 | 1718682 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379574791106 | 16385592030587 | 16385592123387 | 16379575140868 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f3dea9f1200 | 0x7f3dcc224000 | 2478592 | 2368147 | 65536 | 309823 | 309823 | 2478584 | 2376214 | 297449 | 16384 | 65536 | 496529 | 376181 | 197601 | 0 | 6890987 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379575185647 | 16385592157307 | 16385592336509 | 16379575570979 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f3dea9f1080 | 0x7f3dcc224040 | 2487080 | 2376122 | 65536 | 310884 | 310884 | 2487072 | 2384184 | 299574 | 16384 | 65536 | 497579 | 369761 | 187043 | 0 | 6723620 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379575609908 | 16385592361309 | 16385592541310 | 16379575993149 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f3de834df00 | 0x7f3dcc224080 | 1327296 | 1217159 | 65536 | 165911 | 165911 | 1327288 | 1225232 | 152857 | 16384 | 65536 | 221335 | 213573 | 74232 | 0 | 1426120 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379576033108 | 16385592565790 | 16385592658591 | 16379576326582 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f3de834dd80 | 0x7f3dcc2240c0 | 1313960 | 1205122 | 65536 | 164244 | 164244 | 1313952 | 1213196 | 152638 | 16384 | 65536 | 217056 | 210853 | 67897 | 0 | 1762992 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379576366091 | 16385592683871 | 16385592775392 | 16379576655325 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f3de834dc00 | 0x7f3dcc224100 | 1319240 | 1205546 | 65536 | 164904 | 164904 | 1319232 | 1213610 | 151841 | 16384 | 65536 | 212410 | 202058 | 74444 | 0 | 2765592 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379576709154 | 16385592817632 | 16385592909632 | 16379576984308 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f3de834da80 | 0x7f3dcc224140 | 2499384 | 2384647 | 65536 | 312422 | 312422 | 2499376 | 2392718 | 297225 | 16384 | 65536 | 495206 | 375773 | 198623 | 0 | 8129434 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379577023217 | 16385592947873 | 16385593127874 | 16379577402008 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f3de834d900 | 0x7f3dcc224180 | 2487832 | 2368293 | 65536 | 310978 | 310978 | 2487824 | 2376372 | 295774 | 16384 | 65536 | 496190 | 394060 | 167531 | 0 | 6055127 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379577441047 | 16385593154434 | 16385593333955 | 16379577830609 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f3de834d780 | 0x7f3dcc2241c0 | 1315496 | 1208927 | 65536 | 164436 | 164436 | 1315488 | 1216994 | 153859 | 16384 | 65536 | 223300 | 217074 | 65245 | 0 | 1901639 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379577869538 | 16385593367556 | 16385593460676 | 16379578158202 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f3de834d600 | 0x7f3dcc224200 | 1317704 | 1208172 | 65536 | 164712 | 164712 | 1317696 | 1216248 | 152633 | 16384 | 65536 | 217901 | 213827 | 66710 | 0 | 1679930 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379578198671 | 16385593500036 | 16385593592197 | 16379578491444 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f3dea9f1480 | 0x7f3dcc224240 | 1314760 | 1202146 | 65536 | 164344 | 164344 | 1314752 | 1210218 | 151846 | 16384 | 65536 | 173305 | 176785 | 58571 | 0 | 2592403 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379578539123 | 16385593629637 | 16385593721958 | 16379578834437 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f3dea9f1300 | 0x7f3dcc224280 | 2463680 | 2356149 | 65536 | 307959 | 307959 | 2463672 | 2364220 | 298227 | 16384 | 65536 | 493694 | 376488 | 196227 | 0 | 8074511 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379578876186 | 16385593758278 | 16385593938279 | 16379579215098 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f3dea9f1180 | 0x7f3dcc2242c0 | 2471464 | 2361635 | 65536 | 308932 | 308932 | 2471456 | 2369712 | 296209 | 16384 | 65536 | 496129 | 379442 | 184922 | 0 | 6039943 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379579253647 | 16385593973800 | 16385594154441 | 16379579598160 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f3dea9f1000 | 0x7f3dcc224300 | 1305136 | 1199525 | 65536 | 163141 | 163141 | 1305128 | 1207594 | 152410 | 16384 | 65536 | 212758 | 207729 | 71735 | 0 | 2224259 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379579637919 | 16385594180521 | 16385594272842 | 16379579892213 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f3de834de80 | 0x7f3dcc224340 | 1305040 | 1196013 | 65536 | 163129 | 163129 | 1305032 | 1204080 | 153185 | 16384 | 65536 | 200617 | 198730 | 65670 | 0 | 2005584 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379579930982 | 16385594303082 | 16385594394762 | 16379580192977 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f3de834dd00 | 0x7f3dcc224380 | 1302920 | 1195572 | 65536 | 162864 | 162864 | 1302912 | 1203642 | 150726 | 16384 | 65536 | 193080 | 187563 | 57998 | 0 | 1805504 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379580239666 | 16385594428363 | 16385594519883 | 16379580489500 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f3de834db80 | 0x7f3dcc2243c0 | 2468976 | 2362186 | 65536 | 308621 | 308621 | 2468968 | 2370256 | 297894 | 16384 | 65536 | 495001 | 378531 | 181640 | 0 | 8057723 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379580528799 | 16385594546763 | 16385594726925 | 16379580875261 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f3de834da00 | 0x7f3dcc224400 | 2460200 | 2342057 | 65536 | 307524 | 307524 | 2460192 | 2350112 | 296441 | 16384 | 65536 | 496742 | 384686 | 171250 | 0 | 6254855 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379580914501 | 16385594754605 | 16385594934446 | 16379581259163 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f3de834d880 | 0x7f3dcc224440 | 1314952 | 1205410 | 65536 | 164368 | 164368 | 1314944 | 1213474 | 152297 | 16384 | 65536 | 201671 | 196741 | 70756 | 0 | 2588653 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379581298272 | 16385594959566 | 16385595052367 | 16379581555466 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f3de834d700 | 0x7f3dcc224480 | 1324776 | 1209248 | 65536 | 165596 | 165596 | 1324768 | 1217320 | 152185 | 16384 | 65536 | 185005 | 187871 | 71400 | 0 | 2215593 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379581595075 | 16385595095567 | 16385595187728 | 16379581855350 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f3dea9f1580 | 0x7f3dcc2244c0 | 1297720 | 1193067 | 65536 | 162214 | 162214 | 1297712 | 1201132 | 151062 | 16384 | 65536 | 221333 | 208361 | 75932 | 0 | 2539487 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379581902839 | 16385595221808 | 16385595313489 | 16379582158963 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f3dea9f1400 | 0x7f3dcc224500 | 2467032 | 2355444 | 65536 | 308378 | 308378 | 2467024 | 2363518 | 298905 | 16384 | 65536 | 494382 | 377217 | 202861 | 0 | 8665991 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379582202072 | 16385595342289 | 16385595521650 | 16379582549724 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f3dea9f1280 | 0x7f3dcc224540 | 2445976 | 2340300 | 65536 | 305746 | 305746 | 2445968 | 2348374 | 298011 | 16384 | 65536 | 492998 | 387907 | 178061 | 0 | 5439418 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379582588874 | 16385595547250 | 16385595727731 | 16379582949466 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f3dea9f1100 | 0x7f3dcc224580 | 1309256 | 1200447 | 65536 | 163656 | 163656 | 1309248 | 1208516 | 151291 | 16384 | 65536 | 214999 | 201101 | 79855 | 0 | 2503233 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379582989925 | 16385595754771 | 16385595847092 | 16379583245049 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f3de834df80 | 0x7f3dcc2245c0 | 1293880 | 1184402 | 65536 | 161734 | 161734 | 1293872 | 1192466 | 150622 | 16384 | 65536 | 219533 | 215505 | 75035 | 0 | 2158198 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379583284578 | 16385595874292 | 16385595966293 | 16379583540273 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f3de834de00 | 0x7f3dcc224600 | 1298328 | 1191955 | 65536 | 162290 | 162290 | 1298320 | 1200024 | 151399 | 16384 | 65536 | 208592 | 201316 | 60592 | 0 | 2056722 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379583585792 | 16385596000533 | 16385596092214 | 16379583838586 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f3de834dc80 | 0x7f3dcc224640 | 2457520 | 2348568 | 65536 | 307189 | 307189 | 2457512 | 2356636 | 296105 | 16384 | 65536 | 488425 | 382844 | 179114 | 0 | 7552287 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379583878895 | 16385596121174 | 16385596300855 | 16379584236747 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f3de834db00 | 0x7f3dcc224680 | 2442160 | 2335632 | 65536 | 305269 | 305269 | 2442152 | 2343698 | 296110 | 16384 | 65536 | 490563 | 364097 | 182624 | 0 | 6203191 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379584276806 | 16385596325655 | 16385596504856 | 16379584618029 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f3de834d980 | 0x7f3dcc2246c0 | 1312584 | 1202548 | 65536 | 164072 | 164072 | 1312576 | 1210616 | 153073 | 16384 | 65536 | 140129 | 156875 | 73385 | 0 | 2712225 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379584660328 | 16385596531257 | 16385596623577 | 16379584918662 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f3de834d800 | 0x7f3dcc224700 | 1309000 | 1195502 | 65536 | 163624 | 163624 | 1308992 | 1203574 | 152065 | 16384 | 65536 | 220588 | 213736 | 71920 | 0 | 2103336 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379584958441 | 16385596653017 | 16385596745818 | 16379585214686 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f3de834d680 | 0x7f3dcc224740 | 1300264 | 1194710 | 65536 | 162532 | 162532 | 1300256 | 1202770 | 150619 | 16384 | 65536 | 207661 | 203689 | 78600 | 0 | 1909810 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379585260285 | 16385596778618 | 16385596870459 | 16379585515509 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f3dea9f1500 | 0x7f3dcc224780 | 2453536 | 2343536 | 65536 | 306691 | 306691 | 2453528 | 2351606 | 294874 | 16384 | 65536 | 487823 | 359843 | 189432 | 0 | 8021365 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379585556028 | 16385596905659 | 16385597084860 | 16379585899451 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f3dea9f1380 | 0x7f3dcc2247c0 | 2447200 | 2341809 | 65536 | 305899 | 305899 | 2447192 | 2349882 | 296778 | 16384 | 65536 | 488101 | 363940 | 184236 | 0 | 6089868 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379585938980 | 16385597113501 | 16385597293182 | 16379586293432 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f3dea9f1200 | 0x7f3dcc224800 | 1305672 | 1195767 | 65536 | 163208 | 163208 | 1305664 | 1203834 | 152073 | 16384 | 65536 | 174098 | 177747 | 70860 | 0 | 1872309 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379586334721 | 16385597318302 | 16385597410943 | 16379586592635 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f3dea9f1080 | 0x7f3dcc224840 | 1297680 | 1190581 | 65536 | 162209 | 162209 | 1297672 | 1198640 | 150278 | 16384 | 65536 | 168342 | 178828 | 60086 | 0 | 1728310 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379586632245 | 16385597436543 | 16385597528863 | 16379586891999 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f3de834df00 | 0x7f3dcc224880 | 1296192 | 1187059 | 65536 | 162023 | 162023 | 1296184 | 1195118 | 151513 | 16384 | 65536 | 169362 | 176787 | 71853 | 0 | 1947105 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379586938828 | 16385597562144 | 16385597654784 | 16379587188942 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f3de834dd80 | 0x7f3dcc2248c0 | 2446568 | 2337961 | 65536 | 305820 | 305820 | 2446560 | 2346032 | 294203 | 16384 | 65536 | 487466 | 393151 | 184887 | 0 | 7768177 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379587228761 | 16385597680064 | 16385597859586 | 16379587573604 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f3de834dc00 | 0x7f3dcc224900 | 2453192 | 2347351 | 65536 | 306648 | 306648 | 2453184 | 2355420 | 295995 | 16384 | 65536 | 490333 | 386410 | 161763 | 0 | 6315375 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379587611953 | 16385597889826 | 16385598069187 | 16379587964155 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f3de834da80 | 0x7f3dcc224940 | 1307000 | 1197745 | 65536 | 163374 | 163374 | 1306992 | 1205816 | 151290 | 16384 | 65536 | 167419 | 170070 | 75452 | 0 | 1823690 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379588003664 | 16385598117347 | 16385598209988 | 16379588262109 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f3de834d900 | 0x7f3dcc224980 | 1298904 | 1190785 | 65536 | 162362 | 162362 | 1298896 | 1198850 | 150953 | 16384 | 65536 | 198674 | 192443 | 77176 | 0 | 2160543 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379588301038 | 16385598237188 | 16385598330469 | 16379588556712 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f3de834d780 | 0x7f3dcc2249c0 | 1304288 | 1196001 | 65536 | 163035 | 163035 | 1304280 | 1204052 | 150502 | 16384 | 65536 | 161121 | 171144 | 71663 | 0 | 2104592 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379588602921 | 16385598364069 | 16385598456069 | 16379588861875 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f3de834d600 | 0x7f3dcc224a00 | 2438184 | 2324736 | 65536 | 304772 | 304772 | 2438176 | 2332804 | 293422 | 16384 | 65536 | 485052 | 379149 | 177410 | 0 | 7694503 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379588902855 | 16385598488230 | 16385598669031 | 16379589245187 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f3dea9f1480 | 0x7f3dcc224a40 | 2437272 | 2326659 | 65536 | 304658 | 304658 | 2437264 | 2334730 | 291290 | 16384 | 65536 | 483810 | 383202 | 178688 | 0 | 5553009 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379589284116 | 16385598692391 | 16385598871912 | 16379589627699 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f3dea9f1300 | 0x7f3dcc224a80 | 1298296 | 1189135 | 65536 | 162286 | 162286 | 1298288 | 1197196 | 149726 | 16384 | 65536 | 217673 | 213763 | 73198 | 0 | 1698309 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379589666978 | 16385598895432 | 16385598988233 | 16379589924362 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f3dea9f1180 | 0x7f3dcc224ac0 | 1291800 | 1183457 | 65536 | 161474 | 161474 | 1291792 | 1191520 | 149494 | 16384 | 65536 | 217133 | 203062 | 74615 | 0 | 2089957 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379589965051 | 16385599011753 | 16385599103754 | 16379590232715 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f3dea9f1000 | 0x7f3dcc224b00 | 1289264 | 1180475 | 65536 | 161157 | 161157 | 1289256 | 1188542 | 149275 | 16384 | 65536 | 197469 | 196685 | 70136 | 0 | 2115563 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379590279534 | 16385599135754 | 16385599227595 | 16379590536259 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f3de834de80 | 0x7f3dcc224b40 | 2424312 | 2317831 | 65536 | 303038 | 303038 | 2424304 | 2325896 | 291953 | 16384 | 65536 | 483567 | 374668 | 176851 | 0 | 7427926 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379590575008 | 16385599253515 | 16385599431596 | 16379590922300 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f3de834dd00 | 0x7f3dcc224b80 | 2404344 | 2297016 | 65536 | 300542 | 300542 | 2404336 | 2305082 | 290729 | 16384 | 65536 | 481950 | 371166 | 168079 | 0 | 5611247 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379590960819 | 16385599456876 | 16385599634797 | 16379591303482 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f3de834db80 | 0x7f3dcc224bc0 | 1298040 | 1186227 | 65536 | 162254 | 162254 | 1298032 | 1194288 | 151177 | 16384 | 65536 | 174658 | 177614 | 72460 | 0 | 2201961 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379591342111 | 16385599657997 | 16385599750158 | 16379591599445 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f3de834da00 | 0x7f3dcc224c00 | 1290224 | 1183477 | 65536 | 161277 | 161277 | 1290216 | 1191550 | 150499 | 16384 | 65536 | 210396 | 198916 | 75451 | 0 | 2037006 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379591638484 | 16385599773998 | 16385599867279 | 16379591896978 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f3de834d880 | 0x7f3dcc224c40 | 1294744 | 1184779 | 65536 | 161842 | 161842 | 1294736 | 1192838 | 150841 | 16384 | 65536 | 212677 | 206209 | 63480 | 0 | 1999870 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379591943827 | 16385599899439 | 16385599991280 | 16379592198642 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f3de834d700 | 0x7f3dcc224c80 | 2425120 | 2315637 | 65536 | 303139 | 303139 | 2425112 | 2323712 | 289383 | 16384 | 65536 | 483831 | 371077 | 167893 | 0 | 6909018 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379592237081 | 16385600014640 | 16385600194161 | 16379592580783 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f3dea9f1580 | 0x7f3dcc224cc0 | 2416776 | 2306047 | 65536 | 302096 | 302096 | 2416768 | 2314124 | 292519 | 16384 | 65536 | 485732 | 374137 | 166176 | 0 | 5748730 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379592619492 | 16385600217361 | 16385600398482 | 16379592961815 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f3dea9f1400 | 0x7f3dcc224d00 | 1299216 | 1191081 | 65536 | 162401 | 162401 | 1299208 | 1199150 | 151174 | 16384 | 65536 | 217346 | 206768 | 77540 | 0 | 2341716 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379593001164 | 16385600422163 | 16385600515603 | 16379593264728 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f3dea9f1280 | 0x7f3dcc224d40 | 1346328 | 1231969 | 65536 | 168290 | 168290 | 1346320 | 1240040 | 156214 | 16384 | 65536 | 226623 | 208328 | 87217 | 0 | 2572926 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379593305157 | 16385600541523 | 16385600639284 | 16379593566331 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f3dea9f1100 | 0x7f3dcc224d80 | 1303216 | 1194192 | 65536 | 162901 | 162901 | 1303208 | 1202264 | 150841 | 16384 | 65536 | 157641 | 169743 | 55144 | 0 | 1655311 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379593613850 | 16385600671444 | 16385600764085 | 16379593859775 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f3de834df80 | 0x7f3dcc224dc0 | 2405208 | 2298224 | 65536 | 300650 | 300650 | 2405200 | 2306280 | 291959 | 16384 | 65536 | 483407 | 370114 | 165571 | 0 | 6947586 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379593898384 | 16385600791285 | 16385600970166 | 16379594239426 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f3de834de00 | 0x7f3dcc224e00 | 2411328 | 2301803 | 65536 | 301415 | 301415 | 2411320 | 2309866 | 288369 | 16384 | 65536 | 462204 | 369797 | 169267 | 0 | 5471523 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379594279616 | 16385600994326 | 16385601172888 | 16379594625468 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f3de834dc80 | 0x7f3dcc224e40 | 1305696 | 1196160 | 65536 | 163211 | 163211 | 1305688 | 1204226 | 150502 | 16384 | 65536 | 199580 | 197267 | 74129 | 0 | 1887809 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379594665227 | 16385601197688 | 16385601290328 | 16379594923561 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f3de834db00 | 0x7f3dcc224e80 | 1411480 | 1296526 | 65536 | 176434 | 176434 | 1411472 | 1304590 | 163610 | 16384 | 65536 | 241271 | 218652 | 90426 | 0 | 2795082 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379594962351 | 16385601313689 | 16385601415609 | 16379595230995 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f3de834d980 | 0x7f3dcc224ec0 | 1283048 | 1176376 | 65536 | 160380 | 160380 | 1283040 | 1184456 | 149166 | 16384 | 65536 | 198760 | 193471 | 65774 | 0 | 1937504 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379595277004 | 16385601448249 | 16385601540410 | 16379595532018 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f3de834d800 | 0x7f3dcc224f00 | 2426656 | 2322186 | 65536 | 303331 | 303331 | 2426648 | 2330252 | 290619 | 16384 | 65536 | 484162 | 375552 | 171290 | 0 | 7043997 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379595570417 | 16385601564570 | 16385601744251 | 16379595913900 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f3de834d680 | 0x7f3dcc224f40 | 2430984 | 2321833 | 65536 | 303872 | 303872 | 2430976 | 2329888 | 291515 | 16384 | 65536 | 483873 | 378245 | 177409 | 0 | 6810875 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379595952229 | 16385601768252 | 16385601946653 | 16379596298411 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f3dea9f1500 | 0x7f3dcc224f80 | 1283536 | 1177772 | 65536 | 160441 | 160441 | 1283528 | 1185854 | 148378 | 16384 | 65536 | 212766 | 197239 | 68130 | 0 | 1890822 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379596336420 | 16385601970013 | 16385602061854 | 16379596593055 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f3dea9f1380 | 0x7f3dcc224fc0 | 1450968 | 1338829 | 65536 | 181370 | 181370 | 1450960 | 1346898 | 170110 | 16384 | 65536 | 250213 | 219640 | 98233 | 0 | 2973253 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379596632774 | 16385602101854 | 16385602207454 | 16379596909128 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f3dea9f1200 | 0x7f3dcc225000 | 1299600 | 1186012 | 65536 | 162449 | 162449 | 1299592 | 1194074 | 150506 | 16384 | 65536 | 194648 | 191708 | 78271 | 0 | 2189202 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379596970006 | 16385602239935 | 16385602332735 | 16379597209811 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f3dea9f1080 | 0x7f3dcc225040 | 2405496 | 2299362 | 65536 | 300686 | 300686 | 2405488 | 2307430 | 290947 | 16384 | 65536 | 480174 | 380023 | 182373 | 0 | 6898849 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379597248780 | 16385602356575 | 16385602537057 | 16379597604942 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f3de834df00 | 0x7f3dcc225080 | 2432728 | 2313743 | 65536 | 304090 | 304090 | 2432720 | 2321804 | 290617 | 16384 | 65536 | 482939 | 372946 | 176453 | 0 | 7079992 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379597644922 | 16385602560257 | 16385602740738 | 16379597992274 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f3de834dd80 | 0x7f3dcc2250c0 | 1290904 | 1180991 | 65536 | 161362 | 161362 | 1290896 | 1189056 | 151401 | 16384 | 65536 | 198006 | 192853 | 71997 | 0 | 2247819 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379598030503 | 16385602765218 | 16385602858979 | 16379598312307 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f3de834dc00 | 0x7f3dcc225100 | 1518944 | 1410870 | 65536 | 189867 | 189867 | 1518936 | 1418940 | 179395 | 16384 | 65536 | 268230 | 241681 | 108622 | 0 | 3263810 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379598351076 | 16385602881539 | 16385602993380 | 16379598629160 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f3de834da80 | 0x7f3dcc225140 | 1287440 | 1179956 | 65536 | 160929 | 160929 | 1287432 | 1188014 | 149611 | 16384 | 65536 | 201148 | 195246 | 74228 | 0 | 2107903 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379598676539 | 16385603040740 | 16385603133381 | 16379598934173 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f3de834d900 | 0x7f3dcc225180 | 2400984 | 2295338 | 65536 | 300122 | 300122 | 2400976 | 2303392 | 289489 | 16384 | 65536 | 486047 | 376242 | 193303 | 0 | 7606641 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379598972472 | 16385603158021 | 16385603336422 | 16379599315425 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f3de834d780 | 0x7f3dcc2251c0 | 2399664 | 2293217 | 65536 | 299957 | 299957 | 2399656 | 2301286 | 290286 | 16384 | 65536 | 480805 | 368507 | 173584 | 0 | 6789866 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379599355244 | 16385603360262 | 16385603538823 | 16379599697726 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f3de834d600 | 0x7f3dcc225200 | 1294936 | 1188831 | 65536 | 161866 | 161866 | 1294928 | 1196902 | 149268 | 16384 | 65536 | 192348 | 186924 | 71065 | 0 | 2291344 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379599736096 | 16385603562024 | 16385603656104 | 16379599992240 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f3dea9f1480 | 0x7f3dcc225240 | 1601840 | 1491590 | 65536 | 200229 | 200229 | 1601832 | 1499672 | 188025 | 16384 | 65536 | 288711 | 256866 | 96066 | 0 | 3169674 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379600032789 | 16385603679464 | 16385603798345 | 16379600313363 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f3dea9f1300 | 0x7f3dcc225280 | 1286856 | 1177881 | 65536 | 160856 | 160856 | 1286848 | 1185956 | 149489 | 16384 | 65536 | 200612 | 186180 | 71145 | 0 | 2161508 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379600359362 | 16385603830505 | 16385603922986 | 16379600607966 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f3dea9f1180 | 0x7f3dcc2252c0 | 2404880 | 2298405 | 65536 | 300609 | 300609 | 2404872 | 2306474 | 289827 | 16384 | 65536 | 486620 | 381939 | 172515 | 0 | 7334944 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379600648785 | 16385603946506 | 16385604125387 | 16379600994858 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f3dea9f1000 | 0x7f3dcc225300 | 2391880 | 2285267 | 65536 | 298984 | 298984 | 2391872 | 2293324 | 290391 | 16384 | 65536 | 478763 | 362817 | 164676 | 0 | 5327998 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379601034507 | 16385604148587 | 16385604327789 | 16379601375999 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f3de834de80 | 0x7f3dcc225340 | 1297544 | 1188587 | 65536 | 162192 | 162192 | 1297536 | 1196668 | 150057 | 16384 | 65536 | 213119 | 201422 | 77212 | 0 | 2176680 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379601415369 | 16385604350989 | 16385604445389 | 16379601673063 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f3de834dd00 | 0x7f3dcc225380 | 1681952 | 1574696 | 65536 | 210243 | 210243 | 1681944 | 1582768 | 198218 | 16384 | 65536 | 302828 | 259556 | 103541 | 0 | 3627947 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379601711412 | 16385604468590 | 16385604592270 | 16379601998876 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f3de834db80 | 0x7f3dcc2253c0 | 1289792 | 1182395 | 65536 | 161223 | 161223 | 1289784 | 1190454 | 149502 | 16384 | 65536 | 212748 | 199143 | 78633 | 0 | 1907162 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379602044405 | 16385604621071 | 16385604713391 | 16379602305749 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f3de834da00 | 0x7f3dcc225400 | 2378400 | 2272472 | 65536 | 297299 | 297299 | 2378392 | 2280546 | 287150 | 16384 | 65536 | 478442 | 359998 | 179471 | 0 | 6899685 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379602344458 | 16385604737391 | 16385604915473 | 16379602689901 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f3de834d880 | 0x7f3dcc225440 | 2392160 | 2284317 | 65536 | 299019 | 299019 | 2392152 | 2292378 | 287587 | 16384 | 65536 | 477936 | 364715 | 168210 | 0 | 5775901 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379602729530 | 16385604939313 | 16385605120114 | 16379603074912 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f3de834d700 | 0x7f3dcc225480 | 1290976 | 1183221 | 65536 | 161371 | 161371 | 1290968 | 1191288 | 148379 | 16384 | 65536 | 213615 | 199496 | 74253 | 0 | 2293766 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379603113861 | 16385605143474 | 16385605236915 | 16379603377015 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f3dea9f1580 | 0x7f3dcc2254c0 | 1750336 | 1639898 | 65536 | 218791 | 218791 | 1750328 | 1647980 | 205726 | 16384 | 65536 | 323371 | 274417 | 116387 | 0 | 3624211 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379603416405 | 16385605261555 | 16385605390836 | 16379603709218 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f3dea9f1400 | 0x7f3dcc225500 | 1280784 | 1171863 | 65536 | 160097 | 160097 | 1280776 | 1179928 | 148379 | 16384 | 65536 | 196898 | 187077 | 84008 | 0 | 2149253 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379603756757 | 16385605422996 | 16385605515317 | 16379604009201 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f3dea9f1280 | 0x7f3dcc225540 | 2398336 | 2290031 | 65536 | 299791 | 299791 | 2398328 | 2298080 | 286030 | 16384 | 65536 | 474834 | 365746 | 178029 | 0 | 6636015 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379604048101 | 16385605541237 | 16385605720438 | 16379604389813 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f3dea9f1100 | 0x7f3dcc225580 | 2393544 | 2282079 | 65536 | 299192 | 299192 | 2393536 | 2290148 | 287923 | 16384 | 65536 | 473196 | 362650 | 171796 | 0 | 6405672 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379604429222 | 16385605743638 | 16385605923159 | 16379604772775 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f3de834df80 | 0x7f3dcc2255c0 | 1304592 | 1194427 | 65536 | 163073 | 163073 | 1304584 | 1202494 | 149275 | 16384 | 65536 | 214460 | 197132 | 79758 | 0 | 2618819 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379604812154 | 16385605947959 | 16385606042680 | 16379605070708 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f3de834de00 | 0x7f3dcc225600 | 1831480 | 1720900 | 65536 | 228934 | 228934 | 1831472 | 1728970 | 216810 | 16384 | 65536 | 344903 | 279756 | 94376 | 0 | 3019622 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379605109467 | 16385606066680 | 16385606202521 | 16379605425790 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f3de834dc80 | 0x7f3dcc225640 | 1284488 | 1175677 | 65536 | 160560 | 160560 | 1284480 | 1183750 | 149271 | 16384 | 65536 | 210451 | 196325 | 81960 | 0 | 2598835 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379605471649 | 16385606235641 | 16385606328922 | 16379605727804 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f3de834db00 | 0x7f3dcc225680 | 2392784 | 2282810 | 65536 | 299097 | 299097 | 2392776 | 2290886 | 286363 | 16384 | 65536 | 475063 | 368912 | 180791 | 0 | 6612142 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379605767163 | 16385606352442 | 16385606531963 | 16379606115745 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f3de834d980 | 0x7f3dcc2256c0 | 2386376 | 2269809 | 65536 | 298296 | 298296 | 2386368 | 2277890 | 286139 | 16384 | 65536 | 478103 | 363512 | 178482 | 0 | 6051441 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379606155464 | 16385606555163 | 16385606736445 | 16379606503717 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f3de834d800 | 0x7f3dcc225700 | 1311032 | 1199293 | 65536 | 163878 | 163878 | 1311024 | 1207370 | 150951 | 16384 | 65536 | 214181 | 201802 | 86038 | 0 | 2737743 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379606542186 | 16385606760925 | 16385606855965 | 16379606805780 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f3de834d680 | 0x7f3dcc225740 | 1899856 | 1791113 | 65536 | 237481 | 237481 | 1899848 | 1799182 | 226106 | 16384 | 65536 | 359156 | 306594 | 143274 | 0 | 4048236 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379606845589 | 16385606879486 | 16385607021247 | 16379607150342 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f3dea9f1500 | 0x7f3dcc225780 | 1283712 | 1174406 | 65536 | 160463 | 160463 | 1283704 | 1182468 | 148259 | 16384 | 65536 | 209404 | 197523 | 75900 | 0 | 2180962 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379607197101 | 16385607053407 | 16385607146687 | 16379607444956 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f3dea9f1380 | 0x7f3dcc2257c0 | 2372016 | 2266947 | 65536 | 296501 | 296501 | 2372008 | 2275008 | 285351 | 16384 | 65536 | 475456 | 369400 | 169693 | 0 | 5386591 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379607484035 | 16385607170048 | 16385607348609 | 16379607825967 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f3dea9f1200 | 0x7f3dcc225800 | 2375952 | 2266861 | 65536 | 296993 | 296993 | 2375944 | 2274934 | 285577 | 16384 | 65536 | 477417 | 356996 | 175317 | 0 | 5204489 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379607865856 | 16385607371969 | 16385607551330 | 16379608209779 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f3dea9f1080 | 0x7f3dcc225840 | 1353648 | 1245777 | 65536 | 169205 | 169205 | 1353640 | 1253848 | 157562 | 16384 | 65536 | 232629 | 210088 | 86430 | 0 | 2665226 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379608249018 | 16385607575010 | 16385607673571 | 16379608511952 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f3de834df00 | 0x7f3dcc225880 | 2062936 | 1954975 | 65536 | 257866 | 257866 | 2062928 | 1963048 | 247161 | 16384 | 65536 | 400574 | 315530 | 152777 | 0 | 5235139 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379608550411 | 16385607696771 | 16385607849732 | 16379608872744 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f3de834dd80 | 0x7f3dcc2258c0 | 1281440 | 1175959 | 65536 | 160179 | 160179 | 1281432 | 1184024 | 148823 | 16384 | 65536 | 194749 | 182831 | 79287 | 0 | 2169558 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379608920463 | 16385607881892 | 16385607975013 | 16379609177717 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f3de834dc00 | 0x7f3dcc225900 | 2367168 | 2259910 | 65536 | 295895 | 295895 | 2367160 | 2267972 | 285239 | 16384 | 65536 | 473707 | 363676 | 183915 | 0 | 5099670 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379609217206 | 16385607998533 | 16385608177254 | 16379609560549 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f3de834da80 | 0x7f3dcc225940 | 2358928 | 2252255 | 65536 | 294865 | 294865 | 2358920 | 2260330 | 284011 | 16384 | 65536 | 473156 | 362920 | 181326 | 0 | 6314406 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379609601188 | 16385608201734 | 16385608381096 | 16379609941920 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f3de834d900 | 0x7f3dcc225980 | 1425368 | 1314048 | 65536 | 178170 | 178170 | 1425360 | 1322122 | 167299 | 16384 | 65536 | 247857 | 208300 | 104583 | 0 | 3269764 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379609980809 | 16385608404136 | 16385608507817 | 16379610262873 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f3de834d780 | 0x7f3dcc2259c0 | 2226104 | 2107071 | 65536 | 278262 | 278262 | 2226096 | 2115136 | 265747 | 16384 | 65536 | 435777 | 338614 | 159166 | 0 | 6756875 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379610301832 | 16385608531177 | 16385608699178 | 16379610637735 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f3de834d600 | 0x7f3dcc225a00 | 1281096 | 1172557 | 65536 | 160136 | 160136 | 1281088 | 1180620 | 148826 | 16384 | 65536 | 215513 | 196534 | 78860 | 0 | 2384624 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379610684504 | 16385608731818 | 16385608826219 | 16379610936358 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f3dea9f1480 | 0x7f3dcc225a40 | 2364584 | 2257138 | 65536 | 295572 | 295572 | 2364576 | 2265206 | 283897 | 16384 | 65536 | 459417 | 365859 | 162623 | 0 | 5688963 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379610974938 | 16385608849579 | 16385609028620 | 16379611330180 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f3dea9f1300 | 0x7f3dcc225a80 | 2363288 | 2256665 | 65536 | 295410 | 295410 | 2363280 | 2264742 | 284574 | 16384 | 65536 | 473627 | 358030 | 190920 | 0 | 6042041 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379611369779 | 16385609051500 | 16385609231661 | 16379611718691 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f3dea9f1180 | 0x7f3dcc225ac0 | 1483888 | 1372950 | 65536 | 185485 | 185485 | 1483880 | 1381020 | 173806 | 16384 | 65536 | 260228 | 217785 | 95942 | 0 | 2575369 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379611758360 | 16385609254541 | 16385609363182 | 16379612034584 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f3dea9f1000 | 0x7f3dcc225b00 | 2370648 | 2263835 | 65536 | 296330 | 296330 | 2370640 | 2271898 | 285582 | 16384 | 65536 | 473174 | 368716 | 206243 | 0 | 7495782 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379612074013 | 16385609386062 | 16385609565104 | 16379612419176 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f3de834de80 | 0x7f3dcc225b40 | 1371784 | 1260303 | 65536 | 171472 | 171472 | 1371776 | 1268364 | 160471 | 16384 | 65536 | 226766 | 205611 | 93896 | 0 | 2547581 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379612464885 | 16385609597264 | 16385609697264 | 16379612723559 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f3de834dd00 | 0x7f3dcc225b80 | 2376992 | 2266589 | 65536 | 297123 | 297123 | 2376984 | 2274666 | 286474 | 16384 | 65536 | 476990 | 366154 | 165615 | 0 | 4104042 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379612762598 | 16385609721425 | 16385609903666 | 16379613109491 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f3de834db80 | 0x7f3dcc225bc0 | 2368872 | 2256275 | 65536 | 296108 | 296108 | 2368864 | 2264342 | 285689 | 16384 | 65536 | 468633 | 366455 | 165539 | 0 | 4748244 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379613149150 | 16385609927346 | 16385610109907 | 16379613492412 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f3de834da00 | 0x7f3dcc225c00 | 1644944 | 1530233 | 65536 | 205617 | 205617 | 1644936 | 1538292 | 193289 | 16384 | 65536 | 298631 | 256610 | 124890 | 0 | 4551404 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379613530781 | 16385610134387 | 16385610255508 | 16379613820105 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f3de834d880 | 0x7f3dcc225c40 | 2704192 | 2594708 | 65536 | 338023 | 338023 | 2704184 | 2602770 | 327242 | 16384 | 65536 | 550421 | 363537 | 206090 | 0 | 7494873 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379613859064 | 16385610278708 | 16385610483350 | 16379614260375 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f3de834d700 | 0x7f3dcc225c80 | 1478872 | 1362954 | 65536 | 184858 | 184858 | 1478864 | 1371016 | 170547 | 16384 | 65536 | 255950 | 222362 | 98098 | 0 | 3133597 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379614284165 | 16385610526390 | 16385610635191 | 16379614562249 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f3dea9f1580 | 0x7f3dcc225cc0 | 2375072 | 2238561 | 65536 | 296883 | 296883 | 2375064 | 2246630 | 286359 | 16384 | 65536 | 416000 | 325903 | 154173 | 0 | 4865551 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379614601278 | 16385610661751 | 16385610844472 | 16379614951250 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f3dea9f1400 | 0x7f3dcc225d00 | 2394736 | 2263194 | 65536 | 299341 | 299341 | 2394728 | 2271262 | 288822 | 16384 | 65536 | 477768 | 366338 | 174380 | 0 | 4880134 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379614990689 | 16385610867672 | 16385611050553 | 16379615337962 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f3dea9f1280 | 0x7f3dcc225d40 | 1794904 | 1684142 | 65536 | 224362 | 224362 | 1794896 | 1692228 | 210985 | 16384 | 65536 | 334601 | 266531 | 127758 | 0 | 5027938 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379615376661 | 16385611073914 | 16385611206555 | 16379615677454 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f3dea9f1100 | 0x7f3dcc225d80 | 3024704 | 2912578 | 65536 | 378087 | 378087 | 3024696 | 2920660 | 366779 | 16384 | 65536 | 629390 | 444348 | 244899 | 0 | 10218817 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379615717353 | 16385611230235 | 16385611460316 | 16379616128424 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f3de834df80 | 0x7f3dcc225dc0 | 1684088 | 1575694 | 65536 | 210510 | 210510 | 1684080 | 1583766 | 198782 | 16384 | 65536 | 309685 | 258937 | 106683 | 0 | 3676562 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379616159203 | 16385611502556 | 16385611627837 | 16379616445587 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f3de834de00 | 0x7f3dcc225e00 | 2455912 | 2328413 | 65536 | 306988 | 306988 | 2455904 | 2336478 | 296442 | 16384 | 65536 | 482767 | 377503 | 177707 | 0 | 4504355 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379616485766 | 16385611651997 | 16385611843039 | 16379616853338 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f3de834dc80 | 0x7f3dcc225e40 | 2492912 | 2352511 | 65536 | 311613 | 311613 | 2492904 | 2360570 | 301811 | 16384 | 65536 | 493141 | 388705 | 152405 | 0 | 5006123 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379616879948 | 16385611866719 | 16385612061120 | 16379617254009 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f3de834db00 | 0x7f3dcc225e80 | 2101472 | 1993856 | 65536 | 262683 | 262683 | 2101464 | 2001920 | 252874 | 16384 | 65536 | 409581 | 304603 | 164096 | 0 | 6027846 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379617281779 | 16385612086880 | 16385612245601 | 16379617607102 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f3de834d980 | 0x7f3dcc225ec0 | 3668344 | 3558143 | 65536 | 458542 | 458542 | 3668336 | 3566210 | 447754 | 16384 | 65536 | 779833 | 536099 | 398411 | 0 | 13741707 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379617644971 | 16385612268642 | 16385612549923 | 16379618112140 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f3de834d800 | 0x7f3dcc225f00 | 1941920 | 1830929 | 65536 | 242739 | 242739 | 1941912 | 1839000 | 230814 | 16384 | 65536 | 358977 | 293050 | 168042 | 0 | 6376357 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379618140260 | 16385612596004 | 16385612742405 | 16379618448773 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f3de834d680 | 0x7f3dcc225f40 | 2677648 | 2558136 | 65536 | 334705 | 334705 | 2677640 | 2566208 | 325337 | 16384 | 65536 | 444112 | 349317 | 174871 | 0 | 5231785 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379618487692 | 16385612766885 | 16385612978406 | 16379618876554 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f3dea9f1500 | 0x7f3dcc225f80 | 2652192 | 2529926 | 65536 | 331523 | 331523 | 2652184 | 2538000 | 320518 | 16384 | 65536 | 535629 | 411809 | 179547 | 0 | 5698510 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379618902043 | 16385613015207 | 16385613225448 | 16379619287375 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f3dea9f1380 | 0x7f3dcc225fc0 | 2438776 | 2331207 | 65536 | 304846 | 304846 | 2438768 | 2339264 | 293422 | 16384 | 65536 | 486468 | 373901 | 184576 | 0 | 7565166 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379619314444 | 16385613263688 | 16385613447529 | 16379619666246 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f3dea9f1200 | 0x7f3dcc226000 | 4322688 | 4210372 | 65536 | 540335 | 540335 | 4322680 | 4218434 | 527834 | 16384 | 65536 | 932507 | 636597 | 406076 | 0 | 14859224 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379619704995 | 16385613472170 | 16385613804812 | 16379620221704 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f3dea9f1080 | 0x7f3dcc226040 | 2171800 | 2060291 | 65536 | 271474 | 271474 | 2171792 | 2068350 | 258366 | 16384 | 65536 | 423854 | 326862 | 148374 | 0 | 6300615 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379620256873 | 16385613844812 | 16385614009133 | 16379620587296 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f3de834df00 | 0x7f3dcc226080 | 2933112 | 2810434 | 65536 | 366638 | 366638 | 2933104 | 2818494 | 355798 | 16384 | 65536 | 594492 | 455512 | 222408 | 0 | 6212397 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379620625395 | 16385614033293 | 16385614261775 | 16379621033096 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f3de834dd80 | 0x7f3dcc2260c0 | 2943312 | 2828623 | 65536 | 367913 | 367913 | 2943304 | 2836686 | 354910 | 16384 | 65536 | 600993 | 422346 | 222980 | 0 | 7081670 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379621060136 | 16385614299535 | 16385614529137 | 16379621467777 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f3de834dc00 | 0x7f3dcc226100 | 2762456 | 2653065 | 65536 | 345306 | 345306 | 2762448 | 2661128 | 334969 | 16384 | 65536 | 568994 | 392487 | 242438 | 0 | 9169676 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379621495416 | 16385614565297 | 16385614774738 | 16379621885997 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f3de834da80 | 0x7f3dcc226140 | 4968968 | 4850237 | 65536 | 621120 | 621120 | 4968960 | 4858312 | 610046 | 16384 | 65536 | 1086852 | 754127 | 531000 | 0 | 18779310 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379621911897 | 16385614810899 | 16385615194261 | 16379622476834 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f3de834d900 | 0x7f3dcc226180 | 2415352 | 2305718 | 65536 | 301918 | 301918 | 2415344 | 2313790 | 289945 | 16384 | 65536 | 483257 | 360393 | 190237 | 0 | 8531357 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379622510914 | 16385615236181 | 16385615420663 | 16379622870206 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f3de834d780 | 0x7f3dcc2261c0 | 3191144 | 3076795 | 65536 | 398892 | 398892 | 3191136 | 3084852 | 387950 | 16384 | 65536 | 670407 | 484786 | 235834 | 0 | 7135168 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379622909015 | 16385615444503 | 16385615697144 | 16379623342475 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f3de834d600 | 0x7f3dcc226200 | 3118176 | 3012692 | 65536 | 389771 | 389771 | 3118168 | 3020752 | 381443 | 16384 | 65536 | 649716 | 416936 | 236815 | 0 | 11572499 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379623365525 | 16385615730745 | 16385615977146 | 16379623792675 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f3dea9f1480 | 0x7f3dcc226240 | 3098264 | 2988493 | 65536 | 387282 | 387282 | 3098256 | 2996546 | 375070 | 16384 | 65536 | 643968 | 347709 | 311518 | 0 | 11888351 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379623819785 | 16385616012347 | 16385616248028 | 16379624236175 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f3dea9f1300 | 0x7f3dcc226280 | 5713984 | 5599091 | 65536 | 714247 | 714247 | 5713976 | 5607162 | 702433 | 16384 | 65536 | 1262535 | 811954 | 497968 | 0 | 20713400 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379624262035 | 16385616284348 | 16385616734911 | 16379624888211 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f3dea9f1180 | 0x7f3dcc2262c0 | 2905704 | 2792961 | 65536 | 363212 | 363212 | 2905696 | 2801016 | 349866 | 16384 | 65536 | 596309 | 373082 | 270215 | 0 | 11753893 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379624923490 | 16385616779072 | 16385617001633 | 16379625326061 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f3dea9f1000 | 0x7f3dcc226300 | 3763968 | 3652730 | 65536 | 470495 | 470495 | 3763960 | 3660788 | 458062 | 16384 | 65536 | 800176 | 529470 | 445113 | 0 | 15779454 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379625353671 | 16385617037633 | 16385617336515 | 16379625833110 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f3de834de80 | 0x7f3dcc226340 | 3810664 | 3703268 | 65536 | 476332 | 476332 | 3810656 | 3711324 | 460074 | 16384 | 65536 | 806927 | 558065 | 379780 | 0 | 10902933 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379625859859 | 16385617375396 | 16385617674758 | 16379626341119 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f3de834dd00 | 0x7f3dcc226380 | 3838816 | 3722725 | 65536 | 479851 | 479851 | 3838808 | 3730796 | 467242 | 16384 | 65536 | 817135 | 540759 | 387797 | 0 | 11363183 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379626365998 | 16385617707078 | 16385618001960 | 16379626849888 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f3de834db80 | 0x7f3dcc2263c0 | 7003080 | 6895109 | 65536 | 875384 | 875384 | 7003072 | 6903170 | 865059 | 16384 | 65536 | 1567233 | 744403 | 842191 | 0 | 32174689 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379626871827 | 16385618032200 | 16385618586764 | 16379627610651 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f3de834da00 | 0x7f3dcc226400 | 3394400 | 3285695 | 65536 | 424299 | 424299 | 3394392 | 3293768 | 413146 | 16384 | 65536 | 716524 | 512997 | 343823 | 0 | 10888936 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379627641730 | 16385618633004 | 16385618895406 | 16379628082130 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f3de834d880 | 0x7f3dcc226440 | 4409384 | 4294199 | 65536 | 551172 | 551172 | 4409376 | 4302250 | 542054 | 16384 | 65536 | 953151 | 642005 | 471907 | 0 | 13972294 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379628108360 | 16385618932206 | 16385619283728 | 16379628636858 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f3de834d700 | 0x7f3dcc226480 | 4436864 | 4322131 | 65536 | 554607 | 554607 | 4436856 | 4330202 | 540939 | 16384 | 65536 | 955198 | 589400 | 499174 | 0 | 17408481 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379628666908 | 16385619320209 | 16385619670611 | 16379629196716 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f3dea9f1580 | 0x7f3dcc2264c0 | 4469920 | 4358908 | 65536 | 558739 | 558739 | 4469912 | 4366966 | 547546 | 16384 | 65536 | 970958 | 716436 | 380944 | 0 | 14995515 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379629223985 | 16385619707091 | 16385620052053 | 16379629751834 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f3dea9f1400 | 0x7f3dcc226500 | 8298088 | 8189894 | 65536 | 1037260 | 1037260 | 8298080 | 8197960 | 1027123 | 16384 | 65536 | 1873178 | 897417 | 990586 | 0 | 26832766 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379629777833 | 16385620088374 | 16385620746458 | 16379630624384 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f3dea9f1280 | 0x7f3dcc226540 | 5742984 | 5625588 | 65536 | 717872 | 717872 | 5742976 | 5633668 | 706355 | 16384 | 65536 | 1265747 | 832960 | 610633 | 0 | 23685827 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379630648994 | 16385620791418 | 16385621236061 | 16379631271660 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f3dea9f1100 | 0x7f3dcc226580 | 5748872 | 5637443 | 65536 | 718608 | 718608 | 5748864 | 5645508 | 708042 | 16384 | 65536 | 1269354 | 551626 | 518301 | 0 | 22028568 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379631298590 | 16385621272542 | 16385621733985 | 16379631944875 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f3de834df80 | 0x7f3dcc2265c0 | 5745120 | 5631933 | 65536 | 718139 | 718139 | 5745112 | 5640004 | 706249 | 16384 | 65536 | 1269032 | 796634 | 576319 | 0 | 28474687 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379631964865 | 16385621767585 | 16385622226148 | 16379632620731 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f3de834de00 | 0x7f3dcc226600 | 5768848 | 5659642 | 65536 | 721105 | 721105 | 5768840 | 5667710 | 710947 | 16384 | 65536 | 1274484 | 793089 | 695567 | 0 | 25992756 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379632648850 | 16385622268068 | 16385622716391 | 16379633275476 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f3de834dc80 | 0x7f3dcc226640 | 10904744 | 10783447 | 65536 | 1363092 | 1363092 | 10904736 | 10791496 | 1350579 | 16384 | 65536 | 2492079 | 997949 | 1138246 | 0 | 48180186 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379633301386 | 16385622757512 | 16385623623277 | 16379634348712 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f3de834db00 | 0x7f3dcc226680 | 10931424 | 10816078 | 65536 | 1366427 | 1366427 | 10931416 | 10824156 | 1355395 | 16384 | 65536 | 2498207 | 765223 | 1551371 | 0 | 67810314 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379634381752 | 16385623670958 | 16385624522163 | 16379635435068 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f3de834d980 | 0x7f3dcc2266c0 | 10940880 | 10827085 | 65536 | 1367609 | 1367609 | 10940872 | 10835148 | 1355290 | 16384 | 65536 | 2504694 | 1025120 | 1420859 | 0 | 62876117 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379635446938 | 16385624558484 | 16385625440729 | 16379636541494 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f3de834d800 | 0x7f3dcc226700 | 10938032 | 10818342 | 65536 | 1367253 | 1367253 | 10938024 | 10826410 | 1355283 | 16384 | 65536 | 2499629 | 777213 | 1762155 | 0 | 69676660 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379636552254 | 16385625479770 | 16385626356256 | 16379637640039 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f3de834d680 | 0x7f3dcc226740 | 10981232 | 10856001 | 65536 | 1372653 | 1372653 | 10981224 | 10864070 | 1359091 | 16384 | 65536 | 2510385 | 958910 | 1773575 | 0 | 66615489 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379637651929 | 16385626403776 | 16385627260902 | 16379638724616 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f3dea9f1500 | 0x7f3dcc226780 | 21269872 | 21156444 | 65536 | 2658733 | 2658733 | 21269864 | 21164498 | 2646983 | 16384 | 65536 | 4951836 | 883837 | 4053427 | 0 | 198805907 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379638737385 | 16385627298342 | 16385628993713 | 16379640653643 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f3dea9f1380 | 0x7f3dcc2267c0 | 21316248 | 21197428 | 65536 | 2664530 | 2664530 | 21316240 | 21205502 | 2651687 | 16384 | 65536 | 4963635 | 1167214 | 3936294 | 0 | 171034355 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379640672733 | 16385629083474 | 16385630746205 | 16379642584091 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f3dea9f1200 | 0x7f3dcc226800 | 21322848 | 21213077 | 65536 | 2665355 | 2665355 | 21322840 | 21221148 | 2653258 | 16384 | 65536 | 4965554 | 942318 | 4207700 | 0 | 186980909 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379642597350 | 16385630828125 | 16385632550377 | 16379644555287 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f3dea9f1080 | 0x7f3dcc226840 | 21317592 | 21198304 | 65536 | 2664698 | 2664698 | 21317584 | 21206368 | 2652131 | 16384 | 65536 | 4964807 | 988425 | 4056294 | 0 | 174833632 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379644567937 | 16385632634857 | 16385634347989 | 16379646522034 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f3de834df00 | 0x7f3dcc226880 | 21365192 | 21236614 | 65536 | 2670648 | 2670648 | 21365184 | 21244680 | 2656842 | 16384 | 65536 | 4970973 | 636247 | 4052104 | 0 | 177431644 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379646534674 | 16385634433909 | 16385636105921 | 16379648450711 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 157596 | 157596 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f3de834dd80 | 0x7f3dcc2268c0 | 42014552 | 41899223 | 65536 | 5251818 | 5251818 | 42014544 | 41907290 | 5239895 | 16384 | 65536 | 9878595 | 1233614 | 8884810 | 0 | 408383757 | 0 | 0 | 0 | 0 | 0 | 0 | 131072 | 65536 | 0 | 16379648462461 | 16385636187361 | 16385639542103 | 16379652056242 |