55 KiB
55 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_BUSY_CU_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | GRBM_COUNT | GRBM_GUI_ACTIVE | TD_TD_BUSY_sum | TD_TC_STALL_sum | TD_SPI_STALL_sum | TD_LOAD_WAVEFRONT_sum | TD_ATOMIC_WAVEFRONT_sum | TD_STORE_WAVEFRONT_sum | TD_COALESCABLE_WAVEFRONT_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 148935 | 148935 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7fe74a404280 | 3060592 | 2972533 | 38620036 | 524288 | 240875275 | 382573 | 382573 | 37840900.0 | 37281003.0 | 7661.0 | 524288.0 | 0.0 | 524288.0 | 0.0 | 16149794842349 | 16152833487509 | 16152833727350 | 16149943687783 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 148935 | 148935 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7fe74a423f80 | 263752 | 161641 | 1390824 | 512 | 1728708 | 32968 | 32968 | 178468.0 | 159696.0 | 0.0 | 4096.0 | 0.0 | 0.0 | 4096.0 | 16149949158912 | 16152838526258 | 16152838539378 | 16149949350468 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7fe74d4cf380 | 0x7fe74a423fc0 | 1567456 | 1438041 | 18621854 | 65536 | 104017641 | 195931 | 195931 | 13732372.0 | 11593614.0 | 2600.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149949640741 | 16152838607859 | 16152838700339 | 16149949912285 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7fe74d4cf200 | 0x7fe74a424000 | 2506760 | 2379848 | 30798118 | 65536 | 215467337 | 313344 | 313344 | 29244072.0 | 20851862.0 | 1968.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149950207079 | 16152838733299 | 16152838912180 | 16149950572251 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7fe74d4cf080 | 0x7fe74a424040 | 2530224 | 2400602 | 31098348 | 65536 | 216398876 | 316277 | 316277 | 29335678.0 | 20943408.0 | 2280.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149950860334 | 16152838937461 | 16152839117622 | 16149951223766 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7fe74ae2ff00 | 0x7fe74a424080 | 1397224 | 1271352 | 16390759 | 65536 | 84347518 | 174652 | 174652 | 13895714.0 | 11753415.0 | 2144.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149951511880 | 16152839142422 | 16152839236182 | 16149951788164 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7fe74ae2fd80 | 0x7fe74a4240c0 | 1390168 | 1260920 | 16265011 | 65536 | 88797724 | 173770 | 173770 | 13901602.0 | 11763121.0 | 2287.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149952075857 | 16152839261943 | 16152839354423 | 16149952350181 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7fe74ae2fc00 | 0x7fe74a424100 | 1301320 | 1198951 | 15520248 | 65536 | 103928920 | 162664 | 162664 | 14415034.0 | 12277640.0 | 2111.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149952649125 | 16152839393463 | 16152839485144 | 16149952951928 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7fe74ae2fa80 | 0x7fe74a424140 | 2471472 | 2369393 | 30701420 | 65536 | 227881913 | 308933 | 308933 | 29508532.0 | 21117378.0 | 1995.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149953239152 | 16152839521144 | 16152839701145 | 16149953638763 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7fe74ae2f900 | 0x7fe74a424180 | 2465408 | 2358579 | 30553344 | 65536 | 218401891 | 308175 | 308175 | 29293340.0 | 20902185.0 | 2352.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149953924446 | 16152839734745 | 16152839914427 | 16149954331937 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7fe74ae2f780 | 0x7fe74a4241c0 | 1314328 | 1211316 | 15685269 | 65536 | 100533145 | 164290 | 164290 | 14483040.0 | 12345958.0 | 2142.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149954618181 | 16152839952187 | 16152840045307 | 16149954918984 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7fe74ae2f600 | 0x7fe74a424200 | 1317864 | 1212744 | 15686029 | 65536 | 92508528 | 164732 | 164732 | 14485940.0 | 12347892.0 | 2089.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149955205358 | 16152840070907 | 16152840164348 | 16149955505411 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7fe74d4cf480 | 0x7fe74a424240 | 1315816 | 1203371 | 15578196 | 65536 | 100673826 | 164476 | 164476 | 14376612.0 | 12240156.0 | 2161.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149955800765 | 16152840196188 | 16152840289149 | 16149956140567 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7fe74d4cf300 | 0x7fe74a424280 | 2470208 | 2360626 | 30592051 | 65536 | 225105776 | 308775 | 308775 | 29372832.0 | 20981676.0 | 1971.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149956426341 | 16152840318269 | 16152840497950 | 16149956859021 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7fe74d4cf180 | 0x7fe74a4242c0 | 2482608 | 2378875 | 30779555 | 65536 | 223634749 | 310325 | 310325 | 29579132.0 | 21187979.0 | 2222.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149957144905 | 16152840529150 | 16152840708831 | 16149957595425 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7fe74d4cf000 | 0x7fe74a424300 | 1312520 | 1210486 | 15662791 | 65536 | 94005776 | 164064 | 164064 | 14452510.0 | 12314778.0 | 2254.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149957881779 | 16152840734431 | 16152840827872 | 16149958225081 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7fe74ae2fe80 | 0x7fe74a424340 | 1308304 | 1201466 | 15551372 | 65536 | 101361863 | 163537 | 163537 | 14358296.0 | 12223277.0 | 2167.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149958511875 | 16152840859232 | 16152840952833 | 16149958856797 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7fe74ae2fd00 | 0x7fe74a424380 | 1297952 | 1192296 | 15443427 | 65536 | 104290672 | 162243 | 162243 | 14271456.0 | 12131631.0 | 2249.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149959149021 | 16152840985473 | 16152841078434 | 16149959477563 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7fe74ae2fb80 | 0x7fe74a4243c0 | 2464496 | 2359357 | 30559101 | 65536 | 229112628 | 308061 | 308061 | 29332584.0 | 20941448.0 | 1973.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149959764887 | 16152841104354 | 16152841284035 | 16149960188258 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7fe74ae2fa00 | 0x7fe74a424400 | 2453216 | 2345901 | 30380077 | 65536 | 213152662 | 306651 | 306651 | 29287176.0 | 20896022.0 | 2310.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149960476121 | 16152841309635 | 16152841490756 | 16149960905262 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7fe74ae2f880 | 0x7fe74a424440 | 1311520 | 1203677 | 15583576 | 65536 | 98521283 | 163939 | 163939 | 14324720.0 | 12187759.0 | 2162.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149961190995 | 16152841524676 | 16152841618437 | 16149961527028 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7fe74ae2f700 | 0x7fe74a424480 | 1313888 | 1201403 | 15542966 | 65536 | 104807274 | 164235 | 164235 | 14350392.0 | 12212542.0 | 2113.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149961815031 | 16152841652677 | 16152841746438 | 16149962114825 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7fe74d4cf580 | 0x7fe74a4244c0 | 1301968 | 1199623 | 15523389 | 65536 | 106172191 | 162745 | 162745 | 14367028.0 | 12228824.0 | 2133.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149962410268 | 16152841780518 | 16152841872678 | 16149962709302 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7fe74d4cf400 | 0x7fe74a424500 | 2477064 | 2368538 | 30661155 | 65536 | 231107473 | 309632 | 309632 | 29457556.0 | 21066391.0 | 1969.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149962996275 | 16152841899559 | 16152842079720 | 16149963386637 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7fe74d4cf280 | 0x7fe74a424540 | 2467744 | 2353647 | 30479671 | 65536 | 217377496 | 308467 | 308467 | 29296240.0 | 20905077.0 | 2189.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149963673531 | 16152842139400 | 16152842318761 | 16149964056842 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7fe74d4cf100 | 0x7fe74a424580 | 1296432 | 1192079 | 15426186 | 65536 | 102117310 | 162053 | 162053 | 14306780.0 | 12170192.0 | 2139.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149964343936 | 16152842346921 | 16152842438922 | 16149964646349 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7fe74ae2ff80 | 0x7fe74a4245c0 | 1300912 | 1192877 | 15421676 | 65536 | 91558785 | 162613 | 162613 | 14282492.0 | 12145308.0 | 2159.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149964933413 | 16152842464202 | 16152842556363 | 16149965229316 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7fe74ae2fe00 | 0x7fe74a424600 | 1296296 | 1191362 | 15422246 | 65536 | 102887801 | 162036 | 162036 | 14236664.0 | 12098132.0 | 2207.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149965523300 | 16152842589163 | 16152842682443 | 16149965835993 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7fe74ae2fc80 | 0x7fe74a424640 | 2447176 | 2338542 | 30306873 | 65536 | 222108803 | 305896 | 305896 | 29220260.0 | 20829116.0 | 2003.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149966123157 | 16152842709323 | 16152842889805 | 16149966518788 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7fe74ae2fb00 | 0x7fe74a424680 | 2478096 | 2372092 | 30704192 | 65536 | 225095106 | 309761 | 309761 | 29557704.0 | 21166536.0 | 2210.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149966805521 | 16152842913165 | 16152843093646 | 16149967195213 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7fe74ae2f980 | 0x7fe74a4246c0 | 1300848 | 1197088 | 15488540 | 65536 | 108394074 | 162605 | 162605 | 14294520.0 | 12156323.0 | 2059.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149967481996 | 16152843117486 | 16152843210767 | 16149967780520 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7fe74ae2f800 | 0x7fe74a424700 | 1311728 | 1203481 | 15542346 | 65536 | 87807705 | 163965 | 163965 | 14337330.0 | 12201678.0 | 2168.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149968067193 | 16152843235087 | 16152843329167 | 16149968364537 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7fe74ae2f680 | 0x7fe74a424740 | 1304400 | 1198426 | 15512163 | 65536 | 87206526 | 163049 | 163049 | 14326448.0 | 12192341.0 | 2198.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149968658390 | 16152843361327 | 16152843453968 | 16149968958394 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7fe74d4cf500 | 0x7fe74a424780 | 2442200 | 2333408 | 30203349 | 65536 | 225184057 | 305274 | 305274 | 29095716.0 | 20704560.0 | 2027.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149969245317 | 16152843477168 | 16152843657169 | 16149969667478 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7fe74d4cf380 | 0x7fe74a4247c0 | 2441840 | 2330773 | 30187493 | 65536 | 203616711 | 305229 | 305229 | 29006452.0 | 20615277.0 | 2228.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149969953842 | 16152843682129 | 16152843863730 | 16149970350023 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7fe74d4cf200 | 0x7fe74a424800 | 1304352 | 1200041 | 15522705 | 65536 | 98382332 | 163043 | 163043 | 14301130.0 | 12165032.0 | 2097.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149970638356 | 16152843887891 | 16152843982131 | 16149970944160 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7fe74d4cf080 | 0x7fe74a424840 | 1318040 | 1208516 | 15581549 | 65536 | 89903294 | 164754 | 164754 | 14143944.0 | 12004004.0 | 2239.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149971230633 | 16152844005811 | 16152844100372 | 16149971571836 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7fe74ae2ff00 | 0x7fe74a424880 | 1311336 | 1207447 | 15623514 | 65536 | 104150745 | 163916 | 163916 | 14286500.0 | 12148741.0 | 2162.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149971864419 | 16152844133172 | 16152844226293 | 16149972195652 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7fe74ae2fd80 | 0x7fe74a4248c0 | 2446720 | 2339964 | 30290088 | 65536 | 224783477 | 305839 | 305839 | 29104556.0 | 20713394.0 | 2176.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149972481286 | 16152844252373 | 16152844431574 | 16149972913106 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7fe74ae2fc00 | 0x7fe74a424900 | 2444256 | 2334954 | 30235199 | 65536 | 220859283 | 305531 | 305531 | 29148832.0 | 20757664.0 | 2108.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149973198960 | 16152844457174 | 16152844637815 | 16149973624250 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7fe74ae2fa80 | 0x7fe74a424940 | 1304680 | 1198952 | 15498517 | 65536 | 98105788 | 163084 | 163084 | 14294624.0 | 12158928.0 | 2134.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149973910364 | 16152844662775 | 16152844756536 | 16149974260176 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7fe74ae2f900 | 0x7fe74a424980 | 1319144 | 1204026 | 15502591 | 65536 | 92713885 | 164892 | 164892 | 14281512.0 | 12144572.0 | 2144.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149974546590 | 16152844781016 | 16152844875417 | 16149974862013 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7fe74ae2f780 | 0x7fe74a4249c0 | 1306152 | 1197459 | 15483887 | 65536 | 90135789 | 163268 | 163268 | 14211052.0 | 12073771.0 | 2122.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149975154166 | 16152844907577 | 16152845001977 | 16149975491489 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7fe74ae2f600 | 0x7fe74a424a00 | 2455960 | 2341234 | 30313426 | 65536 | 218469938 | 306994 | 306994 | 29014604.0 | 20623431.0 | 1906.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149975777723 | 16152845026778 | 16152845205179 | 16149976208183 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7fe74d4cf480 | 0x7fe74a424a40 | 2415376 | 2299322 | 29793586 | 65536 | 216896403 | 301921 | 301921 | 28629972.0 | 20238819.0 | 2205.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149976494947 | 16152845229659 | 16152845409500 | 16149976923357 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7fe74d4cf300 | 0x7fe74a424a80 | 1300688 | 1195231 | 15427742 | 65536 | 94612257 | 162585 | 162585 | 14297660.0 | 12160709.0 | 2108.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149977209561 | 16152845434300 | 16152845527101 | 16149977544173 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7fe74d4cf180 | 0x7fe74a424ac0 | 1300336 | 1188209 | 15325350 | 65536 | 96875824 | 162541 | 162541 | 14171826.0 | 12032707.0 | 2300.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149977830967 | 16152845551901 | 16152845645341 | 16149978169439 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7fe74d4cf000 | 0x7fe74a424b00 | 1289768 | 1186126 | 15340806 | 65536 | 99482377 | 161220 | 161220 | 14252608.0 | 12114977.0 | 2198.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149978462513 | 16152845678302 | 16152845769822 | 16149978797016 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7fe74ae2fe80 | 0x7fe74a424b40 | 2441312 | 2335378 | 30213479 | 65536 | 224334460 | 305163 | 305163 | 29010552.0 | 20619399.0 | 2025.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149979083509 | 16152845796862 | 16152845975583 | 16149979514500 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7fe74ae2fd00 | 0x7fe74a424b80 | 2414576 | 2308143 | 29917902 | 65536 | 219085457 | 301821 | 301821 | 28754388.0 | 20363262.0 | 2149.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149979800453 | 16152845999903 | 16152846179265 | 16149980230374 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7fe74ae2fb80 | 0x7fe74a424bc0 | 1300232 | 1191798 | 15375441 | 65536 | 96753564 | 162528 | 162528 | 14158630.0 | 12019447.0 | 2007.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149980515877 | 16152846202785 | 16152846296385 | 16149980818221 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7fe74ae2fa00 | 0x7fe74a424c00 | 1307776 | 1202978 | 15516593 | 65536 | 97994624 | 163471 | 163471 | 14141650.0 | 11993802.0 | 2301.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149981104484 | 16152846320225 | 16152846415906 | 16149981412778 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7fe74ae2f880 | 0x7fe74a424c40 | 1294968 | 1189137 | 15369842 | 65536 | 93261475 | 161870 | 161870 | 14178352.0 | 12041195.0 | 2098.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149981706101 | 16152846448386 | 16152846541667 | 16149982039024 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7fe74ae2f700 | 0x7fe74a424c80 | 2427264 | 2321638 | 30076667 | 65536 | 223199937 | 303407 | 303407 | 28964244.0 | 20573079.0 | 1934.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149982325697 | 16152846564867 | 16152846742948 | 16149982725138 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7fe74d4cf580 | 0x7fe74a424cc0 | 2413552 | 2306209 | 29886541 | 65536 | 214468368 | 301693 | 301693 | 28604960.0 | 20213816.0 | 1948.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149983011482 | 16152846767428 | 16152846947269 | 16149983409993 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7fe74d4cf400 | 0x7fe74a424d00 | 1302696 | 1198554 | 15456278 | 65536 | 92842864 | 162836 | 162836 | 14203124.0 | 12065181.0 | 2161.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149983696227 | 16152846971109 | 16152847065990 | 16149984041149 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7fe74d4cf280 | 0x7fe74a424d40 | 1340584 | 1238129 | 15955552 | 65536 | 104202383 | 167572 | 167572 | 14303048.0 | 12132438.0 | 2432.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149984327743 | 16152847091590 | 16152847190791 | 16149984676975 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7fe74d4cf100 | 0x7fe74a424d80 | 1297768 | 1195392 | 15453422 | 65536 | 100008013 | 162220 | 162220 | 14189128.0 | 12051490.0 | 2190.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149984970479 | 16152847224231 | 16152847317511 | 16149985306781 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7fe74ae2ff80 | 0x7fe74a424dc0 | 2408208 | 2298255 | 29758294 | 65536 | 219778786 | 301025 | 301025 | 28619052.0 | 20227872.0 | 2134.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149985591845 | 16152847343272 | 16152847522793 | 16149986020175 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7fe74ae2fe00 | 0x7fe74a424e00 | 2403752 | 2293431 | 29709926 | 65536 | 214848879 | 300468 | 300468 | 28620616.0 | 20229450.0 | 2156.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149986307649 | 16152847547753 | 16152847728874 | 16149986744599 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7fe74ae2fc80 | 0x7fe74a424e40 | 1311120 | 1205386 | 15549589 | 65536 | 92010656 | 163889 | 163889 | 14196998.0 | 12059330.0 | 2168.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149987030973 | 16152847752234 | 16152847847275 | 16149987377666 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7fe74ae2fb00 | 0x7fe74a424e80 | 1408960 | 1299501 | 16782144 | 65536 | 106930229 | 176119 | 176119 | 14657990.0 | 12460408.0 | 2369.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149987664229 | 16152847871755 | 16152847976555 | 16149988021691 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7fe74ae2f980 | 0x7fe74a424ec0 | 1288232 | 1183651 | 15276702 | 65536 | 97658584 | 161028 | 161028 | 14177502.0 | 12041442.0 | 2124.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149988314495 | 16152848009516 | 16152848102476 | 16149988647708 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7fe74ae2f800 | 0x7fe74a424f00 | 2435872 | 2329344 | 30157297 | 65536 | 221518793 | 304483 | 304483 | 28881620.0 | 20490458.0 | 2222.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149988933901 | 16152848127436 | 16152848307437 | 16149989372712 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7fe74ae2f680 | 0x7fe74a424f40 | 2435816 | 2329967 | 30182705 | 65536 | 223722997 | 304476 | 304476 | 28879228.0 | 20488081.0 | 2174.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149989658445 | 16152848333838 | 16152848513039 | 16149990086116 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7fe74d4cf500 | 0x7fe74a424f80 | 1294912 | 1188746 | 15325586 | 65536 | 93110356 | 161863 | 161863 | 14207266.0 | 12067861.0 | 2253.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149990374610 | 16152848537039 | 16152848630639 | 16149990683353 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7fe74d4cf380 | 0x7fe74a424fc0 | 1459144 | 1349814 | 17417885 | 65536 | 109161010 | 182392 | 182392 | 15021010.0 | 12800740.0 | 2246.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149990970346 | 16152848654960 | 16152848762000 | 16149991338088 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7fe74d4cf200 | 0x7fe74a425000 | 1294920 | 1188927 | 15346684 | 65536 | 88512306 | 161864 | 161864 | 14142256.0 | 12004697.0 | 2076.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149991640231 | 16152848794800 | 16152848888881 | 16149991982274 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7fe74d4cf080 | 0x7fe74a425040 | 2416400 | 2305940 | 29822372 | 65536 | 215757421 | 302049 | 302049 | 28579636.0 | 20188474.0 | 2145.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149992268688 | 16152848914321 | 16152849094962 | 16149992668689 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7fe74ae2ff00 | 0x7fe74a425080 | 2428352 | 2321723 | 30061867 | 65536 | 222896304 | 303543 | 303543 | 28863280.0 | 20472120.0 | 2119.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149992954792 | 16152849120722 | 16152849300563 | 16149993392313 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7fe74ae2fd80 | 0x7fe74a4250c0 | 1299808 | 1189143 | 15321108 | 65536 | 88888404 | 162475 | 162475 | 14096502.0 | 11957897.0 | 2191.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149993677586 | 16152849324724 | 16152849419444 | 16149994022399 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7fe74ae2fc00 | 0x7fe74a425100 | 1529912 | 1421451 | 18311355 | 65536 | 123393492 | 191238 | 191238 | 14869336.0 | 12605325.0 | 2158.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149994308982 | 16152849443124 | 16152849558325 | 16149994629435 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7fe74ae2fa80 | 0x7fe74a425140 | 1297584 | 1189787 | 15360232 | 65536 | 87780043 | 162197 | 162197 | 14094340.0 | 11957551.0 | 2145.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149994922099 | 16152849604405 | 16152849697846 | 16149995262641 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7fe74ae2f900 | 0x7fe74a425180 | 2427408 | 2317292 | 30005693 | 65536 | 223845452 | 303425 | 303425 | 28755044.0 | 20363881.0 | 2299.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149995548695 | 16152849722006 | 16152849902007 | 16149995936376 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7fe74ae2f780 | 0x7fe74a4251c0 | 2412088 | 2301272 | 29796627 | 65536 | 220140652 | 301510 | 301510 | 28608048.0 | 20216904.0 | 2187.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149996222270 | 16152849927127 | 16152850108248 | 16149996664790 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7fe74ae2f600 | 0x7fe74a425200 | 1300680 | 1190130 | 15338642 | 65536 | 93462388 | 162584 | 162584 | 14096998.0 | 11956551.0 | 2257.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149996951384 | 16152850133529 | 16152850229049 | 16149997310486 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7fe74d4cf480 | 0x7fe74a425240 | 1619704 | 1498189 | 19311483 | 65536 | 134706000 | 202462 | 202462 | 15223144.0 | 12942101.0 | 1983.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149997597119 | 16152850254649 | 16152850374970 | 16149997966871 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7fe74d4cf300 | 0x7fe74a425280 | 1299224 | 1187735 | 15317554 | 65536 | 88069570 | 162402 | 162402 | 14091700.0 | 11954173.0 | 2246.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16149998259775 | 16152850412250 | 16152850506011 | 16149998604077 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7fe74d4cf180 | 0x7fe74a4252c0 | 2418568 | 2310885 | 29918967 | 65536 | 220194692 | 302320 | 302320 | 28672792.0 | 20281634.0 | 2136.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149998890591 | 16152850530971 | 16152850710652 | 16149999274932 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7fe74d4cf000 | 0x7fe74a425300 | 2381856 | 2276419 | 29502863 | 65536 | 204655483 | 297731 | 297731 | 28290128.0 | 19898960.0 | 2008.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16149999563926 | 16152850734332 | 16152850915773 | 16149999998636 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7fe74ae2fe80 | 0x7fe74a425340 | 1298824 | 1194609 | 15412328 | 65536 | 96473838 | 162352 | 162352 | 14145432.0 | 12005146.0 | 2218.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150000284440 | 16152850939133 | 16152851034654 | 16150000633332 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7fe74ae2fd00 | 0x7fe74a425380 | 1681952 | 1568465 | 20244341 | 65536 | 129255076 | 210243 | 210243 | 15483946.0 | 13162642.0 | 1911.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150000918716 | 16152851059454 | 16152851185695 | 16150001305117 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7fe74ae2fb80 | 0x7fe74a4253c0 | 1291704 | 1187531 | 15312831 | 65536 | 86421458 | 161462 | 161462 | 14081070.0 | 11941621.0 | 2226.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150001597121 | 16152851219135 | 16152851313536 | 16150001931683 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7fe74ae2fa00 | 0x7fe74a425400 | 2410432 | 2297874 | 29740893 | 65536 | 220326378 | 301303 | 301303 | 28593244.0 | 20202073.0 | 2198.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150002221197 | 16152851338016 | 16152851517057 | 16150002610978 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7fe74ae2f880 | 0x7fe74a425440 | 2403520 | 2290916 | 29658530 | 65536 | 206919935 | 300439 | 300439 | 28407136.0 | 20015973.0 | 2081.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150002896852 | 16152851539617 | 16152851718338 | 16150003331362 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7fe74ae2f700 | 0x7fe74a425480 | 1293552 | 1188596 | 15328944 | 65536 | 96294250 | 161693 | 161693 | 14142184.0 | 12004506.0 | 2343.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150003617576 | 16152851743778 | 16152851837699 | 16150003965508 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7fe74d4cf580 | 0x7fe74a4254c0 | 1754752 | 1642139 | 21169537 | 65536 | 141071638 | 219343 | 219343 | 15817144.0 | 13482755.0 | 1752.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150004251882 | 16152851862499 | 16152851992900 | 16150004635933 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7fe74d4cf400 | 0x7fe74a425500 | 1297728 | 1187576 | 15315546 | 65536 | 89807209 | 162215 | 162215 | 14154588.0 | 12018279.0 | 2241.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150004928857 | 16152852026180 | 16152852119141 | 16150005262739 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7fe74d4cf280 | 0x7fe74a425540 | 2368912 | 2264081 | 29334057 | 65536 | 210480267 | 296113 | 296113 | 28099496.0 | 19708327.0 | 2179.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150005549513 | 16152852143941 | 16152852323622 | 16150005982963 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7fe74d4cf100 | 0x7fe74a425580 | 2382456 | 2276345 | 29499946 | 65536 | 211184834 | 297806 | 297806 | 28294268.0 | 19903107.0 | 2094.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150006271367 | 16152852348742 | 16152852528423 | 16150006660049 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7fe74ae2ff80 | 0x7fe74a4255c0 | 1320552 | 1210417 | 15617219 | 65536 | 104600368 | 165068 | 165068 | 14283998.0 | 12144162.0 | 2356.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150006946492 | 16152852553063 | 16152852648424 | 16150007296315 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7fe74ae2fe00 | 0x7fe74a425600 | 1850128 | 1730979 | 22355012 | 65536 | 129879799 | 231265 | 231265 | 15520574.0 | 13147027.0 | 1569.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150007582508 | 16152852673544 | 16152852812905 | 16150007976360 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7fe74ae2fc80 | 0x7fe74a425640 | 1291208 | 1184420 | 15265666 | 65536 | 100244665 | 161400 | 161400 | 13996398.0 | 11858099.0 | 2286.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150008268953 | 16152852844745 | 16152852939146 | 16150008606076 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7fe74ae2fb00 | 0x7fe74a425680 | 2376024 | 2268897 | 29389474 | 65536 | 210462537 | 297002 | 297002 | 28166872.0 | 19775722.0 | 2274.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150008892019 | 16152852963146 | 16152853142027 | 16150009322580 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7fe74ae2f980 | 0x7fe74a4256c0 | 2407184 | 2293489 | 29690944 | 65536 | 213506567 | 300897 | 300897 | 28483800.0 | 20092652.0 | 2066.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150009608064 | 16152853166507 | 16152853346348 | 16150010036694 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7fe74ae2f800 | 0x7fe74a425700 | 1312808 | 1203291 | 15522791 | 65536 | 104325955 | 164100 | 164100 | 14222694.0 | 12076760.0 | 2314.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150010324308 | 16152853370028 | 16152853468269 | 16150010627691 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7fe74ae2f680 | 0x7fe74a425740 | 1925632 | 1807124 | 23332426 | 65536 | 144905205 | 240703 | 240703 | 15792110.0 | 13395720.0 | 1571.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150010914175 | 16152853492909 | 16152853638510 | 16150011262617 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7fe74d4cf500 | 0x7fe74a425780 | 1288624 | 1184232 | 15254647 | 65536 | 95272002 | 161077 | 161077 | 13995762.0 | 11855064.0 | 2223.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150011555621 | 16152853671310 | 16152853765711 | 16150011896783 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7fe74d4cf380 | 0x7fe74a4257c0 | 2377592 | 2268679 | 29354718 | 65536 | 209053550 | 297198 | 297198 | 28111656.0 | 19720305.0 | 2073.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150012183887 | 16152853790991 | 16152853971952 | 16150012624207 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7fe74d4cf200 | 0x7fe74a425800 | 2374928 | 2269659 | 29375800 | 65536 | 200701740 | 296865 | 296865 | 28154516.0 | 19763283.0 | 2054.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150012910640 | 16152853996432 | 16152854176593 | 16150013340421 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7fe74d4cf080 | 0x7fe74a425840 | 1355920 | 1248950 | 16126166 | 65536 | 104509645 | 169489 | 169489 | 14752126.0 | 12591892.0 | 2441.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150013626405 | 16152854200913 | 16152854300914 | 16150013975257 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7fe74ae2ff00 | 0x7fe74a425880 | 2077520 | 1963144 | 25346998 | 65536 | 176815089 | 259689 | 259689 | 15153350.0 | 12736110.0 | 1622.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150014261591 | 16152854326354 | 16152854481715 | 16150014675841 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7fe74ae2fd80 | 0x7fe74a4258c0 | 1291936 | 1187170 | 15302074 | 65536 | 99285790 | 161491 | 161491 | 14037418.0 | 11896047.0 | 2298.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150014968865 | 16152854515155 | 16152854610036 | 16150015306107 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7fe74ae2fc00 | 0x7fe74a425900 | 2373688 | 2267684 | 29341221 | 65536 | 207840763 | 296710 | 296710 | 28179144.0 | 19787471.0 | 2240.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150015592071 | 16152854634996 | 16152854813557 | 16150016025031 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7fe74ae2fa80 | 0x7fe74a425940 | 2364416 | 2259616 | 29244828 | 65536 | 208086535 | 295551 | 295551 | 28011088.0 | 19619934.0 | 2249.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150016311355 | 16152854838197 | 16152855018358 | 16150016744575 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7fe74ae2f900 | 0x7fe74a425980 | 1430880 | 1322125 | 17057622 | 65536 | 109796937 | 178859 | 178859 | 15460446.0 | 13290897.0 | 2413.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150017030969 | 16152855040598 | 16152855147959 | 16150017383501 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7fe74ae2f780 | 0x7fe74a4259c0 | 2229424 | 2124144 | 27448471 | 65536 | 196965938 | 278677 | 278677 | 15117178.0 | 12685442.0 | 1249.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150017668455 | 16152855170519 | 16152855341400 | 16150018082076 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7fe74ae2f600 | 0x7fe74a425a00 | 1290224 | 1183715 | 15264882 | 65536 | 97492518 | 161277 | 161277 | 13988306.0 | 11840752.0 | 2408.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150018375009 | 16152855374520 | 16152855469881 | 16150018717742 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7fe74d4cf480 | 0x7fe74a425a40 | 2357752 | 2252617 | 29130652 | 65536 | 203475871 | 294718 | 294718 | 27842938.0 | 19451328.0 | 2353.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150019004185 | 16152855495161 | 16152855675322 | 16150019441616 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7fe74d4cf300 | 0x7fe74a425a80 | 2371744 | 2258841 | 29238271 | 65536 | 204194512 | 296467 | 296467 | 27988206.0 | 19596980.0 | 2184.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150019727539 | 16152855699162 | 16152855882523 | 16150020153630 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7fe74d4cf180 | 0x7fe74a425ac0 | 1489296 | 1381859 | 17825222 | 65536 | 117727227 | 186161 | 186161 | 16371758.0 | 14196866.0 | 2359.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150020441464 | 16152855906204 | 16152856017084 | 16150020762966 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7fe74d4cf000 | 0x7fe74a425b00 | 2395088 | 2279677 | 29457715 | 65536 | 210032889 | 299385 | 299385 | 15155304.0 | 12719562.0 | 1330.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150021049600 | 16152856040444 | 16152856221085 | 16150021483740 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7fe74ae2fe80 | 0x7fe74a425b40 | 1368624 | 1258214 | 16215144 | 65536 | 102655384 | 171077 | 171077 | 14644064.0 | 12468060.0 | 2433.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150021777784 | 16152856254046 | 16152856354526 | 16150022118996 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7fe74ae2fd00 | 0x7fe74a425b80 | 2390528 | 2273437 | 29332261 | 65536 | 184519857 | 298815 | 298815 | 27992934.0 | 19599282.0 | 2434.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150022407780 | 16152856380126 | 16152856565088 | 16150022811221 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7fe74ae2fb80 | 0x7fe74a425bc0 | 2383960 | 2270552 | 29313611 | 65536 | 186445817 | 297994 | 297994 | 28009436.0 | 19616430.0 | 2236.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150023097295 | 16152856589568 | 16152856773729 | 16150023532055 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7fe74ae2fa00 | 0x7fe74a425c00 | 1643872 | 1531854 | 19784449 | 65536 | 131555965 | 205483 | 205483 | 17872642.0 | 15673658.0 | 2122.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150023817889 | 16152856797089 | 16152856919330 | 16150024195640 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7fe74ae2f880 | 0x7fe74a425c40 | 2720544 | 2607346 | 33686793 | 65536 | 246736812 | 340067 | 340067 | 15461790.0 | 13030896.0 | 1052.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150024482184 | 16152856943010 | 16152857149571 | 16150024957413 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7fe74ae2f700 | 0x7fe74a425c80 | 1478368 | 1360346 | 17559367 | 65536 | 104684337 | 184795 | 184795 | 15726686.0 | 13517468.0 | 2205.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150025231247 | 16152857196931 | 16152857308772 | 16150025593659 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7fe74d4cf580 | 0x7fe74a425cc0 | 2366824 | 2253938 | 29057299 | 65536 | 195237421 | 295852 | 295852 | 27700026.0 | 19305078.0 | 2385.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150025879693 | 16152857335012 | 16152857518053 | 16150026316403 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7fe74d4cf400 | 0x7fe74a425d00 | 2386384 | 2266963 | 29211052 | 65536 | 187922298 | 298297 | 298297 | 27864632.0 | 19471087.0 | 2260.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150026602807 | 16152857542694 | 16152857728615 | 16150026999048 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7fe74d4cf280 | 0x7fe74a425d40 | 1795904 | 1684103 | 21763245 | 65536 | 150304438 | 224487 | 224487 | 19344110.0 | 17019062.0 | 2109.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150027285862 | 16152857753095 | 16152857887816 | 16150027665893 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7fe74d4cf100 | 0x7fe74a425d80 | 3038784 | 2927436 | 37860774 | 65536 | 281275938 | 379847 | 379847 | 15359248.0 | 12922712.0 | 1437.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150027952407 | 16152857911016 | 16152858143177 | 16150028463136 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7fe74ae2ff80 | 0x7fe74a425dc0 | 1686808 | 1572953 | 20305083 | 65536 | 133536953 | 210850 | 210850 | 17197372.0 | 14856365.0 | 1973.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150028738450 | 16152858215338 | 16152858342058 | 16150029103731 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7fe74ae2fe00 | 0x7fe74a425e00 | 2474872 | 2346751 | 30167687 | 65536 | 186850997 | 309358 | 309358 | 28111256.0 | 19676662.0 | 2276.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150029389665 | 16152858366219 | 16152858557420 | 16150029800096 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7fe74ae2fc80 | 0x7fe74a425e40 | 2476080 | 2344653 | 30239056 | 65536 | 177856499 | 309509 | 309509 | 28515264.0 | 20099527.0 | 2240.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150030069830 | 16152858583020 | 16152858778701 | 16150030537880 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7fe74ae2fb00 | 0x7fe74a425e80 | 2122096 | 2010881 | 26012607 | 65536 | 182317267 | 265261 | 265261 | 20018314.0 | 17543422.0 | 1322.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150030803564 | 16152858802701 | 16152858962862 | 16150031165726 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7fe74ae2f980 | 0x7fe74a425ec0 | 3689144 | 3576642 | 46265655 | 65536 | 339406850 | 461142 | 461142 | 16178464.0 | 13723077.0 | 947.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150031451510 | 16152858987182 | 16152859271184 | 16150031999578 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7fe74ae2f800 | 0x7fe74a425f00 | 1968632 | 1848907 | 23776328 | 65536 | 142979244 | 246078 | 246078 | 21741820.0 | 19517441.0 | 2016.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150032273832 | 16152859317264 | 16152859466385 | 16150032676483 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7fe74ae2f680 | 0x7fe74a425f40 | 2682056 | 2557821 | 32992836 | 65536 | 206860390 | 335256 | 335256 | 28818302.0 | 20308851.0 | 1940.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150032963106 | 16152859494545 | 16152859704627 | 16150033442816 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7fe74d4cf500 | 0x7fe74a425f80 | 2660704 | 2528885 | 32569699 | 65536 | 218474274 | 332587 | 332587 | 30298172.0 | 21850608.0 | 1995.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150033709710 | 16152859742707 | 16152859952148 | 16150034184559 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7fe74d4cf380 | 0x7fe74a425fc0 | 2451632 | 2338690 | 30227778 | 65536 | 214566203 | 306453 | 306453 | 20647646.0 | 18150670.0 | 1414.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150034450974 | 16152859990388 | 16152860175990 | 16150034897154 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7fe74d4cf200 | 0x7fe74a426000 | 4336400 | 4219603 | 54666103 | 65536 | 409556317 | 542049 | 542049 | 16285628.0 | 13833322.0 | 1208.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150035183587 | 16152860202070 | 16152860536792 | 16150035790164 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7fe74d4cf080 | 0x7fe74a426040 | 2189568 | 2075373 | 26751956 | 65536 | 171176415 | 273695 | 273695 | 18616194.0 | 16127931.0 | 1505.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150036065148 | 16152860578872 | 16152860746553 | 16150036470259 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7fe74ae2ff00 | 0x7fe74a426080 | 2931528 | 2806919 | 36176024 | 65536 | 240115787 | 366440 | 366440 | 29307762.0 | 20722632.0 | 1706.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150036756752 | 16152860772313 | 16152861001115 | 16150037253791 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7fe74ae2fd80 | 0x7fe74a4260c0 | 2915584 | 2806122 | 36057564 | 65536 | 255068840 | 364447 | 364447 | 33051446.0 | 24530377.0 | 1719.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150037521325 | 16152861034715 | 16152861263836 | 16150038020544 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7fe74ae2fc00 | 0x7fe74a426100 | 2771472 | 2662537 | 34411481 | 65536 | 249016532 | 346433 | 346433 | 21028328.0 | 18495815.0 | 1003.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150038294058 | 16152861299676 | 16152861510398 | 16150038778738 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7fe74ae2fa80 | 0x7fe74a426140 | 4987808 | 4871693 | 63046869 | 65536 | 470141136 | 623475 | 623475 | 16416688.0 | 13963755.0 | 888.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150039044952 | 16152861544478 | 16152861929760 | 16150039657048 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7fe74ae2f900 | 0x7fe74a426180 | 2424768 | 2315521 | 29886423 | 65536 | 208135171 | 303095 | 303095 | 19156682.0 | 16652186.0 | 1454.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150039931892 | 16152861971680 | 16152862158722 | 16150040356073 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7fe74ae2f780 | 0x7fe74a4261c0 | 3182648 | 3072565 | 39641401 | 65536 | 275275537 | 397830 | 397830 | 30359486.0 | 21691475.0 | 1513.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150040642736 | 16152862184802 | 16152862434243 | 16150041172175 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7fe74ae2f600 | 0x7fe74a426200 | 3113136 | 3011498 | 38976367 | 65536 | 294946377 | 389141 | 389141 | 33408332.0 | 24657308.0 | 1466.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150041440059 | 16152862493604 | 16152862740325 | 16150041947497 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7fe74d4cf480 | 0x7fe74a426240 | 3102744 | 2989117 | 38639818 | 65536 | 284024657 | 387842 | 387842 | 21164414.0 | 18643914.0 | 817.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150042222541 | 16152862803685 | 16152863040487 | 16150042740870 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7fe74d4cf300 | 0x7fe74a426280 | 5740368 | 5629687 | 72853887 | 65536 | 547653358 | 717545 | 717545 | 67324998.0 | 64625303.0 | 811.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150043007904 | 16152863102087 | 16152863554730 | 16150043737138 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7fe74d4cf180 | 0x7fe74a4262c0 | 2908976 | 2795645 | 36161994 | 65536 | 263403179 | 363621 | 363621 | 21482940.0 | 18943935.0 | 1329.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150044013572 | 16152863622570 | 16152863848492 | 16150044499161 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7fe74d4cf000 | 0x7fe74a426300 | 3757512 | 3647679 | 47213613 | 65536 | 352721188 | 469688 | 469688 | 31614178.0 | 22819277.0 | 1456.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150044768755 | 16152863906732 | 16152864204974 | 16150045334462 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7fe74ae2fe80 | 0x7fe74a426340 | 3781608 | 3667293 | 47410584 | 65536 | 349383870 | 472700 | 472700 | 30404846.0 | 21648357.0 | 1619.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150045601846 | 16152864267534 | 16152864566416 | 16150046161434 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7fe74ae2fd00 | 0x7fe74a426380 | 3866872 | 3744097 | 48458514 | 65536 | 357113276 | 483358 | 483358 | 46847218.0 | 44610685.0 | 1131.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150046428308 | 16152864630257 | 16152864926578 | 16150046999635 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7fe74ae2fb80 | 0x7fe74a4263c0 | 7040472 | 6924982 | 89658826 | 65536 | 683157916 | 880058 | 880058 | 73356668.0 | 70050175.0 | 498.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150047267219 | 16152864984979 | 16152865542582 | 16150048088811 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7fe74ae2fa00 | 0x7fe74a426400 | 3410616 | 3299066 | 42692621 | 65536 | 316068847 | 426326 | 426326 | 26404886.0 | 23705675.0 | 903.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150048363295 | 16152865585942 | 16152865849304 | 16150048890643 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7fe74ae2f880 | 0x7fe74a426440 | 4406832 | 4292607 | 55555877 | 65536 | 413803479 | 550853 | 550853 | 32485500.0 | 23649192.0 | 1488.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150049158777 | 16152865882744 | 16152866233786 | 16150049775744 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7fe74ae2f700 | 0x7fe74a426480 | 4415864 | 4302380 | 55697218 | 65536 | 419462949 | 551982 | 551982 | 29743128.0 | 20929057.0 | 1536.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150050042208 | 16152866303227 | 16152866652829 | 16150050671734 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7fe74d4cf580 | 0x7fe74a4264c0 | 4502432 | 4389214 | 56768971 | 65536 | 421066932 | 562803 | 562803 | 54664976.0 | 52333830.0 | 965.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150050935058 | 16152866717309 | 16152867064351 | 16150051505536 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7fe74d4cf400 | 0x7fe74a426500 | 8345976 | 8220667 | 106412747 | 65536 | 804824536 | 1043246 | 1043246 | 79648662.0 | 76061115.0 | 463.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150051772760 | 16152867122912 | 16152867784516 | 16150052708919 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7fe74d4cf280 | 0x7fe74a426540 | 5777800 | 5651936 | 73216952 | 65536 | 546690132 | 722224 | 722224 | 66836826.0 | 64058712.0 | 716.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150052983463 | 16152867853636 | 16152868300999 | 16150053700307 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7fe74d4cf100 | 0x7fe74a426580 | 5789552 | 5670894 | 73426550 | 65536 | 551340219 | 723693 | 723693 | 67610396.0 | 58608738.0 | 758.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150053967281 | 16152868363879 | 16152868827882 | 16150054660146 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7fe74ae2ff80 | 0x7fe74a4265c0 | 5768840 | 5655845 | 73240238 | 65536 | 556287167 | 721104 | 721104 | 67589138.0 | 58578845.0 | 820.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150054925750 | 16152868898762 | 16152869359725 | 16150055658024 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7fe74ae2fe00 | 0x7fe74a426600 | 5815648 | 5691790 | 73694185 | 65536 | 555545649 | 726955 | 726955 | 66867326.0 | 64061622.0 | 700.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150055924528 | 16152869423406 | 16152869874928 | 16150056591723 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7fe74ae2fc80 | 0x7fe74a426640 | 10929976 | 10812215 | 140045190 | 65536 | 1061240538 | 1366246 | 1366246 | 90366692.0 | 86264773.0 | 362.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150056859437 | 16152869911089 | 16152870780054 | 16150057994632 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7fe74ae2fb00 | 0x7fe74a426680 | 10979008 | 10855033 | 140563688 | 65536 | 1081635781 | 1372375 | 1372375 | 88373544.0 | 84487583.0 | 342.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150058269026 | 16152870854134 | 16152871707099 | 16150059387491 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7fe74ae2f980 | 0x7fe74a4266c0 | 10978352 | 10863387 | 140666105 | 65536 | 1076923641 | 1372293 | 1372293 | 91317948.0 | 81097970.0 | 386.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150059653125 | 16152871773340 | 16152872657345 | 16150060763841 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7fe74ae2f800 | 0x7fe74a426700 | 10974272 | 10846512 | 140523391 | 65536 | 1075100765 | 1371783 | 1371783 | 91808648.0 | 81523333.0 | 280.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150061026695 | 16152872727586 | 16152873607591 | 16150062184219 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7fe74ae2f680 | 0x7fe74a426740 | 10999040 | 10879393 | 140962790 | 65536 | 1081331346 | 1374879 | 1374879 | 88742218.0 | 84807407.0 | 287.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150062448193 | 16152873677671 | 16152874536397 | 16150063615738 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7fe74d4cf500 | 0x7fe74a426780 | 21308208 | 21183790 | 274493525 | 65536 | 2130513375 | 2663525 | 2663525 | 106173846.0 | 101224077.0 | 192.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150063881142 | 16152874603917 | 16152876302327 | 16150065871518 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7fe74d4cf380 | 0x7fe74a4267c0 | 21341376 | 21228279 | 275124288 | 65536 | 2128373975 | 2667671 | 2667671 | 119928220.0 | 114633342.0 | 174.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150066105132 | 16152876402328 | 16152878066338 | 16150068065129 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7fe74d4cf200 | 0x7fe74a426800 | 21370792 | 21239094 | 275242436 | 65536 | 2129072385 | 2671348 | 2671348 | 122982504.0 | 111284898.0 | 215.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150068288514 | 16152878159939 | 16152879883949 | 16150070321169 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7fe74d4cf080 | 0x7fe74a426840 | 21347696 | 21224023 | 275032892 | 65536 | 2123793505 | 2668461 | 2668461 | 127223424.0 | 115310390.0 | 251.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 16150070530184 | 16152879977390 | 16152881692920 | 16150072572669 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7fe74ae2ff00 | 0x7fe74a426880 | 21393656 | 21258001 | 275460692 | 65536 | 2127874713 | 2674206 | 2674206 | 119930134.0 | 114631505.0 | 160.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150072789985 | 16152881779321 | 16152883454211 | 16150074790750 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 148935 | 148935 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7fe74ae2fd80 | 0x7fe74a4268c0 | 42043352 | 41923906 | 543378794 | 65536 | 4234933995 | 5255418 | 5255418 | 103101520.0 | 98270320.0 | 60.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 16150075006726 | 16152883541891 | 16152886898872 | 16150078687914 |