50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | CPF_CPF_STAT_BUSY | CPF_CPF_STAT_STALL | CPF_CPF_TCIU_BUSY | CPF_CPF_TCIU_STALL | CPF_CPF_STAT_IDLE | CPF_CPF_TCIU_IDLE | CPF_CMP_UTCL1_STALL_ON_TRANSLATION | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 869685 | 869690 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7fdec2404180 | 4040784 | 3830766 | 524288 | 505097 | 505097 | 505097 | 0 | 4178 | 0 | 0 | 482832 | 0 | 12075315587403796 | 12075319737980510 | 12075319738305467 | 12075315835499200 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 869685 | 869690 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7fdec2435100 | 219112 | 74883 | 512 | 27388 | 27388 | 27388 | 0 | 2343 | 0 | 0 | 26941 | 0 | 12075315849868370 | 12075319753341463 | 12075319753348183 | 12075315850220494 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7fdfcdfa6900 | 0x7fdec2435140 | 1809384 | 1646053 | 65536 | 226172 | 226172 | 226172 | 0 | 2355 | 0 | 0 | 215251 | 0 | 12075315850313286 | 12075319753399863 | 12075319753530422 | 12075315850719822 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7fdfcdfa6800 | 0x7fdec2435180 | 3169936 | 3005273 | 65536 | 396241 | 396241 | 396241 | 0 | 2347 | 0 | 0 | 392231 | 0 | 12075315850814648 | 12075319753574262 | 12075319753821780 | 12075315851358448 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7fdfcdfa6700 | 0x7fdec24351c0 | 3189352 | 3021400 | 65536 | 398668 | 398668 | 398668 | 0 | 2359 | 0 | 0 | 398419 | 0 | 12075315851432586 | 12075319753956658 | 12075319754204016 | 12075315851956329 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7fdfcdfa6600 | 0x7fdec2435200 | 1793488 | 1638838 | 65536 | 224185 | 224185 | 224185 | 0 | 2434 | 0 | 0 | 217278 | 0 | 12075315851984842 | 12075319754338095 | 12075319754463374 | 12075315852385677 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7fdfcdfa6500 | 0x7fdec2435240 | 1717408 | 1550057 | 65536 | 214675 | 214675 | 214675 | 0 | 2327 | 0 | 0 | 220394 | 0 | 12075315852437914 | 12075319754493134 | 12075319754618413 | 12075315852816578 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7fdfcdfa6400 | 0x7fdec2435280 | 1714320 | 1556658 | 65536 | 214289 | 214289 | 214289 | 0 | 2318 | 0 | 0 | 209547 | 0 | 12075315852896025 | 12075319754675693 | 12075319754799852 | 12075315853264290 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7fdfcdfa6300 | 0x7fdec24352c0 | 3257312 | 3078800 | 65536 | 407163 | 407163 | 407163 | 0 | 2505 | 0 | 0 | 397597 | 0 | 12075315853313741 | 12075319754833611 | 12075319755084649 | 12075315853868222 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7fdfcdfa6a00 | 0x7fdec2435300 | 3180112 | 3016507 | 65536 | 397513 | 397513 | 397513 | 0 | 2500 | 0 | 0 | 398451 | 158 | 12075315853890914 | 12075319755219048 | 12075319755463206 | 12075315854451385 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7fdfcdfa6900 | 0x7fdec2435340 | 1737576 | 1575579 | 65536 | 217196 | 217196 | 217196 | 0 | 2380 | 0 | 0 | 208961 | 0 | 12075315854480008 | 12075319755598085 | 12075319755723364 | 12075315854860956 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7fdfcdfa6800 | 0x7fdec2435380 | 1771152 | 1616521 | 65536 | 221393 | 221393 | 221393 | 0 | 2338 | 0 | 0 | 212395 | 0 | 12075315854910338 | 12075319755752164 | 12075319755877123 | 12075315855297958 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7fdfcdfa6700 | 0x7fdec24353c0 | 1694304 | 1525377 | 65536 | 211787 | 211787 | 211787 | 0 | 2410 | 0 | 0 | 212491 | 0 | 12075315855357649 | 12075319755920322 | 12075319756045121 | 12075315855730422 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7fdfcdfa6600 | 0x7fdec2435400 | 3229712 | 3064323 | 65536 | 403713 | 403713 | 403713 | 0 | 2529 | 0 | 0 | 405416 | 0 | 12075315855780796 | 12075319756075201 | 12075319756325759 | 12075315856338081 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7fdfcdfa6500 | 0x7fdec2435440 | 3643816 | 3004078 | 65536 | 455476 | 455476 | 455476 | 158 | 2510 | 0 | 0 | 390636 | 0 | 12075315856367586 | 12075319756460158 | 12075319756708796 | 12075315856941753 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7fdfcdfa6400 | 0x7fdec2435480 | 1718160 | 1552053 | 65536 | 214769 | 214769 | 214769 | 0 | 2281 | 0 | 0 | 216843 | 0 | 12075315856976567 | 12075319756839835 | 12075319756964794 | 12075315857370039 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7fdfcdfa6300 | 0x7fdec24354c0 | 1740064 | 1576789 | 65536 | 217507 | 217507 | 217507 | 0 | 2389 | 0 | 0 | 216268 | 0 | 12075315857418979 | 12075319756996954 | 12075319757121913 | 12075315857806269 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7fdfcdfa6a00 | 0x7fdec2435500 | 1707024 | 1546990 | 65536 | 213377 | 213377 | 213377 | 0 | 2296 | 0 | 0 | 210444 | 0 | 12075315857865810 | 12075319757164152 | 12075319757288151 | 12075315858249563 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7fdfcdfa6900 | 0x7fdec2435540 | 3221352 | 3046256 | 65536 | 402668 | 402668 | 402668 | 0 | 2501 | 0 | 0 | 392492 | 0 | 12075315858300477 | 12075319757318231 | 12075319757567189 | 12075315858849608 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7fdfcdfa6800 | 0x7fdec2435580 | 3144592 | 2984630 | 65536 | 393073 | 393073 | 393073 | 0 | 2566 | 0 | 0 | 400764 | 158 | 12075315858879614 | 12075319757699028 | 12075319757981906 | 12075315859423314 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7fdfcdfa6700 | 0x7fdec24355c0 | 1765672 | 1607605 | 65536 | 220708 | 220708 | 220708 | 0 | 2305 | 0 | 0 | 214500 | 0 | 12075315859450855 | 12075319758051665 | 12075319758176304 | 12075315859834117 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7fdfcdfa6600 | 0x7fdec2435600 | 1661584 | 1501547 | 65536 | 207697 | 207697 | 207697 | 0 | 2249 | 0 | 0 | 215644 | 0 | 12075315859883369 | 12075319758209904 | 12075319758334863 | 12075315860264657 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7fdfcdfa6500 | 0x7fdec2435640 | 1722976 | 1564474 | 65536 | 215371 | 215371 | 215371 | 0 | 2347 | 0 | 0 | 215499 | 0 | 12075315860323707 | 12075319758378382 | 12075319758502221 | 12075315860695518 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7fdfcdfa6400 | 0x7fdec2435680 | 3255760 | 3066445 | 65536 | 406969 | 406969 | 406969 | 0 | 2468 | 0 | 0 | 399867 | 158 | 12075315860737576 | 12075319758533741 | 12075319758779179 | 12075315861290153 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7fdfcdfa6300 | 0x7fdec24356c0 | 3170080 | 2999943 | 65536 | 396259 | 396259 | 396259 | 0 | 2530 | 0 | 0 | 403444 | 158 | 12075315861315460 | 12075319758864778 | 12075319759114376 | 12075315861865602 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7fdfcdfa6a00 | 0x7fdec2435700 | 1674640 | 1520178 | 65536 | 209329 | 209329 | 209329 | 0 | 2306 | 0 | 0 | 216155 | 0 | 12075315861892973 | 12075319759209256 | 12075319759333095 | 12075315862275694 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7fdfcdfa6900 | 0x7fdec2435740 | 1770528 | 1606221 | 65536 | 221315 | 221315 | 221315 | 0 | 2371 | 0 | 0 | 211010 | 0 | 12075315862325356 | 12075319759364774 | 12075319759489253 | 12075315862709420 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7fdfcdfa6800 | 0x7fdec2435780 | 1749392 | 1585041 | 65536 | 218673 | 218673 | 218673 | 0 | 2381 | 0 | 0 | 210845 | 0 | 12075315862767638 | 12075319759529893 | 12075319759654852 | 12075315863136984 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7fdfcdfa6700 | 0x7fdec24357c0 | 3228640 | 3056689 | 65536 | 403579 | 403579 | 403579 | 0 | 2456 | 0 | 0 | 393939 | 0 | 12075315863188350 | 12075319759687812 | 12075319759935810 | 12075315863733292 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7fdfcdfa6600 | 0x7fdec2435800 | 3200208 | 3016637 | 65536 | 400025 | 400025 | 400025 | 0 | 2507 | 0 | 0 | 405570 | 0 | 12075315863765382 | 12075319760050849 | 12075319760294207 | 12075315864306187 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7fdfcdfa6500 | 0x7fdec2435840 | 1672160 | 1505023 | 65536 | 209019 | 209019 | 209019 | 0 | 2340 | 0 | 0 | 210084 | 0 | 12075315864329931 | 12075319760430366 | 12075319760555165 | 12075315864712031 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7fdfcdfa6400 | 0x7fdec2435880 | 1770192 | 1603134 | 65536 | 221273 | 221273 | 221273 | 0 | 2290 | 0 | 0 | 213014 | 0 | 12075315864761353 | 12075319760583164 | 12075319760708763 | 12075315865152028 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7fdfcdfa6300 | 0x7fdec24358c0 | 1675040 | 1515023 | 65536 | 209379 | 209379 | 209379 | 0 | 2352 | 0 | 0 | 214578 | 0 | 12075315865211409 | 12075319760750043 | 12075319760874362 | 12075315865578982 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7fdfcdfa6a00 | 0x7fdec2435900 | 3113680 | 2925202 | 65536 | 389209 | 389209 | 389209 | 0 | 2239 | 0 | 0 | 393465 | 0 | 12075315865626741 | 12075319760903962 | 12075319761151000 | 12075315866171663 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7fdfcdfa6900 | 0x7fdec2435940 | 3640872 | 3013320 | 65536 | 455108 | 455108 | 455108 | 158 | 2299 | 0 | 0 | 392748 | 0 | 12075315866196539 | 12075319761269719 | 12075319761513717 | 12075315866783119 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7fdfcdfa6800 | 0x7fdec2435980 | 1741392 | 1569335 | 65536 | 217673 | 217673 | 217673 | 0 | 2275 | 0 | 0 | 208551 | 0 | 12075315866820739 | 12075319761624916 | 12075319761750355 | 12075315867205634 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7fdfcdfa6700 | 0x7fdec24359c0 | 1791840 | 1633357 | 65536 | 223979 | 223979 | 223979 | 0 | 2351 | 0 | 0 | 215906 | 0 | 12075315867247572 | 12075319761780435 | 12075319761906673 | 12075315867644670 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7fdfcdfa6600 | 0x7fdec2435a00 | 1742032 | 1580598 | 65536 | 217753 | 217753 | 217753 | 0 | 4425 | 0 | 0 | 214796 | 0 | 12075315867702437 | 12075319761948273 | 12075319762072432 | 12075315868078666 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7fdfcdfa6500 | 0x7fdec2435a40 | 3198624 | 3027370 | 65536 | 399827 | 399827 | 399827 | 0 | 2530 | 0 | 0 | 396804 | 0 | 12075315868128289 | 12075319762101072 | 12075319762350030 | 12075315868679603 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7fdfcdfa6400 | 0x7fdec2435a80 | 3139600 | 2974211 | 65536 | 392449 | 392449 | 392449 | 0 | 2490 | 0 | 0 | 395918 | 0 | 12075315868707275 | 12075319762398189 | 12075319762642187 | 12075315869260813 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7fdfcdfa6300 | 0x7fdec2435ac0 | 1715880 | 1558726 | 65536 | 214484 | 214484 | 214484 | 0 | 2264 | 0 | 0 | 213692 | 0 | 12075315869284387 | 12075319762686187 | 12075319762811146 | 12075315869693347 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7fdfcdfa6a00 | 0x7fdec2435b00 | 1817488 | 1658876 | 65536 | 227185 | 227185 | 227185 | 0 | 2321 | 0 | 0 | 221438 | 0 | 12075315869743690 | 12075319762840266 | 12075319762965705 | 12075315870082821 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7fdfcdfa6900 | 0x7fdec2435b40 | 1662248 | 1502006 | 65536 | 207780 | 207780 | 207780 | 0 | 2297 | 0 | 0 | 219443 | 0 | 12075315870145487 | 12075319763007624 | 12075319763132423 | 12075315870453700 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7fdfcdfa6800 | 0x7fdec2435b80 | 3208208 | 3029913 | 65536 | 401025 | 401025 | 401025 | 0 | 2493 | 0 | 0 | 413315 | 0 | 12075315870504013 | 12075319763162183 | 12075319763412261 | 12075315870989235 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7fdfcdfa6700 | 0x7fdec2435bc0 | 3684008 | 3027904 | 65536 | 460500 | 460500 | 460500 | 158 | 2474 | 0 | 0 | 400634 | 0 | 12075315871024430 | 12075319763457221 | 12075319763739778 | 12075315871551790 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7fdfcdfa6600 | 0x7fdec2435c00 | 1672464 | 1500763 | 65536 | 209057 | 209057 | 209057 | 0 | 2326 | 0 | 0 | 218189 | 0 | 12075315871581736 | 12075319763786178 | 12075319763911137 | 12075315871900859 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7fdfcdfa6500 | 0x7fdec2435c40 | 1754336 | 1589687 | 65536 | 219291 | 219291 | 219291 | 0 | 2315 | 0 | 0 | 219284 | 0 | 12075315871951823 | 12075319763944897 | 12075319764071776 | 12075315872283289 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7fdfcdfa6400 | 0x7fdec2435c80 | 1657360 | 1494334 | 65536 | 207169 | 207169 | 207169 | 0 | 2291 | 0 | 0 | 217406 | 0 | 12075315872344593 | 12075319764114495 | 12075319764239934 | 12075315872649650 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7fdfcdfa6300 | 0x7fdec2435cc0 | 3251936 | 3086276 | 65536 | 406491 | 406491 | 406491 | 0 | 2453 | 0 | 0 | 409653 | 0 | 12075315872698331 | 12075319764268574 | 12075319764522172 | 12075315873184554 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7fdfcdfa6a00 | 0x7fdec2435d00 | 3146256 | 2963988 | 65536 | 393281 | 393281 | 393281 | 0 | 2477 | 0 | 0 | 398124 | 0 | 12075315873208398 | 12075319764567132 | 12075319764811770 | 12075315873700913 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7fdfcdfa6900 | 0x7fdec2435d40 | 1772712 | 1613211 | 65536 | 221588 | 221588 | 221588 | 0 | 4143 | 0 | 0 | 209665 | 0 | 12075315873725409 | 12075319764860249 | 12075319764984728 | 12075315874054200 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7fdfcdfa6800 | 0x7fdec2435d80 | 1782736 | 1624303 | 65536 | 222841 | 222841 | 222841 | 0 | 2497 | 0 | 0 | 225310 | 0 | 12075315874103231 | 12075319765018008 | 12075319765146487 | 12075315874429698 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7fdfcdfa6700 | 0x7fdec2435dc0 | 1716072 | 1555585 | 65536 | 214508 | 214508 | 214508 | 0 | 2332 | 0 | 0 | 209489 | 0 | 12075315874489068 | 12075319765189207 | 12075319765314486 | 12075315874799495 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7fdfcdfa6600 | 0x7fdec2435e00 | 3143376 | 2966495 | 65536 | 392921 | 392921 | 392921 | 0 | 2328 | 0 | 0 | 416211 | 0 | 12075315874849157 | 12075319765346325 | 12075319765598163 | 12075315875332685 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7fdfcdfa6500 | 0x7fdec2435e40 | 3194024 | 3016831 | 65536 | 399252 | 399252 | 399252 | 0 | 2537 | 0 | 0 | 394249 | 0 | 12075315875355508 | 12075319765641843 | 12075319765886321 | 12075315875856218 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7fdfcdfa6400 | 0x7fdec2435e80 | 1699856 | 1543353 | 65536 | 212481 | 212481 | 212481 | 0 | 2298 | 0 | 0 | 268359 | 0 | 12075315875887607 | 12075319765932561 | 12075319766058000 | 12075315876207301 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7fdfcdfa6300 | 0x7fdec2435ec0 | 1812128 | 1641405 | 65536 | 226515 | 226515 | 226515 | 0 | 2374 | 0 | 0 | 226059 | 0 | 12075315876258145 | 12075319766085679 | 12075319766215758 | 12075315876586746 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7fdfcdfa6a00 | 0x7fdec2435f00 | 1699792 | 1540917 | 65536 | 212473 | 212473 | 212473 | 0 | 4736 | 0 | 0 | 215804 | 0 | 12075315876648390 | 12075319766258478 | 12075319766383757 | 12075315876952215 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7fdfcdfa6900 | 0x7fdec2435f40 | 3266216 | 3062308 | 65536 | 408276 | 408276 | 408276 | 0 | 2474 | 0 | 0 | 415635 | 0 | 12075315877007377 | 12075319766416397 | 12075319766670795 | 12075315877496246 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7fdfcdfa6800 | 0x7fdec2435f80 | 3151504 | 2969093 | 65536 | 393937 | 393937 | 393937 | 0 | 2471 | 0 | 0 | 397002 | 0 | 12075315877519168 | 12075319766710954 | 12075319766955112 | 12075315878016021 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7fdfcdfa6700 | 0x7fdec2435fc0 | 1724008 | 1562109 | 65536 | 215500 | 215500 | 215500 | 0 | 2371 | 0 | 0 | 278390 | 0 | 12075315878039325 | 12075319766994152 | 12075319767118951 | 12075315878374147 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7fdfcdfa6600 | 0x7fdec2436000 | 1798736 | 1629771 | 65536 | 224841 | 224841 | 224841 | 0 | 2462 | 0 | 0 | 221798 | 0 | 12075315878423659 | 12075319767146791 | 12075319767284230 | 12075315878752350 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7fdfcdfa6500 | 0x7fdec2436040 | 1688168 | 1527612 | 65536 | 211020 | 211020 | 211020 | 0 | 2471 | 0 | 0 | 208450 | 0 | 12075315878812862 | 12075319767328229 | 12075319767452868 | 12075315879116186 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7fdfcdfa6400 | 0x7fdec2436080 | 3209744 | 3030054 | 65536 | 401217 | 401217 | 401217 | 0 | 2513 | 0 | 0 | 408694 | 0 | 12075315879166048 | 12075319767481668 | 12075319767739746 | 12075315879652833 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7fdfcdfa6300 | 0x7fdec24360c0 | 3202016 | 3024999 | 65536 | 400251 | 400251 | 400251 | 0 | 2516 | 0 | 0 | 392940 | 158 | 12075315879682949 | 12075319767784065 | 12075319768030143 | 12075315880163953 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7fdfcdfa6a00 | 0x7fdec2436100 | 1704336 | 1547526 | 65536 | 213041 | 213041 | 213041 | 0 | 2347 | 0 | 0 | 208865 | 0 | 12075315880190843 | 12075319768082943 | 12075319768207902 | 12075315880514183 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7fdfcdfa6900 | 0x7fdec2436140 | 1869088 | 1685810 | 65536 | 233635 | 233635 | 233635 | 0 | 2347 | 0 | 0 | 229472 | 0 | 12075315880564357 | 12075319768238622 | 12075319768381021 | 12075315880890012 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7fdfcdfa6800 | 0x7fdec2436180 | 1685840 | 1522231 | 65536 | 210729 | 210729 | 210729 | 0 | 2290 | 0 | 0 | 217990 | 0 | 12075315880953109 | 12075319768423580 | 12075319768548859 | 12075315881263335 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7fdfcdfa6700 | 0x7fdec24361c0 | 3262504 | 3066923 | 65536 | 407812 | 407812 | 407812 | 0 | 2542 | 0 | 0 | 412569 | 0 | 12075315881311836 | 12075319768577819 | 12075319768831417 | 12075315881801866 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7fdfcdfa6600 | 0x7fdec2436200 | 3140432 | 2955754 | 65536 | 392553 | 392553 | 392553 | 0 | 2466 | 0 | 0 | 399940 | 0 | 12075315881829628 | 12075319768877816 | 12075319769124054 | 12075315882322704 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7fdfcdfa6500 | 0x7fdec2436240 | 1788712 | 1627335 | 65536 | 223588 | 223588 | 223588 | 0 | 2273 | 0 | 0 | 225217 | 0 | 12075315882345787 | 12075319769174934 | 12075319769300533 | 12075315882683063 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7fdfcdfa6400 | 0x7fdec2436280 | 1915728 | 1760416 | 65536 | 239465 | 239465 | 239465 | 0 | 2337 | 0 | 0 | 239501 | 0 | 12075315882731523 | 12075319769330293 | 12075319769478612 | 12075315883066065 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7fdfcdfa6300 | 0x7fdec24362c0 | 1721504 | 1558477 | 65536 | 215187 | 215187 | 215187 | 0 | 2346 | 0 | 0 | 220439 | 0 | 12075315883148378 | 12075319769585811 | 12075319769711410 | 12075315883453776 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7fdfcdfa6a00 | 0x7fdec2436300 | 3258256 | 3075537 | 65536 | 407281 | 407281 | 407281 | 0 | 2482 | 0 | 0 | 415160 | 0 | 12075315883502035 | 12075319769743409 | 12075319770004687 | 12075315883987407 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7fdfcdfa6900 | 0x7fdec2436340 | 3159584 | 2978848 | 65536 | 394947 | 394947 | 394947 | 0 | 2458 | 0 | 0 | 388300 | 0 | 12075315884015519 | 12075319770059407 | 12075319770307085 | 12075315884502795 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7fdfcdfa6800 | 0x7fdec2436380 | 1674320 | 1504545 | 65536 | 209289 | 209289 | 209289 | 0 | 2275 | 0 | 0 | 222023 | 0 | 12075315884530506 | 12075319770360044 | 12075319770485323 | 12075315884855260 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7fdfcdfa6700 | 0x7fdec24363c0 | 2013032 | 1851755 | 65536 | 251628 | 251628 | 251628 | 0 | 2354 | 0 | 0 | 250938 | 0 | 12075315884903028 | 12075319770515723 | 12075319770671722 | 12075315885251967 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7fdfcdfa6600 | 0x7fdec2436400 | 1740368 | 1574023 | 65536 | 217545 | 217545 | 217545 | 0 | 2334 | 0 | 0 | 217470 | 0 | 12075315885316567 | 12075319770717161 | 12075319770842280 | 12075315885626833 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7fdfcdfa6500 | 0x7fdec2436440 | 3258208 | 3061829 | 65536 | 407275 | 407275 | 407275 | 0 | 2324 | 0 | 0 | 411828 | 0 | 12075315885674281 | 12075319770872200 | 12075319771129158 | 12075315886163090 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7fdfcdfa6400 | 0x7fdec2436480 | 3149840 | 2982199 | 65536 | 393729 | 393729 | 393729 | 0 | 2373 | 0 | 0 | 398782 | 0 | 12075315886187044 | 12075319771178438 | 12075319771457795 | 12075315886679099 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7fdfcdfa6300 | 0x7fdec24364c0 | 1779104 | 1614984 | 65536 | 222387 | 222387 | 222387 | 0 | 2326 | 0 | 0 | 221933 | 0 | 12075315886703564 | 12075319771546595 | 12075319771672354 | 12075315887040039 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7fdfcdfa6a00 | 0x7fdec2436500 | 2101456 | 1941218 | 65536 | 262681 | 262681 | 262681 | 0 | 2315 | 0 | 0 | 262303 | 0 | 12075315887089672 | 12075319771702753 | 12075319771868352 | 12075315887447937 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7fdfcdfa6900 | 0x7fdec2436540 | 1689704 | 1525493 | 65536 | 211212 | 211212 | 211212 | 0 | 2310 | 0 | 0 | 220903 | 0 | 12075315887509421 | 12075319771915712 | 12075319772041311 | 12075315887815751 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7fdfcdfa6800 | 0x7fdec2436580 | 3278032 | 3092273 | 65536 | 409753 | 409753 | 409753 | 0 | 2535 | 0 | 0 | 404114 | 0 | 12075315887863790 | 12075319772068990 | 12075319772323228 | 12075315888364260 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7fdfcdfa6700 | 0x7fdec24365c0 | 3623080 | 2991650 | 65536 | 452884 | 452884 | 452884 | 158 | 2518 | 0 | 0 | 401253 | 0 | 12075315888387643 | 12075319772369468 | 12075319772620666 | 12075315888919942 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7fdfcdfa6600 | 0x7fdec2436600 | 1782224 | 1610564 | 65536 | 222777 | 222777 | 222777 | 0 | 2358 | 0 | 0 | 224183 | 0 | 12075315888943656 | 12075319772669626 | 12075319772794744 | 12075315889278719 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7fdfcdfa6500 | 0x7fdec2436640 | 2203688 | 2040406 | 65536 | 275460 | 275460 | 275460 | 0 | 2376 | 0 | 0 | 273836 | 0 | 12075315889328672 | 12075319772823064 | 12075319772995063 | 12075315889685134 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7fdfcdfa6400 | 0x7fdec2436680 | 1746896 | 1579678 | 65536 | 218361 | 218361 | 218361 | 0 | 2331 | 0 | 0 | 214030 | 0 | 12075315889749664 | 12075319773038582 | 12075319773164341 | 12075315890056494 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7fdfcdfa6300 | 0x7fdec24366c0 | 3256040 | 3053749 | 65536 | 407004 | 407004 | 407004 | 0 | 2436 | 0 | 0 | 401940 | 0 | 12075315890104754 | 12075319773195861 | 12075319773455379 | 12075315890594734 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7fdfcdfa6a00 | 0x7fdec2436700 | 3145872 | 2975449 | 65536 | 393233 | 393233 | 393233 | 0 | 2439 | 0 | 0 | 396925 | 0 | 12075315890618358 | 12075319773501779 | 12075319773748817 | 12075315891105283 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7fdfcdfa6900 | 0x7fdec2436740 | 1755168 | 1589910 | 65536 | 219395 | 219395 | 219395 | 0 | 2307 | 0 | 0 | 224344 | 0 | 12075315891128476 | 12075319773794576 | 12075319773920175 | 12075315891458279 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7fdfcdfa6800 | 0x7fdec2436780 | 2304016 | 2146117 | 65536 | 288001 | 288001 | 288001 | 0 | 2537 | 0 | 0 | 288302 | 0 | 12075315891508001 | 12075319773947855 | 12075319774130094 | 12075315891871447 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7fdfcdfa6700 | 0x7fdec24367c0 | 1783400 | 1621132 | 65536 | 222924 | 222924 | 222924 | 0 | 2290 | 0 | 0 | 226669 | 0 | 12075315891934464 | 12075319774171533 | 12075319774297452 | 12075315892246333 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7fdfcdfa6600 | 0x7fdec2436800 | 3277456 | 3114069 | 65536 | 409681 | 409681 | 409681 | 0 | 2425 | 0 | 0 | 407978 | 0 | 12075315892295374 | 12075319774328812 | 12075319774591850 | 12075315892783802 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7fdfcdfa6500 | 0x7fdec2436840 | 3595944 | 2983415 | 65536 | 449492 | 449492 | 449492 | 158 | 2443 | 0 | 0 | 403305 | 0 | 12075315892805662 | 12075319774634569 | 12075319774880327 | 12075315893332481 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7fdfcdfa6400 | 0x7fdec2436880 | 1726928 | 1555361 | 65536 | 215865 | 215865 | 215865 | 0 | 2646 | 0 | 0 | 221427 | 0 | 12075315893356636 | 12075319774917767 | 12075319775044326 | 12075315893684756 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7fdfcdfa6300 | 0x7fdec24368c0 | 2848424 | 2249644 | 65536 | 356052 | 356052 | 356052 | 158 | 2372 | 0 | 0 | 352580 | 0 | 12075315893734288 | 12075319775076326 | 12075319775266884 | 12075315894185226 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7fdfcdfa6a00 | 0x7fdec2436900 | 1782544 | 1605987 | 65536 | 222817 | 222817 | 222817 | 0 | 2378 | 0 | 0 | 211404 | 0 | 12075315894220622 | 12075319775307524 | 12075319775434563 | 12075315894534856 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7fdfcdfa6900 | 0x7fdec2436940 | 3876768 | 3332759 | 65536 | 484595 | 484595 | 484595 | 158 | 4690 | 0 | 0 | 435515 | 0 | 12075315894582995 | 12075319775465283 | 12075319775746560 | 12075315895121896 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7fdfcdfa6800 | 0x7fdec2436980 | 3145872 | 2932143 | 65536 | 393233 | 393233 | 393233 | 0 | 2402 | 0 | 0 | 400195 | 0 | 12075315895144989 | 12075319775792640 | 12075319776043198 | 12075315895638506 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7fdfcdfa6700 | 0x7fdec24369c0 | 1809184 | 1647957 | 65536 | 226147 | 226147 | 226147 | 0 | 2363 | 0 | 0 | 222765 | 0 | 12075315895661800 | 12075319776093598 | 12075319776219196 | 12075315895997453 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7fdfcdfa6600 | 0x7fdec2436a00 | 2635984 | 2469422 | 65536 | 329497 | 329497 | 329497 | 0 | 2320 | 0 | 0 | 330020 | 0 | 12075315896047997 | 12075319776247196 | 12075319776455515 | 12075315896481743 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7fdfcdfa6500 | 0x7fdec2436a40 | 1816040 | 1651470 | 65536 | 227004 | 227004 | 227004 | 0 | 2327 | 0 | 0 | 225204 | 0 | 12075315896519884 | 12075319776513914 | 12075319776641433 | 12075315896831523 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7fdfcdfa6400 | 0x7fdec2436a80 | 3784400 | 3619910 | 65536 | 473049 | 473049 | 473049 | 0 | 2506 | 0 | 0 | 512223 | 158 | 12075315896884461 | 12075319776668633 | 12075319776974230 | 12075315897398667 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7fdfcdfa6300 | 0x7fdec2436ac0 | 3154216 | 2960287 | 65536 | 394276 | 394276 | 394276 | 0 | 2448 | 0 | 0 | 398366 | 0 | 12075315897439904 | 12075319777020630 | 12075319777270388 | 12075315897886734 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7fdfcdfa6a00 | 0x7fdec2436b00 | 1752592 | 1572698 | 65536 | 219073 | 219073 | 219073 | 0 | 4258 | 0 | 0 | 214497 | 0 | 12075315897931026 | 12075319777319028 | 12075319777446866 | 12075315898245751 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7fdfcdfa6900 | 0x7fdec2436b40 | 2849704 | 2690377 | 65536 | 356212 | 356212 | 356212 | 0 | 2436 | 0 | 0 | 355526 | 0 | 12075315898291796 | 12075319777477266 | 12075319777704784 | 12075315898721084 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7fdfcdfa6800 | 0x7fdec2436b80 | 1777680 | 1592381 | 65536 | 222209 | 222209 | 222209 | 0 | 2397 | 0 | 0 | 222800 | 0 | 12075315898782528 | 12075319777759984 | 12075319777887023 | 12075315899092244 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7fdfcdfa6700 | 0x7fdec2436bc0 | 4037864 | 3876945 | 65536 | 504732 | 504732 | 504732 | 0 | 2460 | 0 | 0 | 543404 | 158 | 12075315899142928 | 12075319777920783 | 12075319778248460 | 12075315899678854 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7fdfcdfa6600 | 0x7fdec2436c00 | 3139024 | 2961123 | 65536 | 392377 | 392377 | 392377 | 0 | 2451 | 0 | 0 | 396251 | 0 | 12075315899722395 | 12075319778293260 | 12075319778541258 | 12075315900170928 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7fdfcdfa6500 | 0x7fdec2436c40 | 1787424 | 1627018 | 65536 | 223427 | 223427 | 223427 | 0 | 2312 | 0 | 0 | 228658 | 0 | 12075315900213507 | 12075319778582857 | 12075319778711016 | 12075315900525778 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7fdfcdfa6400 | 0x7fdec2436c80 | 3053584 | 2892792 | 65536 | 381697 | 381697 | 381697 | 0 | 2506 | 0 | 0 | 427751 | 0 | 12075315900574047 | 12075319778743176 | 12075319778986694 | 12075315901023262 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7fdfcdfa6300 | 0x7fdec2436cc0 | 1819680 | 1654431 | 65536 | 227459 | 227459 | 227459 | 0 | 2276 | 0 | 0 | 222686 | 0 | 12075315901080568 | 12075319779044453 | 12075319779174212 | 12075315901396786 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7fdfcdfa6a00 | 0x7fdec2436d00 | 4588048 | 4424360 | 65536 | 573505 | 573505 | 573505 | 0 | 2442 | 0 | 0 | 571778 | 0 | 12075315901445036 | 12075319779201732 | 12075319779575969 | 12075315902019663 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7fdfcdfa6900 | 0x7fdec2436d40 | 3183712 | 2999084 | 65536 | 397963 | 397963 | 397963 | 0 | 2506 | 0 | 0 | 408103 | 0 | 12075315902067332 | 12075319779619809 | 12075319779871007 | 12075315902517869 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7fdfcdfa6800 | 0x7fdec2436d80 | 1780688 | 1616816 | 65536 | 222585 | 222585 | 222585 | 0 | 2298 | 0 | 0 | 217123 | 0 | 12075315902561280 | 12075319779923326 | 12075319780053405 | 12075315902868601 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7fdfcdfa6700 | 0x7fdec2436dc0 | 3478568 | 3312687 | 65536 | 434820 | 434820 | 434820 | 0 | 2292 | 0 | 0 | 433157 | 0 | 12075315902916550 | 12075319780085885 | 12075319780363323 | 12075315903407963 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7fdfcdfa6600 | 0x7fdec2436e00 | 1825296 | 1656842 | 65536 | 228161 | 228161 | 228161 | 0 | 2352 | 0 | 0 | 217185 | 0 | 12075315903463055 | 12075319780463482 | 12075319780599321 | 12075315903776628 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7fdfcdfa6500 | 0x7fdec2436e40 | 5166304 | 5004683 | 65536 | 645787 | 645787 | 645787 | 0 | 4253 | 0 | 0 | 675009 | 0 | 12075315903824026 | 12075319780629240 | 12075319781050837 | 12075315904455970 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7fdfcdfa6400 | 0x7fdec2436e80 | 3254480 | 3076317 | 65536 | 406809 | 406809 | 406809 | 0 | 4086 | 0 | 0 | 397458 | 0 | 12075315904497267 | 12075319781138356 | 12075319781399634 | 12075315904947574 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7fdfcdfa6300 | 0x7fdec2436ec0 | 1786336 | 1613291 | 65536 | 223291 | 223291 | 223291 | 0 | 2350 | 0 | 0 | 226529 | 0 | 12075315904990814 | 12075319781446514 | 12075319781581553 | 12075315905305409 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7fdfcdfa6a00 | 0x7fdec2436f00 | 4240912 | 3734948 | 65536 | 530113 | 530113 | 530113 | 158 | 2495 | 0 | 0 | 485508 | 158 | 12075315905354891 | 12075319781611312 | 12075319781924430 | 12075315905900024 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7fdfcdfa6900 | 0x7fdec2436f40 | 1952480 | 1785754 | 65536 | 244059 | 244059 | 244059 | 0 | 2316 | 0 | 0 | 239716 | 0 | 12075315905956519 | 12075319781983469 | 12075319782174348 | 12075315906288976 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7fdfcdfa6800 | 0x7fdec2436f80 | 6292112 | 6122417 | 65536 | 786513 | 786513 | 786513 | 0 | 4327 | 0 | 0 | 806421 | 0 | 12075315906337316 | 12075319782203148 | 12075319782718343 | 12075315907059007 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7fdfcdfa6700 | 0x7fdec2436fc0 | 3657248 | 3471055 | 65536 | 457155 | 457155 | 457155 | 0 | 2451 | 0 | 0 | 457902 | 0 | 12075315907104181 | 12075319782764263 | 12075319783065061 | 12075315907594823 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7fdfcdfa6600 | 0x7fdec2437000 | 1967696 | 1811449 | 65536 | 245961 | 245961 | 245961 | 0 | 2386 | 0 | 0 | 244996 | 0 | 12075315907646679 | 12075319783111940 | 12075319783266019 | 12075315907975941 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7fdfcdfa6500 | 0x7fdec2437040 | 4750560 | 4585272 | 65536 | 593819 | 593819 | 593819 | 0 | 2507 | 0 | 0 | 590381 | 0 | 12075315908029200 | 12075319783294019 | 12075319783679616 | 12075315908633893 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7fdfcdfa6400 | 0x7fdec2437080 | 2226192 | 2069831 | 65536 | 278273 | 278273 | 278273 | 0 | 2359 | 0 | 0 | 277114 | 0 | 12075315908688475 | 12075319783739615 | 12075319783914014 | 12075315909039136 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7fdfcdfa6300 | 0x7fdec24370c0 | 7414240 | 7243515 | 65536 | 926779 | 926779 | 926779 | 0 | 2327 | 0 | 0 | 922718 | 0 | 12075315909088608 | 12075319783941533 | 12075319784549848 | 12075315909908502 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7fdfcdfa6a00 | 0x7fdec2437100 | 4011856 | 3848541 | 65536 | 501481 | 501481 | 501481 | 0 | 2471 | 0 | 0 | 502684 | 0 | 12075315909953826 | 12075319784600888 | 12075319784931925 | 12075315910500201 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7fdfcdfa6900 | 0x7fdec2437140 | 2258408 | 2100363 | 65536 | 282300 | 282300 | 282300 | 0 | 2336 | 0 | 0 | 281005 | 0 | 12075315910523144 | 12075319784976885 | 12075319785154003 | 12075315910895576 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7fdfcdfa6800 | 0x7fdec2437180 | 5581456 | 5424385 | 65536 | 697681 | 697681 | 697681 | 0 | 2420 | 0 | 0 | 696483 | 0 | 12075315910944437 | 12075319785186323 | 12075319785640719 | 12075315911630001 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7fdfcdfa6700 | 0x7fdec24371c0 | 2513832 | 2354343 | 65536 | 314228 | 314228 | 314228 | 0 | 2298 | 0 | 0 | 310783 | 0 | 12075315911666909 | 12075319785699119 | 12075319785898637 | 12075315912084916 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7fdfcdfa6600 | 0x7fdec2437200 | 8513040 | 8356820 | 65536 | 1064129 | 1064129 | 1064129 | 0 | 4340 | 0 | 0 | 1062892 | 0 | 12075315912109792 | 12075319785941837 | 12075319786643911 | 12075315913048386 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7fdfcdfa6500 | 0x7fdec2437240 | 4571936 | 4394802 | 65536 | 571491 | 571491 | 571491 | 0 | 2434 | 0 | 0 | 565400 | 0 | 12075315913072130 | 12075319786701351 | 12075319787072548 | 12075315913684759 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7fdfcdfa6400 | 0x7fdec2437280 | 2535248 | 2377512 | 65536 | 316905 | 316905 | 316905 | 0 | 2317 | 0 | 0 | 316075 | 0 | 12075315913707040 | 12075319787118627 | 12075319787320386 | 12075315914147038 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7fdfcdfa6300 | 0x7fdec24372c0 | 6429224 | 6264254 | 65536 | 803652 | 803652 | 803652 | 0 | 2462 | 0 | 0 | 825517 | 0 | 12075315914170291 | 12075319787368865 | 12075319787893981 | 12075315914929872 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7fdfcdfa6a00 | 0x7fdec2437300 | 2790672 | 2633764 | 65536 | 348833 | 348833 | 348833 | 0 | 2461 | 0 | 0 | 399195 | 0 | 12075315914965839 | 12075319787951901 | 12075319788210298 | 12075315915418710 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7fdfcdfa6900 | 0x7fdec2437340 | 9672288 | 9494969 | 65536 | 1209035 | 1209035 | 1209035 | 0 | 2415 | 0 | 0 | 1204584 | 0 | 12075315915442254 | 12075319788262458 | 12075319789058612 | 12075315916479201 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7fdfcdfa6800 | 0x7fdec2437380 | 5097936 | 4933135 | 65536 | 637241 | 637241 | 637241 | 0 | 2443 | 0 | 0 | 637183 | 158 | 12075315916503376 | 12075319789105651 | 12075319789523568 | 12075315917160367 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7fdfcdfa6700 | 0x7fdec24373c0 | 2819944 | 2653711 | 65536 | 352492 | 352492 | 352492 | 0 | 2554 | 0 | 0 | 352588 | 0 | 12075315917190753 | 12075319789567887 | 12075319789791566 | 12075315917649666 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7fdfcdfa6600 | 0x7fdec2437400 | 7267216 | 7110963 | 65536 | 908401 | 908401 | 908401 | 0 | 2461 | 0 | 0 | 906583 | 0 | 12075315917675304 | 12075319789841325 | 12075319790436680 | 12075315918502340 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7fdfcdfa6500 | 0x7fdec2437440 | 3338472 | 3170744 | 65536 | 417308 | 417308 | 417308 | 0 | 2506 | 0 | 0 | 414882 | 0 | 12075315918539700 | 12075319790503240 | 12075319790770918 | 12075315919028508 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7fdfcdfa6400 | 0x7fdec2437480 | 11902416 | 11740618 | 65536 | 1487801 | 1487801 | 1487801 | 0 | 2465 | 0 | 0 | 1484318 | 0 | 12075315919051831 | 12075319790816197 | 12075319791799389 | 12075315920275896 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7fdfcdfa6300 | 0x7fdec24374c0 | 6227360 | 6054824 | 65536 | 778419 | 778419 | 778419 | 0 | 2429 | 0 | 0 | 778418 | 0 | 12075315920298528 | 12075319791845469 | 12075319792358585 | 12075315921062738 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7fdfcdfa6a00 | 0x7fdec2437500 | 3373072 | 3213750 | 65536 | 421633 | 421633 | 421633 | 0 | 2415 | 0 | 0 | 420803 | 0 | 12075315921092253 | 12075319792416984 | 12075319792687062 | 12075315921603783 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7fdfcdfa6900 | 0x7fdec2437540 | 9009696 | 8841419 | 65536 | 1126211 | 1126211 | 1126211 | 0 | 2442 | 0 | 0 | 1123923 | 0 | 12075315921626505 | 12075319792732822 | 12075319793469136 | 12075315922600736 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7fdfcdfa6800 | 0x7fdec2437580 | 3894800 | 3736105 | 65536 | 486849 | 486849 | 486849 | 0 | 2403 | 0 | 0 | 487515 | 0 | 12075315922639428 | 12075319793529935 | 12075319793845932 | 12075315923176005 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7fdfcdfa6700 | 0x7fdec24375c0 | 14145632 | 13977079 | 65536 | 1768203 | 1768203 | 1768203 | 0 | 2506 | 0 | 0 | 1765941 | 0 | 12075315923199138 | 12075319793890252 | 12075319795058243 | 12075315924604318 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7fdfcdfa6600 | 0x7fdec2437600 | 7342864 | 7172130 | 65536 | 917857 | 917857 | 917857 | 0 | 2456 | 0 | 0 | 915849 | 0 | 12075315924634284 | 12075319795114082 | 12075319795722717 | 12075315925480977 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7fdfcdfa6500 | 0x7fdec2437640 | 3957864 | 3760747 | 65536 | 494732 | 494732 | 494732 | 0 | 2496 | 0 | 0 | 531722 | 0 | 12075315925504682 | 12075319795770717 | 12075319796088634 | 12075315926066526 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7fdfcdfa6400 | 0x7fdec2437680 | 10690576 | 10526703 | 65536 | 1336321 | 1336321 | 1336321 | 0 | 2395 | 0 | 0 | 1335518 | 0 | 12075315926090280 | 12075319796140314 | 12075319797016627 | 12075315927204540 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7fdfcdfa6300 | 0x7fdec24376c0 | 5295456 | 4858194 | 65536 | 661931 | 661931 | 661931 | 158 | 4370 | 0 | 0 | 661579 | 0 | 12075315927239435 | 12075319797076466 | 12075319797487023 | 12075315927906615 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7fdfcdfa6a00 | 0x7fdec2437700 | 18663568 | 18495120 | 65536 | 2332945 | 2332945 | 2332945 | 0 | 2507 | 0 | 0 | 2331478 | 0 | 12075315927929347 | 12075319797535502 | 12075319799082050 | 12075315929727458 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7fdfcdfa6900 | 0x7fdec2437740 | 9617256 | 9436837 | 65536 | 1202156 | 1202156 | 1202156 | 158 | 2300 | 0 | 0 | 1197048 | 0 | 12075315929751182 | 12075319799131009 | 12075319799922683 | 12075315930789361 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7fdfcdfa6800 | 0x7fdec2437780 | 5118992 | 4951905 | 65536 | 639873 | 639873 | 639873 | 0 | 2456 | 0 | 0 | 638141 | 158 | 12075315930821731 | 12075319799992282 | 12075319800405079 | 12075315931464035 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7fdfcdfa6700 | 0x7fdec24377c0 | 14050720 | 13891670 | 65536 | 1756339 | 1756339 | 1756339 | 0 | 2376 | 0 | 0 | 1756290 | 0 | 12075315931494261 | 12075319800475316 | 12075319801633228 | 12075315932888321 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7fdfcdfa6600 | 0x7fdec2437800 | 9585168 | 9423228 | 65536 | 1198145 | 1198145 | 1198145 | 0 | 2428 | 0 | 0 | 1196429 | 0 | 12075315932928356 | 12075319801743146 | 12075319802527461 | 12075315933950164 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7fdfcdfa6500 | 0x7fdec2437840 | 36666728 | 36506837 | 65536 | 4583340 | 4583340 | 4583340 | 0 | 2560 | 0 | 0 | 4581013 | 0 | 12075315933980120 | 12075319802619460 | 12075319805670479 | 12075315937268310 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7fdfcdfa6400 | 0x7fdec2437880 | 18597072 | 18425477 | 65536 | 2324633 | 2324633 | 2324633 | 0 | 2570 | 0 | 0 | 2322087 | 0 | 12075315937295110 | 12075319805759758 | 12075319807300227 | 12075315939083833 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7fdfcdfa6300 | 0x7fdec24378c0 | 9607584 | 9452307 | 65536 | 1200947 | 1200947 | 1200947 | 0 | 2535 | 0 | 0 | 1199674 | 0 | 12075315939111385 | 12075319807394467 | 12075319808185021 | 12075315940139555 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7fdfcdfa6a00 | 0x7fdec2437900 | 27541072 | 27379842 | 65536 | 3442633 | 3442633 | 3442633 | 0 | 2393 | 0 | 0 | 3440013 | 0 | 12075315940163019 | 12075319808277341 | 12075319810557645 | 12075315942686708 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7fdfcdfa6900 | 0x7fdec2437940 | 18580256 | 18419864 | 65536 | 2322531 | 2322531 | 2322531 | 158 | 2352 | 0 | 0 | 2321590 | 0 | 12075315942730379 | 12075319810671404 | 12075319812205633 | 12075315944485490 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7fdfcdfa6800 | 0x7fdec2437980 | 72713552 | 72531960 | 65536 | 9089193 | 9089193 | 9089193 | 158 | 4667 | 0 | 0 | 9084380 | 158 | 12075315944516959 | 12075319812294752 | 12075319818351350 | 12075315950808660 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7fdfcdfa6700 | 0x7fdec24379c0 | 36587616 | 36419712 | 65536 | 4573451 | 4573451 | 4573451 | 0 | 4154 | 0 | 0 | 4569115 | 0 | 12075315950836822 | 12075319818441109 | 12075319821483968 | 12075315954129921 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7fdfcdfa6600 | 0x7fdec2437a00 | 18620624 | 18468332 | 65536 | 2327577 | 2327577 | 2327577 | 0 | 2488 | 0 | 0 | 2327718 | 0 | 12075315954160448 | 12075319821572928 | 12075319823111157 | 12075315955938041 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 869685 | 869690 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7fdfcdfa6500 | 0x7fdec2437a40 | 54490728 | 54325942 | 65536 | 6811340 | 6811340 | 6811340 | 0 | 2452 | 0 | 0 | 6809737 | 0 | 12075315955966774 | 12075319823199796 | 12075319827726164 | 12075315960741406 |