46 KiB
46 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | SQ_WAVES | SQ_IFETCH | SQ_IFETCH_LEVEL | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 924536 | 924541 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f84a5404180 | 508009 | 508009 | 524288 | 6291456 | 792387 | 101370816 | 12076394277442031 | 12076394523449847 | 12076394523776405 | 12076394523886678 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 924536 | 924541 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f84a5435100 | 27741 | 27741 | 512 | 8192 | 8751 | 1126412 | 12076394538250372 | 12076394538559556 | 12076394538565956 | 12076394538575757 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f85b1136900 | 0x7f84a5435140 | 218843 | 218843 | 65536 | 917504 | 140602 | 17958860 | 12076394538634306 | 12076394538864355 | 12076394538997634 | 12076394539014192 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f85b1136800 | 0x7f84a5435180 | 391369 | 391369 | 65536 | 1245184 | 183513 | 23498744 | 12076394539075696 | 12076394539274272 | 12076394539523711 | 12076394539595061 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f85b1136700 | 0x7f84a54351c0 | 459723 | 459723 | 65536 | 983040 | 136938 | 17571916 | 12076394539625418 | 12076394539941628 | 12076394540235866 | 12076394540307836 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f85b1136600 | 0x7f84a5435200 | 216625 | 216625 | 65536 | 1048576 | 170808 | 21847984 | 12076394540363680 | 12076394540602264 | 12076394540734583 | 12076394540738847 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f85b1136500 | 0x7f84a5435240 | 216499 | 216499 | 65536 | 983040 | 149257 | 19157660 | 12076394540790944 | 12076394541008822 | 12076394541141301 | 12076394541145693 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f85b1136400 | 0x7f84a5435280 | 215993 | 215993 | 65536 | 1048576 | 146565 | 18735948 | 12076394541232685 | 12076394541411699 | 12076394541543379 | 12076394541547580 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f85b1136300 | 0x7f84a54352c0 | 399139 | 399139 | 65536 | 1572864 | 237124 | 30346960 | 12076394541601440 | 12076394541793777 | 12076394542048496 | 12076394542117970 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f85b1136a00 | 0x7f84a5435300 | 399897 | 399897 | 65536 | 1179648 | 158176 | 20239020 | 12076394542172681 | 12076394542416333 | 12076394542670732 | 12076394542712996 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f85b1136900 | 0x7f84a5435340 | 217291 | 217291 | 65536 | 1114112 | 157088 | 20083496 | 12076394542748261 | 12076394542932810 | 12076394543066090 | 12076394543069909 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f85b1136800 | 0x7f84a5435380 | 217929 | 217929 | 65536 | 1114112 | 155738 | 19935120 | 12076394543124460 | 12076394543306248 | 12076394543439047 | 12076394543442962 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f85b1136700 | 0x7f84a54353c0 | 218755 | 218755 | 65536 | 1179648 | 173440 | 22182620 | 12076394543507562 | 12076394543678406 | 12076394543812645 | 12076394543816476 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f85b1136600 | 0x7f84a5435400 | 465249 | 465249 | 65536 | 1835008 | 255787 | 32758228 | 12076394543868283 | 12076394544052164 | 12076394544350242 | 12076394544420930 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f85b1136500 | 0x7f84a5435440 | 399475 | 399475 | 65536 | 1310720 | 179487 | 22864852 | 12076394544451286 | 12076394544641920 | 12076394544895519 | 12076394544963879 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f85b1136400 | 0x7f84a5435480 | 216897 | 216897 | 65536 | 1245184 | 176267 | 22583788 | 12076394544987924 | 12076394545186237 | 12076394545319356 | 12076394545323467 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f85b1136300 | 0x7f84a54354c0 | 220843 | 220843 | 65536 | 1310720 | 183428 | 23425492 | 12076394545375704 | 12076394545553915 | 12076394545688794 | 12076394545692703 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f85b1136a00 | 0x7f84a5435500 | 211281 | 211281 | 65536 | 1310720 | 183109 | 23455256 | 12076394545758866 | 12076394545933113 | 12076394546063192 | 12076394546067239 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f85b1136900 | 0x7f84a5435540 | 394163 | 394163 | 65536 | 2097152 | 289216 | 37034116 | 12076394546117743 | 12076394546307990 | 12076394546559669 | 12076394546600791 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f85b1136800 | 0x7f84a5435580 | 401737 | 401737 | 65536 | 1441792 | 196656 | 25133280 | 12076394546635005 | 12076394546815027 | 12076394547072306 | 12076394547142267 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f85b1136700 | 0x7f84a54355c0 | 218019 | 218019 | 65536 | 1376256 | 190054 | 24327260 | 12076394547196508 | 12076394547422864 | 12076394547555823 | 12076394547559533 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f85b1136600 | 0x7f84a5435600 | 207489 | 207489 | 65536 | 1507328 | 209969 | 26848516 | 12076394547612161 | 12076394547800141 | 12076394547927661 | 12076394547931334 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f85b1136500 | 0x7f84a5435640 | 211827 | 211827 | 65536 | 1441792 | 200077 | 25672280 | 12076394548007616 | 12076394548179179 | 12076394548308138 | 12076394548311921 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f85b1136400 | 0x7f84a5435680 | 408737 | 408737 | 65536 | 2359296 | 325791 | 41654336 | 12076394548363327 | 12076394548541577 | 12076394548803975 | 12076394548871982 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f85b1136300 | 0x7f84a54356c0 | 395955 | 395955 | 65536 | 1572864 | 210524 | 26946064 | 12076394548900274 | 12076394549099174 | 12076394549351652 | 12076394549420732 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f85b1136a00 | 0x7f84a5435700 | 209553 | 209553 | 65536 | 1507328 | 207734 | 26577844 | 12076394549445598 | 12076394549638691 | 12076394549767650 | 12076394549771624 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f85b1136900 | 0x7f84a5435740 | 215451 | 215451 | 65536 | 1703936 | 235503 | 30137552 | 12076394549831054 | 12076394549997728 | 12076394550130048 | 12076394550133877 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f85b1136800 | 0x7f84a5435780 | 213393 | 213393 | 65536 | 1572864 | 214492 | 27519684 | 12076394550201222 | 12076394550369566 | 12076394550501085 | 12076394550504686 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f85b1136700 | 0x7f84a54357c0 | 402939 | 402939 | 65536 | 2621440 | 357768 | 45808736 | 12076394550555541 | 12076394550732284 | 12076394550990203 | 12076394551077982 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f85b1136600 | 0x7f84a5435800 | 399281 | 399281 | 65536 | 1703936 | 231234 | 29605180 | 12076394551103649 | 12076394551299161 | 12076394551555159 | 12076394551621933 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f85b1136500 | 0x7f84a5435840 | 218707 | 218707 | 65536 | 1638400 | 227431 | 29107716 | 12076394551645787 | 12076394551836437 | 12076394551969877 | 12076394551973716 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f85b1136400 | 0x7f84a5435880 | 217433 | 217433 | 65536 | 1900544 | 275013 | 35145920 | 12076394552031594 | 12076394552214995 | 12076394552349234 | 12076394552353482 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f85b1136300 | 0x7f84a54358c0 | 210987 | 210987 | 65536 | 1703936 | 253442 | 32434964 | 12076394552414455 | 12076394552552753 | 12076394552681872 | 12076394552685870 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f85b1136a00 | 0x7f84a5435900 | 400161 | 400161 | 65536 | 2883584 | 403348 | 51637124 | 12076394552733108 | 12076394552893231 | 12076394553149870 | 12076394553219031 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f85b1136900 | 0x7f84a5435940 | 397379 | 397379 | 65536 | 1835008 | 249947 | 32028716 | 12076394553241092 | 12076394553407468 | 12076394553660907 | 12076394553729059 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f85b1136800 | 0x7f84a5435980 | 215969 | 215969 | 65536 | 1769472 | 265991 | 34120144 | 12076394553749487 | 12076394553912585 | 12076394554046024 | 12076394554049895 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f85b1136700 | 0x7f84a54359c0 | 227435 | 227435 | 65536 | 2097152 | 323541 | 41485036 | 12076394554100489 | 12076394554260103 | 12076394554400102 | 12076394554404203 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f85b1136600 | 0x7f84a5435a00 | 218145 | 218145 | 65536 | 1835008 | 247707 | 31764128 | 12076394554463273 | 12076394554600741 | 12076394554734820 | 12076394554738885 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f85b1136500 | 0x7f84a5435a40 | 403219 | 403219 | 65536 | 3145728 | 428137 | 54678012 | 12076394554788718 | 12076394554940899 | 12076394555199618 | 12076394555269141 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f85b1136400 | 0x7f84a5435a80 | 403073 | 403073 | 65536 | 1966080 | 260494 | 33445580 | 12076394555290490 | 12076394555456096 | 12076394555714174 | 12076394555782054 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f85b1136300 | 0x7f84a5435ac0 | 223571 | 223571 | 65536 | 1900544 | 262770 | 33609016 | 12076394555801801 | 12076394555971293 | 12076394556108252 | 12076394556112648 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f85b1136a00 | 0x7f84a5435b00 | 220801 | 220801 | 65536 | 2293760 | 310194 | 39858824 | 12076394556160668 | 12076394556322011 | 12076394556459130 | 12076394556463060 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f85b1136900 | 0x7f84a5435b40 | 213323 | 213323 | 65536 | 1966080 | 282600 | 36139652 | 12076394556522590 | 12076394556666329 | 12076394556797048 | 12076394556801018 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f85b1136800 | 0x7f84a5435b80 | 417185 | 417185 | 65536 | 3407872 | 461138 | 59116300 | 12076394556848135 | 12076394557003447 | 12076394557272405 | 12076394557342835 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f85b1136700 | 0x7f84a5435bc0 | 394547 | 394547 | 65536 | 2097152 | 282588 | 36060172 | 12076394557366469 | 12076394557529364 | 12076394557781362 | 12076394557848885 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f85b1136600 | 0x7f84a5435c00 | 211753 | 211753 | 65536 | 2031616 | 281775 | 36162248 | 12076394557868421 | 12076394558035921 | 12076394558166160 | 12076394558170202 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f85b1136500 | 0x7f84a5435c40 | 223619 | 223619 | 65536 | 2490368 | 347873 | 44311868 | 12076394558219764 | 12076394558376399 | 12076394558514318 | 12076394558518319 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f85b1136400 | 0x7f84a5435c80 | 215881 | 215881 | 65536 | 2097152 | 288541 | 36946784 | 12076394558578501 | 12076394558715117 | 12076394558848556 | 12076394558852560 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f85b1136300 | 0x7f84a5435cc0 | 418107 | 418107 | 65536 | 3670016 | 488120 | 62376360 | 12076394558899788 | 12076394559068555 | 12076394559336713 | 12076394559405648 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f85b1136a00 | 0x7f84a5435d00 | 451945 | 451945 | 65536 | 2228224 | 295244 | 37809348 | 12076394559426447 | 12076394559592231 | 12076394559883750 | 12076394559926005 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f85b1136900 | 0x7f84a5435d40 | 218507 | 218507 | 65536 | 2162688 | 310409 | 39682372 | 12076394559954428 | 12076394560114948 | 12076394560248548 | 12076394560252743 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f85b1136800 | 0x7f84a5435d80 | 230497 | 230497 | 65536 | 2686976 | 378487 | 48458052 | 12076394560302285 | 12076394560457026 | 12076394560598305 | 12076394560602382 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f85b1136700 | 0x7f84a5435dc0 | 219083 | 219083 | 65536 | 2228224 | 311436 | 39865016 | 12076394560665480 | 12076394560802624 | 12076394560936703 | 12076394560940711 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f85b1136600 | 0x7f84a5435e00 | 408673 | 408673 | 65536 | 3932160 | 547575 | 70145348 | 12076394560988199 | 12076394561151742 | 12076394561415261 | 12076394561482037 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f85b1136500 | 0x7f84a5435e40 | 397115 | 397115 | 65536 | 2359296 | 314882 | 40241684 | 12076394561502425 | 12076394561665019 | 12076394561918938 | 12076394561986244 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f85b1136400 | 0x7f84a5435e80 | 212729 | 212729 | 65536 | 2293760 | 317122 | 40559472 | 12076394562009507 | 12076394562179736 | 12076394562311255 | 12076394562315196 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f85b1136300 | 0x7f84a5435ec0 | 226875 | 226875 | 65536 | 2883584 | 427142 | 54621976 | 12076394562363425 | 12076394562522934 | 12076394562663093 | 12076394562667090 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f85b1136a00 | 0x7f84a5435f00 | 216289 | 216289 | 65536 | 2359296 | 317754 | 40624624 | 12076394562727281 | 12076394562865172 | 12076394562998771 | 12076394563006039 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f85b1136900 | 0x7f84a5435f40 | 401155 | 401155 | 65536 | 4194304 | 600484 | 76901700 | 12076394563052295 | 12076394563205970 | 12076394563463888 | 12076394563531466 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f85b1136800 | 0x7f84a5435f80 | 405033 | 405033 | 65536 | 2490368 | 328969 | 42142620 | 12076394563553787 | 12076394563715727 | 12076394563976045 | 12076394564043528 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f85b1136700 | 0x7f84a5435fc0 | 223715 | 223715 | 65536 | 2424832 | 343438 | 43883756 | 12076394564064827 | 12076394564228524 | 12076394564366123 | 12076394564370285 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f85b1136600 | 0x7f84a5436000 | 224513 | 224513 | 65536 | 3080192 | 464010 | 59672732 | 12076394564418855 | 12076394564568682 | 12076394564708681 | 12076394564712641 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f85b1136500 | 0x7f84a5436040 | 211371 | 211371 | 65536 | 2490368 | 347655 | 44529308 | 12076394564773264 | 12076394564908040 | 12076394565037479 | 12076394565041793 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f85b1136400 | 0x7f84a5436080 | 412809 | 412809 | 65536 | 4456448 | 586308 | 74984116 | 12076394565091345 | 12076394565244518 | 12076394565511556 | 12076394565579422 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f85b1136300 | 0x7f84a54360c0 | 395483 | 395483 | 65536 | 2621440 | 342768 | 43797944 | 12076394565599489 | 12076394565766115 | 12076394566019873 | 12076394566090331 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f85b1136a00 | 0x7f84a5436100 | 213825 | 213825 | 65536 | 2555904 | 395261 | 50542672 | 12076394566112102 | 12076394566280992 | 12076394566413151 | 12076394566417309 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f85b1136900 | 0x7f84a5436140 | 231251 | 231251 | 65536 | 3276800 | 492681 | 62719112 | 12076394566468995 | 12076394566617310 | 12076394566760989 | 12076394566765035 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f85b1136800 | 0x7f84a5436180 | 214153 | 214153 | 65536 | 2621440 | 352858 | 45188552 | 12076394566825428 | 12076394566960988 | 12076394567093467 | 12076394567097724 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f85b1136700 | 0x7f84a54361c0 | 416795 | 416795 | 65536 | 4718592 | 640511 | 82074152 | 12076394567146955 | 12076394567300346 | 12076394567569144 | 12076394567616949 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f85b1136600 | 0x7f84a5436200 | 400361 | 400361 | 65536 | 2752512 | 358327 | 45890156 | 12076394567637407 | 12076394567798583 | 12076394568056181 | 12076394568105657 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f85b1136500 | 0x7f84a5436240 | 216067 | 216067 | 65536 | 2686976 | 368040 | 47130456 | 12076394568127898 | 12076394568293460 | 12076394568425939 | 12076394568429990 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f85b1136400 | 0x7f84a5436280 | 240185 | 240185 | 65536 | 3473408 | 513828 | 65539412 | 12076394568479071 | 12076394568629778 | 12076394568780177 | 12076394568784589 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f85b1136300 | 0x7f84a54362c0 | 223243 | 223243 | 65536 | 2752512 | 444161 | 56814928 | 12076394568860860 | 12076394568998256 | 12076394569135375 | 12076394569139438 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f85b1136a00 | 0x7f84a5436300 | 420569 | 420569 | 65536 | 4980736 | 653440 | 83712528 | 12076394569189722 | 12076394569348173 | 12076394569620332 | 12076394569670806 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f85b1136900 | 0x7f84a5436340 | 393995 | 393995 | 65536 | 2883584 | 380916 | 48762788 | 12076394569692065 | 12076394569852490 | 12076394570105289 | 12076394570154695 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f85b1136800 | 0x7f84a5436380 | 228097 | 228097 | 65536 | 2818048 | 422910 | 54136748 | 12076394570175784 | 12076394570336968 | 12076394570478407 | 12076394570482614 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f85b1136700 | 0x7f84a54363c0 | 252819 | 252819 | 65536 | 3670016 | 560712 | 71933264 | 12076394570531245 | 12076394570682886 | 12076394570840805 | 12076394570844918 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f85b1136600 | 0x7f84a5436400 | 225545 | 225545 | 65536 | 2883584 | 406697 | 51979784 | 12076394570902425 | 12076394571049443 | 12076394571189283 | 12076394571193796 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f85b1136500 | 0x7f84a5436440 | 411867 | 411867 | 65536 | 5177344 | 679984 | 87116940 | 12076394571243428 | 12076394571395841 | 12076394571661760 | 12076394571710476 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f85b1136400 | 0x7f84a5436480 | 397769 | 397769 | 65536 | 3014656 | 401702 | 51306320 | 12076394571733459 | 12076394571892958 | 12076394572149277 | 12076394572200697 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f85b1136300 | 0x7f84a54364c0 | 221011 | 221011 | 65536 | 2949120 | 432754 | 55390136 | 12076394572221696 | 12076394572390555 | 12076394572526555 | 12076394572530721 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f85b1136a00 | 0x7f84a5436500 | 261777 | 261777 | 65536 | 3866624 | 559335 | 71068084 | 12076394572579852 | 12076394572730873 | 12076394572895832 | 12076394572899877 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f85b1136900 | 0x7f84a5436540 | 220923 | 220923 | 65536 | 3014656 | 412468 | 52836328 | 12076394572961311 | 12076394573105751 | 12076394573241430 | 12076394573245890 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f85b1136800 | 0x7f84a5436580 | 416209 | 416209 | 65536 | 5439488 | 707032 | 90501800 | 12076394573297125 | 12076394573468309 | 12076394573738387 | 12076394573786384 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f85b1136700 | 0x7f84a54365c0 | 389995 | 389995 | 65536 | 3145728 | 419525 | 53644572 | 12076394573807383 | 12076394573967826 | 12076394574218385 | 12076394574267498 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f85b1136600 | 0x7f84a5436600 | 221665 | 221665 | 65536 | 3080192 | 444792 | 56883940 | 12076394574289920 | 12076394574453743 | 12076394574591342 | 12076394574595338 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f85b1136500 | 0x7f84a5436640 | 276083 | 276083 | 65536 | 4063232 | 570477 | 72755108 | 12076394574644910 | 12076394574795021 | 12076394574969260 | 12076394574973220 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f85b1136400 | 0x7f84a5436680 | 220417 | 220417 | 65536 | 3145728 | 466762 | 59785900 | 12076394575038561 | 12076394575187819 | 12076394575324138 | 12076394575328670 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f85b1136300 | 0x7f84a54366c0 | 409899 | 409899 | 65536 | 5701632 | 804756 | 102918980 | 12076394575377391 | 12076394575530057 | 12076394575795175 | 12076394575842305 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f85b1136a00 | 0x7f84a5436700 | 401065 | 401065 | 65536 | 3276800 | 433141 | 55525216 | 12076394575863725 | 12076394576028294 | 12076394576287812 | 12076394576338908 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f85b1136900 | 0x7f84a5436740 | 222435 | 222435 | 65536 | 3211264 | 485627 | 62144024 | 12076394576360558 | 12076394576523811 | 12076394576661250 | 12076394576665325 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f85b1136800 | 0x7f84a5436780 | 288257 | 288257 | 65536 | 4259840 | 600592 | 76564800 | 12076394576713544 | 12076394576863009 | 12076394577045888 | 12076394577050731 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f85b1136700 | 0x7f84a54367c0 | 223123 | 223123 | 65536 | 3276800 | 523882 | 67001020 | 12076394577112886 | 12076394577252447 | 12076394577390366 | 12076394577394570 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f85b1136600 | 0x7f84a5436800 | 406689 | 406689 | 65536 | 7733248 | 977032 | 125139544 | 12076394577442729 | 12076394577591965 | 12076394577856763 | 12076394577878870 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f85b1136500 | 0x7f84a5436840 | 392435 | 392435 | 65536 | 3407872 | 447705 | 57345132 | 12076394577921159 | 12076394578070842 | 12076394578323480 | 12076394578348643 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f85b1136400 | 0x7f84a5436880 | 225177 | 225177 | 65536 | 3342336 | 484622 | 62004348 | 12076394578386744 | 12076394578534519 | 12076394578674838 | 12076394578678847 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f85b1136300 | 0x7f84a54368c0 | 301955 | 301955 | 65536 | 4456448 | 617684 | 79069940 | 12076394578728068 | 12076394578877077 | 12076394579068116 | 12076394579091473 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f85b1136a00 | 0x7f84a5436900 | 221065 | 221065 | 65536 | 3538944 | 573601 | 73427056 | 12076394579144382 | 12076394579282834 | 12076394579420274 | 12076394579424372 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f85b1136900 | 0x7f84a5436940 | 437035 | 437035 | 65536 | 8978432 | 1132090 | 144950544 | 12076394579472722 | 12076394579623632 | 12076394579907791 | 12076394579929020 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f85b1136800 | 0x7f84a5436980 | 408537 | 408537 | 65536 | 3670016 | 502879 | 64491488 | 12076394579970477 | 12076394580119310 | 12076394580382188 | 12076394580430141 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f85b1136700 | 0x7f84a54369c0 | 229731 | 229731 | 65536 | 3604480 | 505028 | 64568960 | 12076394580450659 | 12076394580616587 | 12076394580758826 | 12076394580762880 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f85b1136600 | 0x7f84a5436a00 | 328681 | 328681 | 65536 | 4849664 | 739838 | 94720156 | 12076394580811109 | 12076394580962984 | 12076394581173223 | 12076394581223716 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f85b1136500 | 0x7f84a5436a40 | 225867 | 225867 | 65536 | 3801088 | 606670 | 77619824 | 12076394581254063 | 12076394581408902 | 12076394581548261 | 12076394581552467 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f85b1136400 | 0x7f84a5436a80 | 471825 | 471825 | 65536 | 14548992 | 1829735 | 233827908 | 12076394581599645 | 12076394581755940 | 12076394582064898 | 12076394582113760 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f85b1136300 | 0x7f84a5436ac0 | 415603 | 415603 | 65536 | 3932160 | 567327 | 72632140 | 12076394582134519 | 12076394582298337 | 12076394582566655 | 12076394582614671 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f85b1136a00 | 0x7f84a5436b00 | 223529 | 223529 | 65536 | 3866624 | 611458 | 78257460 | 12076394582635099 | 12076394582803934 | 12076394582943453 | 12076394582947680 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f85b1136900 | 0x7f84a5436b40 | 357187 | 357187 | 65536 | 5242880 | 771734 | 98867584 | 12076394582995479 | 12076394583152251 | 12076394583380250 | 12076394583406793 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f85b1136800 | 0x7f84a5436b80 | 215025 | 215025 | 65536 | 4063232 | 603163 | 77163572 | 12076394583460473 | 12076394583596889 | 12076394583730648 | 12076394583734903 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f85b1136700 | 0x7f84a5436bc0 | 504107 | 504107 | 65536 | 10027008 | 1260428 | 161244304 | 12076394583782842 | 12076394583934967 | 12076394584264565 | 12076394584293992 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f85b1136600 | 0x7f84a5436c00 | 410745 | 410745 | 65536 | 4194304 | 550946 | 70603604 | 12076394584334207 | 12076394584470804 | 12076394584737522 | 12076394584762453 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f85b1136500 | 0x7f84a5436c40 | 219771 | 219771 | 65536 | 4128768 | 578611 | 74084184 | 12076394584805102 | 12076394584941041 | 12076394585076720 | 12076394585080995 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f85b1136400 | 0x7f84a5436c80 | 381161 | 381161 | 65536 | 5636096 | 776734 | 99540724 | 12076394585129765 | 12076394585289679 | 12076394585534477 | 12076394585560656 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f85b1136300 | 0x7f84a5436cc0 | 222019 | 222019 | 65536 | 4587520 | 689115 | 88176324 | 12076394585613735 | 12076394585751756 | 12076394585889515 | 12076394585893665 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f85b1136a00 | 0x7f84a5436d00 | 572033 | 572033 | 65536 | 11075584 | 1391619 | 178243852 | 12076394585940983 | 12076394586098989 | 12076394586474988 | 12076394586503579 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f85b1136900 | 0x7f84a5436d40 | 397195 | 397195 | 65536 | 4718592 | 638924 | 82119948 | 12076394586540678 | 12076394586687466 | 12076394586945385 | 12076394586967361 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f85b1136800 | 0x7f84a5436d80 | 225185 | 225185 | 65536 | 4653056 | 672181 | 85960612 | 12076394587013426 | 12076394587152424 | 12076394587293384 | 12076394587295540 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f85b1136700 | 0x7f84a5436dc0 | 433723 | 433723 | 65536 | 6422528 | 888286 | 113327244 | 12076394587344902 | 12076394587503303 | 12076394587782982 | 12076394587805799 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f85b1136600 | 0x7f84a5436e00 | 225601 | 225601 | 65536 | 5111808 | 803435 | 102905232 | 12076394587863897 | 12076394588002821 | 12076394588144101 | 12076394588146732 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f85b1136500 | 0x7f84a5436e40 | 642187 | 642187 | 65536 | 15007744 | 1882749 | 241078992 | 12076394588199160 | 12076394588355460 | 12076394588778659 | 12076394588802060 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f85b1136400 | 0x7f84a5436e80 | 408009 | 408009 | 65536 | 5242880 | 749117 | 95452144 | 12076394588844900 | 12076394588988578 | 12076394589254337 | 12076394589281772 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f85b1136300 | 0x7f84a5436ec0 | 225819 | 225819 | 65536 | 5177344 | 827943 | 105944924 | 12076394589322057 | 12076394589467776 | 12076394589608256 | 12076394589610332 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f85b1136a00 | 0x7f84a5436f00 | 485833 | 485833 | 65536 | 7208960 | 1040779 | 132640312 | 12076394589658913 | 12076394589812735 | 12076394590127614 | 12076394590151278 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f85b1136900 | 0x7f84a5436f40 | 244539 | 244539 | 65536 | 6160384 | 938204 | 120027268 | 12076394590205609 | 12076394590347453 | 12076394590500733 | 12076394590502771 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f85b1136800 | 0x7f84a5436f80 | 783585 | 783585 | 65536 | 17104896 | 2146970 | 274733416 | 12076394590549598 | 12076394590703452 | 12076394591221370 | 12076394591245672 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f85b1136700 | 0x7f84a5436fc0 | 454227 | 454227 | 65536 | 6291456 | 830578 | 106700028 | 12076394591288261 | 12076394591428729 | 12076394591725208 | 12076394591748025 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f85b1136600 | 0x7f84a5437000 | 244425 | 244425 | 65536 | 6225920 | 894639 | 114569284 | 12076394591794992 | 12076394591935928 | 12076394592090647 | 12076394592093036 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f85b1136500 | 0x7f84a5437040 | 593707 | 593707 | 65536 | 8781824 | 1269549 | 163541400 | 12076394592141927 | 12076394592311446 | 12076394592697205 | 12076394592720613 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f85b1136400 | 0x7f84a5437080 | 278033 | 278033 | 65536 | 7208960 | 1098756 | 140684696 | 12076394592778741 | 12076394592918964 | 12076394593095764 | 12076394593098285 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f85b1136300 | 0x7f84a54370c0 | 924387 | 924387 | 65536 | 19202048 | 2404863 | 308160928 | 12076394593145202 | 12076394593312243 | 12076394593921681 | 12076394593947122 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f85b1136a00 | 0x7f84a5437100 | 502977 | 502977 | 65536 | 7340032 | 1020302 | 131125348 | 12076394593986706 | 12076394594128880 | 12076394594458959 | 12076394594482928 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f85b1136900 | 0x7f84a5437140 | 283051 | 283051 | 65536 | 7274496 | 1082589 | 138723932 | 12076394594526278 | 12076394594664718 | 12076394594843918 | 12076394594846223 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f85b1136800 | 0x7f84a5437180 | 698249 | 698249 | 65536 | 10354688 | 1488836 | 191763836 | 12076394594893751 | 12076394595047437 | 12076394595504075 | 12076394595530505 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f85b1136700 | 0x7f84a54371c0 | 314723 | 314723 | 65536 | 8257536 | 1275168 | 163161972 | 12076394595583052 | 12076394595724074 | 12076394595924874 | 12076394595947941 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f85b1136600 | 0x7f84a5437200 | 1064873 | 1064873 | 65536 | 21299200 | 2670697 | 341731916 | 12076394595990790 | 12076394596132713 | 12076394596836711 | 12076394596863813 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f85b1136500 | 0x7f84a5437240 | 567307 | 567307 | 65536 | 8388608 | 1159924 | 147786660 | 12076394596904498 | 12076394597056870 | 12076394597429669 | 12076394597482322 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f85b1136400 | 0x7f84a5437280 | 318361 | 318361 | 65536 | 8323072 | 1313689 | 168136196 | 12076394597503271 | 12076394597668228 | 12076394597872547 | 12076394597896532 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f85b1136300 | 0x7f84a54372c0 | 802811 | 802811 | 65536 | 11927552 | 1672761 | 213608128 | 12076394597938600 | 12076394598082786 | 12076394598609024 | 12076394598654671 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f85b1136a00 | 0x7f84a5437300 | 348561 | 348561 | 65536 | 9306112 | 1410362 | 180570488 | 12076394598686670 | 12076394598841024 | 12076394599065503 | 12076394599089749 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f85b1136900 | 0x7f84a5437340 | 1205987 | 1205987 | 65536 | 29163520 | 3650762 | 467561372 | 12076394599133781 | 12076394599271422 | 12076394600069339 | 12076394600114674 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f85b1136800 | 0x7f84a5437380 | 635561 | 635561 | 65536 | 9437184 | 1269038 | 162651796 | 12076394600136254 | 12076394600300539 | 12076394600720377 | 12076394600766586 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f85b1136700 | 0x7f84a54373c0 | 351915 | 351915 | 65536 | 9371648 | 1337710 | 171186720 | 12076394600787054 | 12076394600957816 | 12076394601183255 | 12076394601233574 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f85b1136600 | 0x7f84a5437400 | 926905 | 926905 | 65536 | 13500416 | 1937603 | 249252548 | 12076394601254903 | 12076394601419095 | 12076394602029173 | 12076394602076500 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f85b1136500 | 0x7f84a5437440 | 419475 | 419475 | 65536 | 11403264 | 1738043 | 222567892 | 12076394602106736 | 12076394602270132 | 12076394602540531 | 12076394602586688 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f85b1136400 | 0x7f84a5437480 | 1486705 | 1486705 | 65536 | 33357824 | 4177648 | 534484224 | 12076394602608429 | 12076394602778130 | 12076394603763887 | 12076394603811004 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f85b1136300 | 0x7f84a54374c0 | 779363 | 779363 | 65536 | 11534336 | 1569613 | 200475772 | 12076394603832524 | 12076394604002926 | 12076394604518444 | 12076394604585232 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f85b1136a00 | 0x7f84a5437500 | 422193 | 422193 | 65536 | 11534336 | 1747045 | 223657000 | 12076394604607233 | 12076394604777483 | 12076394605050602 | 12076394605116360 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f85b1136900 | 0x7f84a5437540 | 1126971 | 1126971 | 65536 | 19267584 | 2418459 | 309730760 | 12076394605137769 | 12076394605307881 | 12076394606046599 | 12076394606113092 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f85b1136800 | 0x7f84a5437580 | 489377 | 489377 | 65536 | 13500416 | 2112691 | 270386452 | 12076394606150992 | 12076394606296838 | 12076394606613957 | 12076394606679745 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f85b1136700 | 0x7f84a54375c0 | 1768499 | 1768499 | 65536 | 37552128 | 4693847 | 601664884 | 12076394606699472 | 12076394606862756 | 12076394608033152 | 12076394608101517 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f85b1136600 | 0x7f84a5437600 | 920009 | 920009 | 65536 | 13631488 | 1915570 | 244674520 | 12076394608124099 | 12076394608294911 | 12076394608903549 | 12076394608968929 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f85b1136500 | 0x7f84a5437640 | 492099 | 492099 | 65536 | 13631488 | 2051524 | 262566612 | 12076394608988836 | 12076394609170748 | 12076394609488827 | 12076394609554878 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f85b1136400 | 0x7f84a5437680 | 1335553 | 1335553 | 65536 | 21692416 | 2743755 | 351376888 | 12076394609574074 | 12076394609739226 | 12076394610617623 | 12076394610683947 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f85b1136300 | 0x7f84a54376c0 | 663411 | 663411 | 65536 | 17694720 | 2737236 | 350382576 | 12076394610715946 | 12076394610876022 | 12076394611308020 | 12076394611377205 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f85b1136a00 | 0x7f84a5437700 | 2332961 | 2332961 | 65536 | 57475072 | 7197130 | 920842756 | 12076394611397272 | 12076394611568339 | 12076394613114414 | 12076394613182540 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f85b1136900 | 0x7f84a5437740 | 1198891 | 1198891 | 65536 | 19660800 | 2472184 | 316474392 | 12076394613206926 | 12076394613376013 | 12076394614170250 | 12076394614236830 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f85b1136800 | 0x7f84a5437780 | 638769 | 638769 | 65536 | 20381696 | 2570790 | 329187612 | 12076394614261746 | 12076394614427689 | 12076394614841768 | 12076394614882449 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f85b1136700 | 0x7f84a54377c0 | 1757651 | 1757651 | 65536 | 28704768 | 3614922 | 462704820 | 12076394614910962 | 12076394615071527 | 12076394616230563 | 12076394616297499 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f85b1136600 | 0x7f84a5437800 | 1197233 | 1197233 | 65536 | 36634624 | 4633520 | 593108056 | 12076394616330150 | 12076394616489442 | 12076394617274880 | 12076394617341920 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f85b1136500 | 0x7f84a5437840 | 4583411 | 4583411 | 65536 | 105840640 | 13243961 | 1694938312 | 12076394617366977 | 12076394617537119 | 12076394620587348 | 12076394620653825 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f85b1136400 | 0x7f84a5437880 | 2324329 | 2324329 | 65536 | 38535168 | 4831186 | 618580568 | 12076394620679343 | 12076394620847827 | 12076394622388462 | 12076394622456856 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f85b1136300 | 0x7f84a54378c0 | 1201755 | 1201755 | 65536 | 39124992 | 4914502 | 629220172 | 12076394622481181 | 12076394622647021 | 12076394623435818 | 12076394623502519 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f85b1136a00 | 0x7f84a5437900 | 3443177 | 3443177 | 65536 | 55771136 | 7003824 | 896714668 | 12076394623528748 | 12076394623692937 | 12076394625974849 | 12076394626044914 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f85b1136900 | 0x7f84a5437940 | 2324123 | 2324123 | 65536 | 72548352 | 9118942 | 1167406468 | 12076394626079899 | 12076394626230528 | 12076394627767163 | 12076394627833468 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f85b1136800 | 0x7f84a5437980 | 9086041 | 9086041 | 65536 | 210698240 | 26343929 | 3372642212 | 12076394627853566 | 12076394628027482 | 12076394634082821 | 12076394634155207 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f85b1136700 | 0x7f84a54379c0 | 4572611 | 4572611 | 65536 | 76283904 | 9544849 | 1221644700 | 12076394634177388 | 12076394634346980 | 12076394637389850 | 12076394637459708 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f85b1136600 | 0x7f84a5437a00 | 2327761 | 2327761 | 65536 | 77398016 | 9700171 | 1241683948 | 12076394637481369 | 12076394637654169 | 12076394639194163 | 12076394639260846 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 924536 | 924541 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f85b1136500 | 0x7f84a5437a40 | 6813243 | 6813243 | 65536 | 110821376 | 13883238 | 1777247684 | 12076394639286283 | 12076394639451443 | 12076394643979107 | 12076394644052551 |