48 KiB
48 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 223519 | 223519 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f1ce5804280 | 387094 | 387094 | 8680 | 3096760 | 524288 | 245771477 | 3011216 | 0 | 999328548 | 17599035568236 | 17598328816642 | 17599185813205 | 17599185923274 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 223519 | 223519 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f1ce5823f80 | 33003 | 33003 | 29549 | 264032 | 512 | 1739044 | 167125 | 0 | 6969744 | 17599191082624 | 17599185813205 | 17599191227446 | 17599191232450 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f1ce8890380 | 0x7f1ce5823fc0 | 164050 | 164050 | 13444 | 1312408 | 65536 | 80869811 | 1211279 | 0 | 325210296 | 17599191275709 | 17599191227446 | 17599191628886 | 17599191631540 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f1ce8890200 | 0x7f1ce5824000 | 311255 | 311255 | 23766 | 2490048 | 65536 | 228598662 | 2391429 | 0 | 916123524 | 17599191677268 | 17599191628886 | 17599192061846 | 17599192064219 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f1ce8890080 | 0x7f1ce5824040 | 308986 | 308986 | 23354 | 2471896 | 65536 | 214366751 | 2367416 | 0 | 859195568 | 17599192106057 | 17599192061846 | 17599192489046 | 17599192491478 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f1ce60edf00 | 0x7f1ce5824080 | 164229 | 164229 | 13653 | 1313840 | 65536 | 79739799 | 1213367 | 0 | 320688912 | 17599192533847 | 17599192489046 | 17599192819766 | 17599192822219 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f1ce60edd80 | 0x7f1ce58240c0 | 164270 | 164270 | 13901 | 1314168 | 65536 | 85085195 | 1213013 | 0 | 342069648 | 17599192862578 | 17599192819766 | 17599193152407 | 17599193154691 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f1ce60edc00 | 0x7f1ce5824100 | 163629 | 163629 | 14731 | 1309040 | 65536 | 101700322 | 1209658 | 0 | 408527388 | 17599193208009 | 17599193152407 | 17599193479927 | 17599193482142 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f1ce60eda80 | 0x7f1ce5824140 | 309902 | 309902 | 24313 | 2479224 | 65536 | 231331788 | 2381450 | 0 | 927056496 | 17599193522181 | 17599193479927 | 17599193913527 | 17599193915851 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f1ce60ed900 | 0x7f1ce5824180 | 308147 | 308147 | 20797 | 2465184 | 65536 | 223234065 | 2366125 | 0 | 894661804 | 17599193956680 | 17599193913527 | 17599194343127 | 17599194345870 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f1ce60ed780 | 0x7f1ce58241c0 | 164420 | 164420 | 14978 | 1315368 | 65536 | 107801666 | 1219890 | 0 | 432931588 | 17599194385889 | 17599194343127 | 17599194681207 | 17599194683762 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f1ce60ed600 | 0x7f1ce5824200 | 163850 | 163850 | 14054 | 1310808 | 65536 | 95767506 | 1210869 | 0 | 384799188 | 17599194725201 | 17599194681207 | 17599195007127 | 17599195008974 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f1ce8890480 | 0x7f1ce5824240 | 163455 | 163455 | 15069 | 1307648 | 65536 | 101466803 | 1209268 | 0 | 407590804 | 17599195059352 | 17599195007127 | 17599195307927 | 17599195309736 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f1ce8890300 | 0x7f1ce5824280 | 311296 | 311296 | 24891 | 2490376 | 65536 | 223027785 | 2383286 | 0 | 893843148 | 17599195357015 | 17599195307927 | 17599195716887 | 17599195718836 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f1ce8890180 | 0x7f1ce58242c0 | 307413 | 307413 | 21313 | 2459312 | 65536 | 220377875 | 2358693 | 0 | 883239532 | 17599195762984 | 17599195716887 | 17599196109847 | 17599196111616 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f1ce8890000 | 0x7f1ce5824300 | 163860 | 163860 | 14357 | 1310888 | 65536 | 86177672 | 1212931 | 0 | 346446296 | 17599196153195 | 17599196109847 | 17599196415287 | 17599196417118 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f1ce60ede80 | 0x7f1ce5824340 | 164511 | 164511 | 14669 | 1316096 | 65536 | 101894090 | 1212423 | 0 | 409304048 | 17599196458717 | 17599196415287 | 17599196715448 | 17599196717250 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f1ce60edd00 | 0x7f1ce5824380 | 162566 | 162566 | 15450 | 1300536 | 65536 | 99198555 | 1201088 | 0 | 398516296 | 17599196765099 | 17599196715448 | 17599197022328 | 17599197024232 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f1ce60edb80 | 0x7f1ce58243c0 | 308486 | 308486 | 24663 | 2467896 | 65536 | 229072215 | 2363677 | 0 | 918017676 | 17599197068781 | 17599197022328 | 17599197411768 | 17599197413713 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f1ce60eda00 | 0x7f1ce5824400 | 306838 | 306838 | 20483 | 2454712 | 65536 | 210761440 | 2347060 | 0 | 844777756 | 17599197455062 | 17599197411768 | 17599197800408 | 17599197802393 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f1ce60ed880 | 0x7f1ce5824440 | 162963 | 162963 | 14416 | 1303712 | 65536 | 102793107 | 1207484 | 0 | 412900652 | 17599197843782 | 17599197800408 | 17599198102808 | 17599198104865 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f1ce60ed700 | 0x7f1ce5824480 | 162934 | 162934 | 14706 | 1303480 | 65536 | 96090233 | 1205611 | 0 | 386088624 | 17599198146734 | 17599198102808 | 17599198410648 | 17599198412477 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f1ce8890580 | 0x7f1ce58244c0 | 162859 | 162859 | 15238 | 1302880 | 65536 | 107652555 | 1202144 | 0 | 432340532 | 17599198460616 | 17599198410648 | 17599198715448 | 17599198717769 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f1ce8890400 | 0x7f1ce5824500 | 308990 | 308990 | 23423 | 2471928 | 65536 | 228161252 | 2373553 | 0 | 914374868 | 17599198760228 | 17599198715448 | 17599199109368 | 17599199111219 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f1ce8890280 | 0x7f1ce5824540 | 306147 | 306147 | 20151 | 2449184 | 65536 | 209050543 | 2343388 | 0 | 837930064 | 17599199154828 | 17599199109368 | 17599199502008 | 17599199504109 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f1ce8890100 | 0x7f1ce5824580 | 162202 | 162202 | 14765 | 1297624 | 65536 | 103659410 | 1202611 | 0 | 416368760 | 17599199546568 | 17599199502008 | 17599199803928 | 17599199805552 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f1ce60edf80 | 0x7f1ce58245c0 | 163079 | 163079 | 14477 | 1304640 | 65536 | 92718497 | 1200483 | 0 | 372605996 | 17599199847341 | 17599199803928 | 17599200104729 | 17599200106534 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f1ce60ede00 | 0x7f1ce5824600 | 162400 | 162400 | 15348 | 1299208 | 65536 | 103841820 | 1202899 | 0 | 417091548 | 17599200155193 | 17599200104729 | 17599200404889 | 17599200406786 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f1ce60edc80 | 0x7f1ce5824640 | 306398 | 306398 | 23515 | 2451192 | 65536 | 223314085 | 2344298 | 0 | 894986284 | 17599200448415 | 17599200404889 | 17599200808089 | 17599200809996 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f1ce60edb00 | 0x7f1ce5824680 | 307952 | 307952 | 21104 | 2463624 | 65536 | 224650082 | 2360765 | 0 | 900327932 | 17599200852475 | 17599200808089 | 17599201203769 | 17599201205546 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f1ce60ed980 | 0x7f1ce58246c0 | 164008 | 164008 | 14762 | 1312072 | 65536 | 103895188 | 1212089 | 0 | 417308724 | 17599201248205 | 17599201203769 | 17599201510649 | 17599201512478 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f1ce60ed800 | 0x7f1ce5824700 | 163010 | 163010 | 14494 | 1304088 | 65536 | 91860289 | 1202289 | 0 | 369166104 | 17599201553217 | 17599201510649 | 17599201813689 | 17599201815460 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f1ce60ed680 | 0x7f1ce5824740 | 162473 | 162473 | 14752 | 1299792 | 65536 | 103696532 | 1200924 | 0 | 416512052 | 17599201863219 | 17599201813689 | 17599202114969 | 17599202117173 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f1ce8890500 | 0x7f1ce5824780 | 305506 | 305506 | 23301 | 2444056 | 65536 | 228169266 | 2345763 | 0 | 914408976 | 17599202157842 | 17599202114969 | 17599202506969 | 17599202508773 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f1ce8890380 | 0x7f1ce58247c0 | 305717 | 305717 | 23005 | 2445744 | 65536 | 218981174 | 2346926 | 0 | 877653892 | 17599202549452 | 17599202506969 | 17599202897049 | 17599202898983 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f1ce8890200 | 0x7f1ce5824800 | 162681 | 162681 | 14041 | 1301456 | 65536 | 91860945 | 1201279 | 0 | 369175496 | 17599202940142 | 17599202897049 | 17599203203449 | 17599203205385 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f1ce8890080 | 0x7f1ce5824840 | 163191 | 163191 | 15242 | 1305536 | 65536 | 90365610 | 1203195 | 0 | 363194352 | 17599203245884 | 17599203203449 | 17599203506809 | 17599203508637 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f1ce60edf00 | 0x7f1ce5824880 | 161859 | 161859 | 13858 | 1294880 | 65536 | 95579670 | 1195909 | 0 | 384043028 | 17599203556696 | 17599203506809 | 17599203817210 | 17599203819140 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f1ce60edd80 | 0x7f1ce58248c0 | 305636 | 305636 | 23779 | 2445096 | 65536 | 226682733 | 2348095 | 0 | 908461292 | 17599203859599 | 17599203817210 | 17599204207610 | 17599204209530 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f1ce60edc00 | 0x7f1ce5824900 | 305030 | 305030 | 22101 | 2440248 | 65536 | 221200829 | 2340228 | 0 | 886532660 | 17599204255839 | 17599204207610 | 17599204600730 | 17599204602530 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f1ce60eda80 | 0x7f1ce5824940 | 162587 | 162587 | 14434 | 1300704 | 65536 | 99405729 | 1197980 | 0 | 399347716 | 17599204643099 | 17599204600730 | 17599204904250 | 17599204906362 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f1ce60ed900 | 0x7f1ce5824980 | 163091 | 163091 | 15330 | 1304736 | 65536 | 97276801 | 1204513 | 0 | 390833256 | 17599204946401 | 17599204904250 | 17599205207130 | 17599205208974 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f1ce60ed780 | 0x7f1ce58249c0 | 161124 | 161124 | 14416 | 1289000 | 65536 | 100075238 | 1192158 | 0 | 402025064 | 17599205257133 | 17599205207130 | 17599205522330 | 17599205524216 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f1ce60ed600 | 0x7f1ce5824a00 | 303879 | 303879 | 23298 | 2431040 | 65536 | 219789456 | 2327008 | 0 | 880886488 | 17599205564615 | 17599205522330 | 17599205912090 | 17599205914126 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f1ce8890480 | 0x7f1ce5824a40 | 302284 | 302284 | 20793 | 2418280 | 65536 | 213011727 | 2317029 | 0 | 853773992 | 17599205956235 | 17599205912090 | 17599206310490 | 17599206312506 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f1ce8890300 | 0x7f1ce5824a80 | 161795 | 161795 | 14420 | 1294368 | 65536 | 91035453 | 1195696 | 0 | 365874580 | 17599206353075 | 17599206310490 | 17599206621530 | 17599206623439 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f1ce8890180 | 0x7f1ce5824ac0 | 162106 | 162106 | 16119 | 1296856 | 65536 | 98051476 | 1192092 | 0 | 393944684 | 17599206664607 | 17599206621530 | 17599206925850 | 17599206927641 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f1ce8890000 | 0x7f1ce5824b00 | 162005 | 162005 | 15343 | 1296048 | 65536 | 93865820 | 1188512 | 0 | 377192988 | 17599206975550 | 17599206925850 | 17599207236091 | 17599207238053 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f1ce60ede80 | 0x7f1ce5824b40 | 301396 | 301396 | 22283 | 2411176 | 65536 | 216674007 | 2310551 | 0 | 868426856 | 17599207279362 | 17599207236091 | 17599207626971 | 17599207628953 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f1ce60edd00 | 0x7f1ce5824b80 | 299539 | 299539 | 19642 | 2396320 | 65536 | 206194038 | 2294934 | 0 | 826505180 | 17599207675052 | 17599207626971 | 17599208024891 | 17599208026693 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f1ce60edb80 | 0x7f1ce5824bc0 | 162125 | 162125 | 14190 | 1297008 | 65536 | 92278615 | 1195744 | 0 | 370838364 | 17599208066542 | 17599208024891 | 17599208327611 | 17599208329595 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f1ce60eda00 | 0x7f1ce5824c00 | 162214 | 162214 | 15205 | 1297720 | 65536 | 99118282 | 1195610 | 0 | 398203984 | 17599208369894 | 17599208327611 | 17599208635931 | 17599208637747 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f1ce60ed880 | 0x7f1ce5824c40 | 161613 | 161613 | 14120 | 1292912 | 65536 | 94041848 | 1195216 | 0 | 377893920 | 17599208685976 | 17599208635931 | 17599208945211 | 17599208947010 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f1ce60ed700 | 0x7f1ce5824c80 | 303465 | 303465 | 23759 | 2427728 | 65536 | 222226826 | 2324855 | 0 | 890641624 | 17599208987999 | 17599208945211 | 17599209336891 | 17599209338770 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f1ce8890580 | 0x7f1ce5824cc0 | 300746 | 300746 | 20912 | 2405976 | 65536 | 213737641 | 2304791 | 0 | 856677840 | 17599209378479 | 17599209336891 | 17599209726171 | 17599209727870 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f1ce8890400 | 0x7f1ce5824d00 | 163757 | 163757 | 14773 | 1310064 | 65536 | 92688980 | 1207571 | 0 | 372498780 | 17599209768679 | 17599209726171 | 17599210032251 | 17599210034002 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f1ce8890280 | 0x7f1ce5824d40 | 168983 | 168983 | 15942 | 1351872 | 65536 | 107111514 | 1248405 | 0 | 430178284 | 17599210073981 | 17599210032251 | 17599210352411 | 17599210354654 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f1ce8890100 | 0x7f1ce5824d80 | 161745 | 161745 | 14451 | 1293968 | 65536 | 91622561 | 1194499 | 0 | 368211260 | 17599210402933 | 17599210352411 | 17599210656892 | 17599210658776 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f1ce60edf80 | 0x7f1ce5824dc0 | 302236 | 302236 | 22091 | 2417896 | 65536 | 223073394 | 2317704 | 0 | 894026480 | 17599210699905 | 17599210656892 | 17599211043932 | 17599211045686 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f1ce60ede00 | 0x7f1ce5824e00 | 298968 | 298968 | 20392 | 2391752 | 65536 | 214210680 | 2293417 | 0 | 858573496 | 17599211085865 | 17599211043932 | 17599211432732 | 17599211434506 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f1ce60edc80 | 0x7f1ce5824e40 | 162020 | 162020 | 14214 | 1296168 | 65536 | 84657719 | 1193390 | 0 | 340358196 | 17599211474565 | 17599211432732 | 17599211734652 | 17599211736449 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f1ce60edb00 | 0x7f1ce5824e80 | 176643 | 176643 | 16123 | 1413152 | 65536 | 101640233 | 1310164 | 0 | 408295768 | 17599211776988 | 17599211734652 | 17599212049372 | 17599212051331 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f1ce60ed980 | 0x7f1ce5824ec0 | 160200 | 160200 | 14521 | 1281608 | 65536 | 91774639 | 1182297 | 0 | 368831804 | 17599212099949 | 17599212049372 | 17599212357212 | 17599212359093 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f1ce60ed800 | 0x7f1ce5824f00 | 300790 | 300790 | 22942 | 2406328 | 65536 | 221184750 | 2307117 | 0 | 886468016 | 17599212400082 | 17599212357212 | 17599212751612 | 17599212753523 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f1ce60ed680 | 0x7f1ce5824f40 | 303214 | 303214 | 22979 | 2425720 | 65536 | 222476360 | 2321620 | 0 | 891634332 | 17599212794032 | 17599212751612 | 17599213140412 | 17599213142303 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f1ce8890500 | 0x7f1ce5824f80 | 161576 | 161576 | 15002 | 1292616 | 65536 | 93329451 | 1194379 | 0 | 375045880 | 17599213183432 | 17599213140412 | 17599213444252 | 17599213446165 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f1ce8890380 | 0x7f1ce5824fc0 | 181529 | 181529 | 16342 | 1452240 | 65536 | 114055396 | 1352505 | 0 | 457967400 | 17599213487164 | 17599213444252 | 17599213760092 | 17599213762087 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f1ce8890200 | 0x7f1ce5825000 | 160547 | 160547 | 14019 | 1284384 | 65536 | 93572627 | 1185461 | 0 | 376026100 | 17599213822025 | 17599213760092 | 17599214066012 | 17599214067799 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f1ce8890080 | 0x7f1ce5825040 | 302277 | 302277 | 23450 | 2418224 | 65536 | 217494928 | 2312009 | 0 | 871710180 | 17599214122088 | 17599214066012 | 17599214472573 | 17599214474469 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f1ce60edf00 | 0x7f1ce5825080 | 300853 | 300853 | 22552 | 2406832 | 65536 | 218269792 | 2309166 | 0 | 874808368 | 17599214515098 | 17599214472573 | 17599214869213 | 17599214871149 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f1ce60edd80 | 0x7f1ce58250c0 | 161705 | 161705 | 14563 | 1293648 | 65536 | 89960658 | 1192520 | 0 | 361574908 | 17599214911058 | 17599214869213 | 17599215179133 | 17599215181001 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f1ce60edc00 | 0x7f1ce5825100 | 189879 | 189879 | 17280 | 1519040 | 65536 | 121152417 | 1416318 | 0 | 486350000 | 17599215221200 | 17599215179133 | 17599215500893 | 17599215502703 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f1ce60eda80 | 0x7f1ce5825140 | 161595 | 161595 | 14563 | 1292768 | 65536 | 90832627 | 1194715 | 0 | 365068304 | 17599215550182 | 17599215500893 | 17599215809853 | 17599215811795 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f1ce60ed900 | 0x7f1ce5825180 | 304373 | 304373 | 23948 | 2434992 | 65536 | 223154907 | 2330830 | 0 | 894352844 | 17599215852234 | 17599215809853 | 17599216202173 | 17599216204095 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f1ce60ed780 | 0x7f1ce58251c0 | 302025 | 302025 | 23949 | 2416208 | 65536 | 217635570 | 2314395 | 0 | 872271164 | 17599216245874 | 17599216202173 | 17599216596253 | 17599216598185 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f1ce60ed600 | 0x7f1ce5825200 | 164231 | 164231 | 15505 | 1313856 | 65536 | 95391188 | 1208938 | 0 | 383297012 | 17599216637724 | 17599216596253 | 17599216905213 | 17599216907147 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f1ce8890480 | 0x7f1ce5825240 | 201157 | 201157 | 18060 | 1609264 | 65536 | 132808019 | 1502674 | 0 | 532976100 | 17599216946956 | 17599216905213 | 17599217238173 | 17599217240109 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f1ce8890300 | 0x7f1ce5825280 | 161439 | 161439 | 15055 | 1291520 | 65536 | 90394810 | 1183386 | 0 | 363313524 | 17599217287948 | 17599217238173 | 17599217542653 | 17599217544551 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f1ce8890180 | 0x7f1ce58252c0 | 301599 | 301599 | 23523 | 2412800 | 65536 | 215903820 | 2308263 | 0 | 865345476 | 17599217591970 | 17599217542653 | 17599217943454 | 17599217945341 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f1ce8890000 | 0x7f1ce5825300 | 299893 | 299893 | 21400 | 2399152 | 65536 | 213209361 | 2302281 | 0 | 854568564 | 17599217988350 | 17599217943454 | 17599218342974 | 17599218345131 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f1ce60ede80 | 0x7f1ce5825340 | 161857 | 161857 | 15079 | 1294864 | 65536 | 95410546 | 1195944 | 0 | 383376428 | 17599218384950 | 17599218342974 | 17599218651454 | 17599218653103 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f1ce60edd00 | 0x7f1ce5825380 | 210961 | 210961 | 18726 | 1687696 | 65536 | 122671910 | 1573016 | 0 | 492430000 | 17599218693772 | 17599218651454 | 17599218984894 | 17599218986714 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f1ce60edb80 | 0x7f1ce58253c0 | 160722 | 160722 | 14786 | 1285784 | 65536 | 94041126 | 1188643 | 0 | 377898620 | 17599219034443 | 17599218984894 | 17599219287454 | 17599219289297 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f1ce60eda00 | 0x7f1ce5825400 | 297215 | 297215 | 22381 | 2377728 | 65536 | 217376199 | 2272933 | 0 | 871240176 | 17599219330136 | 17599219287454 | 17599219682174 | 17599219684177 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f1ce60ed880 | 0x7f1ce5825440 | 301892 | 301892 | 23159 | 2415144 | 65536 | 215777482 | 2310252 | 0 | 864839608 | 17599219724166 | 17599219682174 | 17599220073374 | 17599220075207 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f1ce60ed700 | 0x7f1ce5825480 | 160875 | 160875 | 15512 | 1287008 | 65536 | 99935082 | 1186832 | 0 | 401477760 | 17599220115886 | 17599220073374 | 17599220377214 | 17599220378979 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f1ce8890580 | 0x7f1ce58254c0 | 218300 | 218300 | 18735 | 1746408 | 65536 | 142545557 | 1643475 | 0 | 571929752 | 17599220419158 | 17599220377214 | 17599220715934 | 17599220717570 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f1ce8890400 | 0x7f1ce5825500 | 160809 | 160809 | 14981 | 1286480 | 65536 | 93024188 | 1185194 | 0 | 373830336 | 17599220765819 | 17599220715934 | 17599221020254 | 17599221022052 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f1ce8890280 | 0x7f1ce5825540 | 295522 | 295522 | 20151 | 2364184 | 65536 | 200099648 | 2263590 | 0 | 802131380 | 17599221063171 | 17599221020254 | 17599221406975 | 17599221408643 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f1ce8890100 | 0x7f1ce5825580 | 299497 | 299497 | 21829 | 2395984 | 65536 | 209498979 | 2296410 | 0 | 839727244 | 17599221450281 | 17599221406975 | 17599221798175 | 17599221800103 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f1ce60edf80 | 0x7f1ce58255c0 | 162801 | 162801 | 15814 | 1302416 | 65536 | 103762745 | 1199482 | 0 | 416787216 | 17599221840192 | 17599221798175 | 17599222108415 | 17599222110135 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f1ce60ede00 | 0x7f1ce5825600 | 229107 | 229107 | 19573 | 1832864 | 65536 | 144180621 | 1723384 | 0 | 578468672 | 17599222150574 | 17599222108415 | 17599222461055 | 17599222463096 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f1ce60edc80 | 0x7f1ce5825640 | 160681 | 160681 | 14462 | 1285456 | 65536 | 88587366 | 1188462 | 0 | 356082704 | 17599222510745 | 17599222461055 | 17599222765375 | 17599222767318 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f1ce60edb00 | 0x7f1ce5825680 | 297135 | 297135 | 21739 | 2377088 | 65536 | 215165967 | 2275049 | 0 | 862398400 | 17599222807817 | 17599222765375 | 17599223152895 | 17599223154718 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f1ce60ed980 | 0x7f1ce58256c0 | 297121 | 297121 | 20305 | 2376976 | 65536 | 207446767 | 2276839 | 0 | 831518832 | 17599223194867 | 17599223152895 | 17599223543295 | 17599223545098 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f1ce60ed800 | 0x7f1ce5825700 | 163001 | 163001 | 15187 | 1304016 | 65536 | 101362097 | 1204674 | 0 | 407182516 | 17599223585347 | 17599223543295 | 17599223848415 | 17599223850391 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f1ce60ed680 | 0x7f1ce5825740 | 239925 | 239925 | 19579 | 1919408 | 65536 | 146470535 | 1808793 | 0 | 587628928 | 17599223889670 | 17599223848415 | 17599224205695 | 17599224207662 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f1ce8890500 | 0x7f1ce5825780 | 161045 | 161045 | 14978 | 1288368 | 65536 | 97847558 | 1190238 | 0 | 393119288 | 17599224255930 | 17599224205695 | 17599224515615 | 17599224517514 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f1ce8890380 | 0x7f1ce58257c0 | 295651 | 295651 | 19251 | 2365216 | 65536 | 206874185 | 2267595 | 0 | 829232996 | 17599224559303 | 17599224515615 | 17599224902816 | 17599224904564 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f1ce8890200 | 0x7f1ce5825800 | 297826 | 297826 | 19792 | 2382616 | 65536 | 195167844 | 2278001 | 0 | 782408712 | 17599224945893 | 17599224902816 | 17599225291616 | 17599225293464 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f1ce8890080 | 0x7f1ce5825840 | 169599 | 169599 | 15760 | 1356800 | 65536 | 105285452 | 1254879 | 0 | 422889800 | 17599225333593 | 17599225291616 | 17599225601376 | 17599225603426 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f1ce60edf00 | 0x7f1ce5825880 | 258737 | 258737 | 20664 | 2069904 | 65536 | 175156884 | 1963536 | 0 | 702375908 | 17599225644165 | 17599225601376 | 17599225965696 | 17599225967437 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f1ce60edd80 | 0x7f1ce58258c0 | 161133 | 161133 | 14947 | 1289072 | 65536 | 99668799 | 1191612 | 0 | 400406652 | 17599226015586 | 17599225965696 | 17599226278336 | 17599226280129 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f1ce60edc00 | 0x7f1ce5825900 | 296613 | 296613 | 19851 | 2372912 | 65536 | 211958812 | 2274899 | 0 | 849585212 | 17599226320258 | 17599226278336 | 17599226674976 | 17599226676669 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f1ce60eda80 | 0x7f1ce5825940 | 296549 | 296549 | 20612 | 2372400 | 65536 | 215890509 | 2271918 | 0 | 865297144 | 17599226716468 | 17599226674976 | 17599227064736 | 17599227066609 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f1ce60ed900 | 0x7f1ce5825980 | 177585 | 177585 | 16300 | 1420688 | 65536 | 111061850 | 1320744 | 0 | 445990508 | 17599227106748 | 17599227064736 | 17599227379776 | 17599227381681 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f1ce60ed780 | 0x7f1ce58259c0 | 279463 | 279463 | 22603 | 2235712 | 65536 | 197343747 | 2128046 | 0 | 791123128 | 17599227421940 | 17599227379776 | 17599227754656 | 17599227756442 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f1ce60ed600 | 0x7f1ce5825a00 | 161081 | 161081 | 15306 | 1288656 | 65536 | 99456610 | 1190651 | 0 | 399561276 | 17599227807350 | 17599227754656 | 17599228061216 | 17599228063134 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f1ce8890480 | 0x7f1ce5825a40 | 296477 | 296477 | 21328 | 2371824 | 65536 | 210194267 | 2268497 | 0 | 842536848 | 17599228103923 | 17599228061216 | 17599228449377 | 17599228451204 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f1ce8890300 | 0x7f1ce5825a80 | 295165 | 295165 | 20099 | 2361328 | 65536 | 201808227 | 2257080 | 0 | 808969600 | 17599228491953 | 17599228449377 | 17599228839137 | 17599228841014 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f1ce8890180 | 0x7f1ce5825ac0 | 186107 | 186107 | 16970 | 1488864 | 65536 | 111191723 | 1383831 | 0 | 446514828 | 17599228881633 | 17599228839137 | 17599229164257 | 17599229166026 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f1ce8890000 | 0x7f1ce5825b00 | 298821 | 298821 | 22956 | 2390576 | 65536 | 206564441 | 2285325 | 0 | 828009164 | 17599229206785 | 17599229164257 | 17599229559937 | 17599229561796 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f1ce60ede80 | 0x7f1ce5825b40 | 170807 | 170807 | 16146 | 1366464 | 65536 | 109618243 | 1269968 | 0 | 440209784 | 17599229610025 | 17599229559937 | 17599229873537 | 17599229875448 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f1ce60edd00 | 0x7f1ce5825b80 | 297260 | 297260 | 19673 | 2378088 | 65536 | 197640121 | 2268866 | 0 | 792318656 | 17599229916137 | 17599229873537 | 17599230277377 | 17599230279218 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f1ce60edb80 | 0x7f1ce5825bc0 | 296572 | 296572 | 19642 | 2372584 | 65536 | 194914504 | 2267424 | 0 | 781405500 | 17599230321996 | 17599230277377 | 17599230673697 | 17599230675577 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f1ce60eda00 | 0x7f1ce5825c00 | 204886 | 204886 | 17647 | 1639096 | 65536 | 129298815 | 1534522 | 0 | 518943612 | 17599230716236 | 17599230673697 | 17599231008737 | 17599231010679 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f1ce60ed880 | 0x7f1ce5825c40 | 339686 | 339686 | 25283 | 2717496 | 65536 | 242270464 | 2610005 | 0 | 970841356 | 17599231050558 | 17599231008737 | 17599231425377 | 17599231471227 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f1ce60ed700 | 0x7f1ce5825c80 | 185664 | 185664 | 17222 | 1485320 | 65536 | 99311144 | 1372031 | 0 | 398988988 | 17599231490727 | 17599231425377 | 17599231779778 | 17599231781749 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f1ce8890580 | 0x7f1ce5825cc0 | 297130 | 297130 | 19433 | 2377048 | 65536 | 168980440 | 2248208 | 0 | 677690108 | 17599231823418 | 17599231779778 | 17599232174178 | 17599232175999 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f1ce8890400 | 0x7f1ce5825d00 | 297218 | 297218 | 19703 | 2377752 | 65536 | 185841999 | 2256938 | 0 | 745121804 | 17599232217538 | 17599232174178 | 17599232566018 | 17599232567829 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f1ce8890280 | 0x7f1ce5825d40 | 224070 | 224070 | 19255 | 1792568 | 65536 | 154540594 | 1688496 | 0 | 619907912 | 17599232608188 | 17599232566018 | 17599232912098 | 17599232914040 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f1ce8890100 | 0x7f1ce5825d80 | 379408 | 379408 | 27524 | 3035272 | 65536 | 280912538 | 2929123 | 0 | 1125410688 | 17599232955209 | 17599232912098 | 17599233355938 | 17599233375539 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f1ce60edf80 | 0x7f1ce5825dc0 | 210274 | 210274 | 18456 | 1682200 | 65536 | 134166152 | 1577796 | 0 | 538415380 | 17599233406918 | 17599233355938 | 17599233698658 | 17599233700550 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f1ce60ede00 | 0x7f1ce5825e00 | 305566 | 305566 | 22879 | 2444536 | 65536 | 187311519 | 2330912 | 0 | 751005704 | 17599233741359 | 17599233698658 | 17599234099938 | 17599234122930 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f1ce60edc80 | 0x7f1ce5825e40 | 311488 | 311488 | 20664 | 2491912 | 65536 | 175379827 | 2359463 | 0 | 703290192 | 17599234141379 | 17599234099938 | 17599234521698 | 17599234542609 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f1ce60edb00 | 0x7f1ce5825e80 | 265298 | 265298 | 21716 | 2122392 | 65536 | 189167497 | 2016701 | 0 | 758434840 | 17599234564818 | 17599234521698 | 17599234894818 | 17599234896660 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f1ce60ed980 | 0x7f1ce5825ec0 | 460424 | 460424 | 31007 | 3683400 | 65536 | 345057397 | 3579176 | 0 | 1381993856 | 17599234937179 | 17599234894818 | 17599235386979 | 17599235407387 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f1ce60ed800 | 0x7f1ce5825f00 | 243092 | 243092 | 20413 | 1944744 | 65536 | 159122048 | 1834807 | 0 | 638237984 | 17599235437666 | 17599235386979 | 17599235748579 | 17599235750578 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f1ce60ed680 | 0x7f1ce5825f40 | 335827 | 335827 | 22384 | 2686624 | 65536 | 210448050 | 2567232 | 0 | 843565464 | 17599235791957 | 17599235748579 | 17599236170179 | 17599236187327 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f1ce8890500 | 0x7f1ce5825f80 | 331346 | 331346 | 22209 | 2650776 | 65536 | 198664952 | 2529289 | 0 | 796429876 | 17599236212287 | 17599236170179 | 17599236590339 | 17599236607707 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f1ce8890380 | 0x7f1ce5825fc0 | 305461 | 305461 | 22903 | 2443696 | 65536 | 221925267 | 2342350 | 0 | 889461912 | 17599236633026 | 17599236590339 | 17599236995139 | 17599236996897 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f1ce8890200 | 0x7f1ce5826000 | 541098 | 541098 | 35284 | 4328792 | 65536 | 409061131 | 4223430 | 0 | 1638009952 | 17599237038946 | 17599236995139 | 17599237539139 | 17599237556653 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f1ce8890080 | 0x7f1ce5826040 | 272959 | 272959 | 21652 | 2183680 | 65536 | 178872300 | 2075612 | 0 | 717253708 | 17599237590162 | 17599237539139 | 17599237917699 | 17599237919603 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f1ce60edf00 | 0x7f1ce5826080 | 364250 | 364250 | 25231 | 2914008 | 65536 | 239595938 | 2797370 | 0 | 960166628 | 17599237960842 | 17599237917699 | 17599238367779 | 17599238389651 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f1ce60edd80 | 0x7f1ce58260c0 | 361051 | 361051 | 27014 | 2888416 | 65536 | 250347820 | 2778101 | 0 | 1003159244 | 17599238410891 | 17599238367779 | 17599238811939 | 17599238830550 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f1ce60edc00 | 0x7f1ce5826100 | 346530 | 346530 | 25136 | 2772248 | 65536 | 244408330 | 2664140 | 0 | 979393744 | 17599238853770 | 17599238811939 | 17599239233540 | 17599239275419 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f1ce60eda80 | 0x7f1ce5826140 | 623342 | 623342 | 39540 | 4986744 | 65536 | 471218885 | 4874374 | 0 | 1886645808 | 17599239286438 | 17599239233540 | 17599239849060 | 17599239890713 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f1ce60ed900 | 0x7f1ce5826180 | 302245 | 302245 | 22652 | 2417968 | 65536 | 211097802 | 2315171 | 0 | 846156844 | 17599239909623 | 17599239849060 | 17599240271140 | 17599240272963 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f1ce60ed780 | 0x7f1ce58261c0 | 398016 | 398016 | 27369 | 3184136 | 65536 | 275015593 | 3079684 | 0 | 1101844828 | 17599240314332 | 17599240271140 | 17599240730980 | 17599240773121 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f1ce60ed600 | 0x7f1ce5826200 | 390013 | 390013 | 27492 | 3120112 | 65536 | 291609356 | 3017706 | 0 | 1168212064 | 17599240783620 | 17599240730980 | 17599241209380 | 17599241250958 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f1ce8890480 | 0x7f1ce5826240 | 387234 | 387234 | 27184 | 3097880 | 65536 | 282987002 | 2990575 | 0 | 1133715156 | 17599241261508 | 17599241209380 | 17599241679140 | 17599241696477 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f1ce8890300 | 0x7f1ce5826280 | 717821 | 717821 | 44413 | 5742576 | 65536 | 546838363 | 5632697 | 0 | 2189093020 | 17599241722076 | 17599241679140 | 17599242343780 | 17599242387169 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f1ce8890180 | 0x7f1ce58262c0 | 364617 | 364617 | 26978 | 2916944 | 65536 | 260693479 | 2797452 | 0 | 1044537040 | 17599242407419 | 17599242343780 | 17599242803461 | 17599242846368 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f1ce8890000 | 0x7f1ce5826300 | 469179 | 469179 | 31717 | 3753440 | 65536 | 353955504 | 3646651 | 0 | 1417608640 | 17599242858207 | 17599242803461 | 17599243334021 | 17599243375194 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f1ce60ede80 | 0x7f1ce5826340 | 471620 | 471620 | 31455 | 3772968 | 65536 | 345981677 | 3668790 | 0 | 1385710812 | 17599243386094 | 17599243334021 | 17599243868101 | 17599243909021 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f1ce60edd00 | 0x7f1ce5826380 | 482209 | 482209 | 32420 | 3857680 | 65536 | 354559889 | 3749819 | 0 | 1419981536 | 17599243920091 | 17599243868101 | 17599244393061 | 17599244434278 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f1ce60edb80 | 0x7f1ce58263c0 | 880176 | 880176 | 52084 | 7041416 | 65536 | 681498431 | 6923810 | 0 | 2727736368 | 17599244445347 | 17599244393061 | 17599245177381 | 17599245219388 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f1ce60eda00 | 0x7f1ce5826400 | 425683 | 425683 | 29165 | 3405472 | 65536 | 316443620 | 3299976 | 0 | 1267551264 | 17599245239757 | 17599245177381 | 17599245674661 | 17599245715875 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f1ce60ed880 | 0x7f1ce5826440 | 548412 | 548412 | 35509 | 4387304 | 65536 | 413610552 | 4276643 | 0 | 1656223928 | 17599245726375 | 17599245674661 | 17599246252902 | 17599246298150 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f1ce60ed700 | 0x7f1ce5826480 | 552445 | 552445 | 36131 | 4419568 | 65536 | 417015369 | 4310800 | 0 | 1669847868 | 17599246309380 | 17599246252902 | 17599246841702 | 17599246885505 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f1ce8890580 | 0x7f1ce58264c0 | 563006 | 563006 | 36733 | 4504056 | 65536 | 424308773 | 4389098 | 0 | 1698977668 | 17599246896825 | 17599246841702 | 17599247423782 | 17599247465411 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f1ce8890400 | 0x7f1ce5826500 | 1040093 | 1040093 | 60372 | 8320752 | 65536 | 810548879 | 8216909 | 0 | 3243938040 | 17599247476150 | 17599247423782 | 17599248319942 | 17599248366608 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f1ce8890280 | 0x7f1ce5826540 | 721316 | 721316 | 43820 | 5770536 | 65536 | 547642350 | 5655562 | 0 | 2192311268 | 17599248385817 | 17599248320010 | 17599249006890 | 17599249054840 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f1ce8890100 | 0x7f1ce5826580 | 723963 | 723963 | 44643 | 5791712 | 65536 | 552976262 | 5675103 | 0 | 2213647452 | 17599249066990 | 17599249006890 | 17599249730090 | 17599249773812 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f1ce60edf80 | 0x7f1ce58265c0 | 722060 | 722060 | 44378 | 5776488 | 65536 | 542615241 | 5659951 | 0 | 2172201896 | 17599249785422 | 17599249730090 | 17599250439850 | 17599250485594 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f1ce60ede00 | 0x7f1ce5826600 | 726898 | 726898 | 45383 | 5815192 | 65536 | 561271847 | 5695667 | 0 | 2246829592 | 17599250497754 | 17599250439850 | 17599251146250 | 17599251191836 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f1ce60edc80 | 0x7f1ce5826640 | 1366597 | 1366597 | 77082 | 10932784 | 65536 | 1065672643 | 10816229 | 0 | 4264431668 | 17599251202856 | 17599251146250 | 17599252263051 | 17599252307888 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f1ce60edb00 | 0x7f1ce5826680 | 1372024 | 1372024 | 77782 | 10976200 | 65536 | 1078387006 | 10851064 | 0 | 4315289956 | 17599252326697 | 17599252263051 | 17599253373931 | 17599253438249 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f1ce60ed980 | 0x7f1ce58266c0 | 1371247 | 1371247 | 77518 | 10969984 | 65536 | 1075762186 | 10855352 | 0 | 4304789940 | 17599253449938 | 17599253373931 | 17599254525932 | 17599254595319 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f1ce60ed800 | 0x7f1ce5826700 | 1372553 | 1372553 | 77477 | 10980432 | 65536 | 1073980519 | 10850584 | 0 | 4297662720 | 17599254607839 | 17599254525932 | 17599255686252 | 17599255751820 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f1ce60ed680 | 0x7f1ce5826740 | 1374673 | 1374673 | 77106 | 10997392 | 65536 | 1076734019 | 10880624 | 0 | 4308678552 | 17599255763570 | 17599255686252 | 17599256816973 | 17599256888421 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f1ce8890500 | 0x7f1ce5826780 | 2664917 | 2664917 | 143141 | 21319344 | 65536 | 2130988346 | 21187794 | 0 | 8525694980 | 17599256899991 | 17599256816973 | 17599258795214 | 17599258866681 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f1ce8890380 | 0x7f1ce58267c0 | 2666982 | 2666982 | 142454 | 21335864 | 65536 | 2131372878 | 21221592 | 0 | 8527232704 | 17599258887370 | 17599258795214 | 17599260740654 | 17599260807081 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f1ce8890200 | 0x7f1ce5826800 | 2671177 | 2671177 | 143385 | 21369424 | 65536 | 2133795562 | 21240229 | 0 | 8536923260 | 17599260818851 | 17599260740654 | 17599262739695 | 17599262809690 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f1ce8890080 | 0x7f1ce5826840 | 2667766 | 2667766 | 142943 | 21342136 | 65536 | 2126319326 | 21225480 | 0 | 8507017148 | 17599262822020 | 17599262739695 | 17599264745136 | 17599264813360 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f1ce60edf00 | 0x7f1ce5826880 | 2672975 | 2672975 | 142959 | 21383808 | 65536 | 2135790377 | 21263256 | 0 | 8544903336 | 17599264824699 | 17599264745136 | 17599266696017 | 17599266764750 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 223519 | 223519 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f1ce60edd80 | 0x7f1ce58268c0 | 5255554 | 5255554 | 272964 | 42044440 | 65536 | 4232756362 | 41926604 | 0 | 16932766864 | 17599266776960 | 17599266696017 | 17599270334258 | 17599270406228 |