50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 911507 | 911514 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f1631e04180 | 502997 | 502997 | 16972 | 4023984 | 524288 | 370817117 | 3823622 | 0 | 1498078928 | 12076135631582317 | 12076135876277375 | 12076135876600893 | 12076135876709715 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 911507 | 911514 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f1631e35100 | 27681 | 27681 | 20333 | 221456 | 512 | 1086316 | 73178 | 0 | 4359244 | 12076135891263753 | 12076135891585458 | 12076135891591698 | 12076135891600299 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f173da09900 | 0x7f1631e35140 | 219780 | 219780 | 20940 | 1758248 | 65536 | 130207192 | 1588334 | 0 | 522643840 | 12076135891675879 | 12076135891912816 | 12076135892046576 | 12076135892050806 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f173da09800 | 0x7f1631e35180 | 398021 | 398021 | 32638 | 3184176 | 65536 | 270873378 | 3018567 | 0 | 1085308604 | 12076135892131917 | 12076135892328174 | 12076135892581453 | 12076135892649929 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f173da09700 | 0x7f1631e351c0 | 401188 | 401188 | 29658 | 3209512 | 65536 | 322666084 | 3040737 | 0 | 1292478324 | 12076135892687149 | 12076135892886251 | 12076135893141450 | 12076135893212014 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f173da09600 | 0x7f1631e35200 | 220692 | 220692 | 20222 | 1765544 | 65536 | 136739482 | 1590035 | 0 | 548771228 | 12076135893238704 | 12076135893449769 | 12076135893584648 | 12076135893588514 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f173da09500 | 0x7f1631e35240 | 219413 | 219413 | 20153 | 1755312 | 65536 | 143398510 | 1583438 | 0 | 575407704 | 12076135893651310 | 12076135893844327 | 12076135893977926 | 12076135893981825 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f173da09400 | 0x7f1631e35280 | 217188 | 217188 | 21450 | 1737512 | 65536 | 147974236 | 1569850 | 0 | 593713316 | 12076135894077813 | 12076135894263525 | 12076135894396324 | 12076135894399992 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f173da09300 | 0x7f1631e352c0 | 407948 | 407948 | 35786 | 3263592 | 65536 | 260586085 | 3094283 | 0 | 1044160908 | 12076135894458390 | 12076135894657123 | 12076135894916641 | 12076135894956015 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f173da09a00 | 0x7f1631e35300 | 403084 | 403084 | 35873 | 3224680 | 65536 | 340335479 | 3047988 | 0 | 1363157084 | 12076135894992904 | 12076135895220640 | 12076135895476319 | 12076135895543427 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f173da09900 | 0x7f1631e35340 | 213868 | 213868 | 21625 | 1710952 | 65536 | 156814464 | 1543648 | 0 | 629075148 | 12076135895572341 | 12076135895781597 | 12076135895911996 | 12076135895915639 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f173da09800 | 0x7f1631e35380 | 222837 | 222837 | 20388 | 1782704 | 65536 | 124303745 | 1609580 | 0 | 499028544 | 12076135895976442 | 12076135896182555 | 12076135896318714 | 12076135896322385 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f173da09700 | 0x7f1631e353c0 | 215773 | 215773 | 22513 | 1726192 | 65536 | 146083609 | 1560049 | 0 | 586149352 | 12076135896399067 | 12076135896580793 | 12076135896712472 | 12076135896716317 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f173da09600 | 0x7f1631e35400 | 397044 | 397044 | 33384 | 3176360 | 65536 | 362986535 | 3008652 | 0 | 1453764412 | 12076135896777731 | 12076135896968631 | 12076135897222070 | 12076135897289161 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f173da09500 | 0x7f1631e35440 | 455509 | 455509 | 16313 | 3644080 | 65536 | 326842630 | 3014537 | 0 | 1309187300 | 12076135897315731 | 12076135897525748 | 12076135897817267 | 12076135897884047 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f173da09400 | 0x7f1631e35480 | 224989 | 224989 | 22080 | 1799920 | 65536 | 130162555 | 1625013 | 0 | 522461220 | 12076135897908152 | 12076135898125105 | 12076135898262385 | 12076135898266207 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f173da09300 | 0x7f1631e354c0 | 220701 | 220701 | 21554 | 1765616 | 65536 | 131956534 | 1597676 | 0 | 529640592 | 12076135898327852 | 12076135898527183 | 12076135898662383 | 12076135898666260 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f173da09a00 | 0x7f1631e35500 | 214940 | 214940 | 20630 | 1719528 | 65536 | 148382530 | 1550887 | 0 | 595345928 | 12076135898740869 | 12076135898923341 | 12076135899054701 | 12076135899058650 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f173da09900 | 0x7f1631e35540 | 402532 | 402532 | 33049 | 3220264 | 65536 | 349425565 | 3050451 | 0 | 1399518564 | 12076135899119873 | 12076135899315980 | 12076135899572618 | 12076135899637155 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f173da09800 | 0x7f1631e35580 | 400940 | 400940 | 25404 | 3207528 | 65536 | 266559204 | 3030253 | 0 | 1068054772 | 12076135899661089 | 12076135899864937 | 12076135900120456 | 12076135900191405 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f173da09700 | 0x7f1631e355c0 | 215636 | 215636 | 21065 | 1725096 | 65536 | 150075504 | 1554215 | 0 | 602119556 | 12076135900218064 | 12076135900418534 | 12076135900550533 | 12076135900554630 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f173da09600 | 0x7f1631e35600 | 215925 | 215925 | 22195 | 1727408 | 65536 | 145311145 | 1545970 | 0 | 583068004 | 12076135900615102 | 12076135900808132 | 12076135900939811 | 12076135900943483 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f173da09500 | 0x7f1631e35640 | 215973 | 215973 | 22280 | 1727792 | 65536 | 155371071 | 1558826 | 0 | 623299616 | 12076135901024363 | 12076135901206530 | 12076135901338209 | 12076135901342103 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f173da09400 | 0x7f1631e35680 | 393533 | 393533 | 33143 | 3148272 | 65536 | 356103467 | 2979972 | 0 | 1426232856 | 12076135901402967 | 12076135901594528 | 12076135901845887 | 12076135901911472 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f173da09300 | 0x7f1631e356c0 | 399612 | 399612 | 33558 | 3196904 | 65536 | 335710181 | 3019301 | 0 | 1344656216 | 12076135901934565 | 12076135902138206 | 12076135902392764 | 12076135902458919 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f173da09a00 | 0x7f1631e35700 | 212932 | 212932 | 19091 | 1703464 | 65536 | 161336101 | 1524992 | 0 | 647161268 | 12076135902483304 | 12076135902682683 | 12076135902812762 | 12076135902816474 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f173da09900 | 0x7f1631e35740 | 220157 | 220157 | 22822 | 1761264 | 65536 | 138683645 | 1592636 | 0 | 556547436 | 12076135902878779 | 12076135903065401 | 12076135903200760 | 12076135903204555 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f173da09800 | 0x7f1631e35780 | 218708 | 218708 | 22584 | 1749672 | 65536 | 146240734 | 1573755 | 0 | 586792960 | 12076135903279434 | 12076135903454999 | 12076135903588438 | 12076135903592756 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f173da09700 | 0x7f1631e357c0 | 411557 | 411557 | 34602 | 3292464 | 65536 | 260449053 | 3125311 | 0 | 1043615300 | 12076135903653078 | 12076135903844597 | 12076135904107796 | 12076135904178445 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f173da09600 | 0x7f1631e35800 | 394965 | 394965 | 32592 | 3159728 | 65536 | 335154546 | 2991645 | 0 | 1342433960 | 12076135904206166 | 12076135904405394 | 12076135904657393 | 12076135904723478 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f173da09500 | 0x7f1631e35840 | 219821 | 219821 | 22006 | 1758576 | 65536 | 151006420 | 1586594 | 0 | 605845656 | 12076135904747222 | 12076135904945392 | 12076135905079791 | 12076135905083907 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f173da09400 | 0x7f1631e35880 | 216277 | 216277 | 21473 | 1730224 | 65536 | 136915853 | 1559714 | 0 | 549518040 | 12076135905145151 | 12076135905332910 | 12076135905465549 | 12076135905469334 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f173da09300 | 0x7f1631e358c0 | 210453 | 210453 | 21904 | 1683632 | 65536 | 167739000 | 1514623 | 0 | 672774932 | 12076135905542369 | 12076135905720268 | 12076135905848587 | 12076135905852496 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f173da09a00 | 0x7f1631e35900 | 386196 | 386196 | 34835 | 3089576 | 65536 | 350683069 | 2914603 | 0 | 1404550944 | 12076135905913078 | 12076135906112266 | 12076135906359305 | 12076135906424319 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f173da09900 | 0x7f1631e35940 | 395700 | 395700 | 32772 | 3165608 | 65536 | 332623720 | 2997210 | 0 | 1332309692 | 12076135906449706 | 12076135906652423 | 12076135906904742 | 12076135906969903 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f173da09800 | 0x7f1631e35980 | 215796 | 215796 | 20467 | 1726376 | 65536 | 149643411 | 1549858 | 0 | 600397108 | 12076135906992464 | 12076135907198500 | 12076135907330500 | 12076135907334480 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f173da09700 | 0x7f1631e359c0 | 221933 | 221933 | 26036 | 1775472 | 65536 | 139862487 | 1598686 | 0 | 561289856 | 12076135907397477 | 12076135907588099 | 12076135907724738 | 12076135907728633 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f173da09600 | 0x7f1631e35a00 | 215020 | 215020 | 22041 | 1720168 | 65536 | 150407740 | 1552684 | 0 | 603451080 | 12076135907805646 | 12076135907984577 | 12076135908116256 | 12076135908120371 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f173da09500 | 0x7f1631e35a40 | 402069 | 402069 | 30018 | 3216560 | 65536 | 316648154 | 3043310 | 0 | 1268409256 | 12076135908172798 | 12076135908370655 | 12076135908628413 | 12076135908694127 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f173da09400 | 0x7f1631e35a80 | 401477 | 401477 | 33803 | 3211824 | 65536 | 289394045 | 3042461 | 0 | 1159389024 | 12076135908718432 | 12076135908914332 | 12076135909170651 | 12076135909239711 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f173da09300 | 0x7f1631e35ac0 | 210061 | 210061 | 22792 | 1680496 | 65536 | 162852394 | 1509145 | 0 | 653230860 | 12076135909266100 | 12076135909465369 | 12076135909594009 | 12076135909597896 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f173da09a00 | 0x7f1631e35b00 | 221325 | 221325 | 21489 | 1770608 | 65536 | 152646754 | 1579671 | 0 | 612405868 | 12076135909658940 | 12076135909850007 | 12076135909986167 | 12076135909990386 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f173da09900 | 0x7f1631e35b40 | 218661 | 218661 | 21285 | 1749296 | 65536 | 145444286 | 1582761 | 0 | 583599500 | 12076135910065956 | 12076135910244885 | 12076135910378805 | 12076135910382585 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f173da09800 | 0x7f1631e35b80 | 417309 | 417309 | 30063 | 3338480 | 65536 | 335502899 | 3162249 | 0 | 1343830484 | 12076135910443799 | 12076135910631123 | 12076135910899122 | 12076135910964276 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f173da09700 | 0x7f1631e35bc0 | 399948 | 399948 | 28536 | 3199592 | 65536 | 309698915 | 3027553 | 0 | 1240614292 | 12076135910993360 | 12076135911191921 | 12076135911447599 | 12076135911486106 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f173da09600 | 0x7f1631e35c00 | 214421 | 214421 | 20969 | 1715376 | 65536 | 150273533 | 1541979 | 0 | 602919804 | 12076135911528995 | 12076135911705198 | 12076135911836077 | 12076135911839973 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f173da09500 | 0x7f1631e35c40 | 226044 | 226044 | 24410 | 1808360 | 65536 | 153600165 | 1626686 | 0 | 616219396 | 12076135911901658 | 12076135912112396 | 12076135912251595 | 12076135912255746 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f173da09400 | 0x7f1631e35c80 | 216973 | 216973 | 23315 | 1735792 | 65536 | 145955546 | 1555970 | 0 | 585638720 | 12076135912329052 | 12076135912504394 | 12076135912637354 | 12076135912641653 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f173da09300 | 0x7f1631e35cc0 | 408756 | 408756 | 32139 | 3270056 | 65536 | 332274376 | 3096299 | 0 | 1330914168 | 12076135912701605 | 12076135912895432 | 12076135913158631 | 12076135913225909 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f173da09a00 | 0x7f1631e35d00 | 404748 | 404748 | 33222 | 3237992 | 65536 | 327639119 | 3070753 | 0 | 1312369128 | 12076135913250364 | 12076135913453509 | 12076135913712708 | 12076135913778446 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f173da09900 | 0x7f1631e35d40 | 213268 | 213268 | 23014 | 1706152 | 65536 | 146207357 | 1530702 | 0 | 586669452 | 12076135913803051 | 12076135914001027 | 12076135914131266 | 12076135914135139 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f173da09800 | 0x7f1631e35d80 | 224965 | 224965 | 27345 | 1799728 | 65536 | 147588610 | 1622381 | 0 | 592202636 | 12076135914198617 | 12076135914387585 | 12076135914527104 | 12076135914530964 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f173da09700 | 0x7f1631e35dc0 | 215564 | 215564 | 22126 | 1724520 | 65536 | 154458939 | 1559923 | 0 | 619651436 | 12076135914603609 | 12076135914780703 | 12076135914913022 | 12076135914916892 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f173da09600 | 0x7f1631e35e00 | 406573 | 406573 | 30162 | 3252592 | 65536 | 318609098 | 3079105 | 0 | 1276266300 | 12076135914976222 | 12076135915169181 | 12076135915431260 | 12076135915470050 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f173da09500 | 0x7f1631e35e40 | 402044 | 402044 | 24614 | 3216360 | 65536 | 274582675 | 3041967 | 0 | 1100147984 | 12076135915515494 | 12076135915691258 | 12076135915949177 | 12076135916017497 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f173da09400 | 0x7f1631e35e80 | 214188 | 214188 | 21939 | 1713512 | 65536 | 153279462 | 1549376 | 0 | 614938960 | 12076135916042514 | 12076135916246776 | 12076135916378455 | 12076135916382195 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f173da09300 | 0x7f1631e35ec0 | 223652 | 223652 | 26658 | 1789224 | 65536 | 165488511 | 1600116 | 0 | 663790392 | 12076135916442116 | 12076135916626934 | 12076135916765653 | 12076135916769394 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f173da09a00 | 0x7f1631e35f00 | 216957 | 216957 | 22108 | 1735664 | 65536 | 152092797 | 1564451 | 0 | 610198372 | 12076135916842019 | 12076135917024692 | 12076135917157971 | 12076135917162355 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f173da09900 | 0x7f1631e35f40 | 407733 | 407733 | 30288 | 3261872 | 65536 | 326297562 | 3087056 | 0 | 1307018564 | 12076135917222006 | 12076135917413170 | 12076135917676049 | 12076135917740659 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f173da09800 | 0x7f1631e35f80 | 404853 | 404853 | 29922 | 3238832 | 65536 | 324746065 | 3068579 | 0 | 1300797624 | 12076135917768181 | 12076135917959247 | 12076135918218926 | 12076135918283899 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f173da09700 | 0x7f1631e35fc0 | 217500 | 217500 | 22294 | 1740008 | 65536 | 149810950 | 1573899 | 0 | 601097328 | 12076135918308996 | 12076135918506124 | 12076135918639724 | 12076135918643507 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f173da09600 | 0x7f1631e36000 | 227060 | 227060 | 27087 | 1816488 | 65536 | 189821719 | 1647988 | 0 | 761139112 | 12076135918703709 | 12076135918896203 | 12076135919037002 | 12076135919040836 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f173da09500 | 0x7f1631e36040 | 215988 | 215988 | 20681 | 1727912 | 65536 | 148964851 | 1558513 | 0 | 597687536 | 12076135919114543 | 12076135919291881 | 12076135919424680 | 12076135919428536 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f173da09400 | 0x7f1631e36080 | 420684 | 420684 | 30841 | 3365480 | 65536 | 325896201 | 3176564 | 0 | 1305404156 | 12076135919489800 | 12076135919673639 | 12076135919944997 | 12076135920017020 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f173da09300 | 0x7f1631e360c0 | 401965 | 401965 | 32715 | 3215728 | 65536 | 341367166 | 3050163 | 0 | 1367282372 | 12076135920044140 | 12076135920242756 | 12076135920500515 | 12076135920565670 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f173da09a00 | 0x7f1631e36100 | 215653 | 215653 | 23728 | 1725232 | 65536 | 155661467 | 1552374 | 0 | 624482664 | 12076135920589764 | 12076135920785313 | 12076135920917633 | 12076135920921601 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f173da09900 | 0x7f1631e36140 | 231869 | 231869 | 26505 | 1854960 | 65536 | 199168746 | 1690692 | 0 | 798561468 | 12076135920984518 | 12076135921168991 | 12076135921312991 | 12076135921317186 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f173da09800 | 0x7f1631e36180 | 218892 | 218892 | 25165 | 1751144 | 65536 | 149762211 | 1571812 | 0 | 600883872 | 12076135921390883 | 12076135921569309 | 12076135921704189 | 12076135921708153 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f173da09700 | 0x7f1631e361c0 | 406604 | 406604 | 35239 | 3252840 | 65536 | 348093929 | 3061599 | 0 | 1394206984 | 12076135921766161 | 12076135921962267 | 12076135922223866 | 12076135922289944 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f173da09600 | 0x7f1631e36200 | 385892 | 385892 | 33601 | 3087144 | 65536 | 344128494 | 2917265 | 0 | 1378333408 | 12076135922314059 | 12076135922514425 | 12076135922762263 | 12076135922827132 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f173da09500 | 0x7f1631e36240 | 218436 | 218436 | 22209 | 1747496 | 65536 | 141894914 | 1579059 | 0 | 569443280 | 12076135922851137 | 12076135923052822 | 12076135923187221 | 12076135923191109 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f173da09400 | 0x7f1631e36280 | 244660 | 244660 | 29146 | 1957288 | 65536 | 210835781 | 1782158 | 0 | 845210856 | 12076135923254517 | 12076135923438900 | 12076135923591379 | 12076135923595380 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f173da09300 | 0x7f1631e362c0 | 220252 | 220252 | 24299 | 1762024 | 65536 | 148472931 | 1590727 | 0 | 595752240 | 12076135923687572 | 12076135923864978 | 12076135924000497 | 12076135924005693 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f173da09a00 | 0x7f1631e36300 | 412828 | 412828 | 31871 | 3302632 | 65536 | 304053984 | 3113380 | 0 | 1218032228 | 12076135924066596 | 12076135924250256 | 12076135924516815 | 12076135924556536 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f173da09900 | 0x7f1631e36340 | 401197 | 401197 | 27661 | 3209584 | 65536 | 244948921 | 3022444 | 0 | 981613028 | 12076135924598024 | 12076135924772333 | 12076135925030412 | 12076135925096600 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f173da09800 | 0x7f1631e36380 | 221381 | 221381 | 24420 | 1771056 | 65536 | 147174777 | 1600334 | 0 | 590547836 | 12076135925121566 | 12076135925325131 | 12076135925461610 | 12076135925465365 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f173da09700 | 0x7f1631e363c0 | 254204 | 254204 | 28677 | 2033640 | 65536 | 225058251 | 1865796 | 0 | 902103312 | 12076135925525417 | 12076135925712649 | 12076135925871688 | 12076135925875738 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f173da09600 | 0x7f1631e36400 | 214940 | 214940 | 23544 | 1719528 | 65536 | 154503355 | 1555671 | 0 | 619877392 | 12076135925950156 | 12076135926133447 | 12076135926265766 | 12076135926269991 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f173da09500 | 0x7f1631e36440 | 411284 | 411284 | 35339 | 3290280 | 65536 | 312742819 | 3087322 | 0 | 1252790504 | 12076135926331465 | 12076135926524485 | 12076135926790083 | 12076135926855850 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f173da09400 | 0x7f1631e36480 | 402629 | 402629 | 34278 | 3221040 | 65536 | 348166460 | 3051926 | 0 | 1394482548 | 12076135926880195 | 12076135927086402 | 12076135927345761 | 12076135927412083 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f173da09300 | 0x7f1631e364c0 | 227317 | 227317 | 23937 | 1818544 | 65536 | 144876392 | 1641602 | 0 | 581373540 | 12076135927437140 | 12076135927632479 | 12076135927772479 | 12076135927776330 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f173da09a00 | 0x7f1631e36500 | 263444 | 263444 | 28573 | 2107560 | 65536 | 233729296 | 1943228 | 0 | 936761400 | 12076135927839778 | 12076135928032957 | 12076135928198236 | 12076135928202072 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f173da09900 | 0x7f1631e36540 | 218117 | 218117 | 22409 | 1744944 | 65536 | 152130092 | 1583186 | 0 | 610376012 | 12076135928276911 | 12076135928455995 | 12076135928590554 | 12076135928594611 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f173da09800 | 0x7f1631e36580 | 411877 | 411877 | 36324 | 3295024 | 65536 | 319586172 | 3091313 | 0 | 1280168656 | 12076135928653250 | 12076135928841593 | 12076135929108312 | 12076135929175791 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f173da09700 | 0x7f1631e365c0 | 401221 | 401221 | 31108 | 3209776 | 65536 | 271821564 | 3040541 | 0 | 1089110712 | 12076135929199455 | 12076135929397590 | 12076135929655989 | 12076135929720954 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f173da09600 | 0x7f1631e36600 | 222180 | 222180 | 26733 | 1777448 | 65536 | 149249017 | 1595206 | 0 | 598875368 | 12076135929745049 | 12076135929940468 | 12076135930077587 | 12076135930081875 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f173da09500 | 0x7f1631e36640 | 276436 | 276436 | 30817 | 2211496 | 65536 | 246689439 | 2040067 | 0 | 988604532 | 12076135930142157 | 12076135930333426 | 12076135930507505 | 12076135930511514 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f173da09400 | 0x7f1631e36680 | 219037 | 219037 | 25268 | 1752304 | 65536 | 151112141 | 1572668 | 0 | 606270624 | 12076135930583838 | 12076135930761584 | 12076135930896783 | 12076135930900817 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f173da09300 | 0x7f1631e366c0 | 404588 | 404588 | 34708 | 3236712 | 65536 | 347407339 | 3064117 | 0 | 1391450328 | 12076135930960408 | 12076135931160782 | 12076135931422860 | 12076135931488359 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f173da09a00 | 0x7f1631e36700 | 401052 | 401052 | 29048 | 3208424 | 65536 | 261135179 | 3018796 | 0 | 1046365968 | 12076135931513315 | 12076135931714059 | 12076135931972618 | 12076135932038171 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f173da09900 | 0x7f1631e36740 | 225837 | 225837 | 22999 | 1806704 | 65536 | 141021247 | 1633172 | 0 | 565964036 | 12076135932062466 | 12076135932261096 | 12076135932400776 | 12076135932404782 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f173da09800 | 0x7f1631e36780 | 288029 | 288029 | 30475 | 2304240 | 65536 | 259543270 | 2142008 | 0 | 1040018208 | 12076135932464483 | 12076135932648774 | 12076135932831173 | 12076135932835122 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f173da09700 | 0x7f1631e367c0 | 226389 | 226389 | 23672 | 1811120 | 65536 | 148586492 | 1634831 | 0 | 596218872 | 12076135932933384 | 12076135933114052 | 12076135933253731 | 12076135933258038 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f173da09600 | 0x7f1631e36800 | 412325 | 412325 | 35844 | 3298608 | 65536 | 377878665 | 3124793 | 0 | 1513342756 | 12076135933318140 | 12076135933504930 | 12076135933771969 | 12076135933839388 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f173da09500 | 0x7f1631e36840 | 403364 | 403364 | 30731 | 3226920 | 65536 | 264953630 | 3046071 | 0 | 1061634084 | 12076135933862772 | 12076135934069567 | 12076135934329886 | 12076135934395973 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f173da09400 | 0x7f1631e36880 | 220917 | 220917 | 25674 | 1767344 | 65536 | 137415033 | 1589913 | 0 | 551500660 | 12076135934420699 | 12076135934619005 | 12076135934755164 | 12076135934759278 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f173da09300 | 0x7f1631e368c0 | 302492 | 302492 | 32001 | 2419944 | 65536 | 270457391 | 2255976 | 0 | 1083673664 | 12076135934819530 | 12076135935005563 | 12076135935197242 | 12076135935246734 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f173da09a00 | 0x7f1631e36900 | 226333 | 226333 | 25084 | 1810672 | 65536 | 159914218 | 1625610 | 0 | 641549000 | 12076135935283692 | 12076135935469880 | 12076135935609400 | 12076135935613485 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f173da09900 | 0x7f1631e36940 | 438956 | 438956 | 41724 | 3511656 | 65536 | 405035660 | 3332328 | 0 | 1621963316 | 12076135935673377 | 12076135935857078 | 12076135936142197 | 12076135936163347 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f173da09800 | 0x7f1631e36980 | 402716 | 402716 | 32036 | 3221736 | 65536 | 328363312 | 3029913 | 0 | 1315272584 | 12076135936221365 | 12076135936400596 | 12076135936660275 | 12076135936705385 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f173da09700 | 0x7f1631e369c0 | 223205 | 223205 | 24953 | 1785648 | 65536 | 152044546 | 1602551 | 0 | 610074464 | 12076135936729309 | 12076135936925233 | 12076135937062993 | 12076135937067487 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f173da09600 | 0x7f1631e36a00 | 330637 | 330637 | 34151 | 2645104 | 65536 | 298737788 | 2475240 | 0 | 1196802756 | 12076135937128341 | 12076135937320111 | 12076135937530350 | 12076135937576183 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f173da09500 | 0x7f1631e36a40 | 222900 | 222900 | 25146 | 1783208 | 65536 | 152524368 | 1605008 | 0 | 611974088 | 12076135937614855 | 12076135937794189 | 12076135937931948 | 12076135937936162 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f173da09400 | 0x7f1631e36a80 | 469933 | 469933 | 41177 | 3759472 | 65536 | 439651233 | 3593963 | 0 | 1760425108 | 12076135937994140 | 12076135938190507 | 12076135938497385 | 12076135938545775 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f173da09300 | 0x7f1631e36ac0 | 395140 | 395140 | 33537 | 3161128 | 65536 | 332774074 | 2981371 | 0 | 1332925232 | 12076135938571192 | 12076135938767624 | 12076135939022983 | 12076135939070590 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f173da09a00 | 0x7f1631e36b00 | 226061 | 226061 | 24994 | 1808496 | 65536 | 145521797 | 1640953 | 0 | 583959556 | 12076135939101498 | 12076135939298487 | 12076135939438647 | 12076135939445777 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f173da09900 | 0x7f1631e36b40 | 357980 | 357980 | 35818 | 2863848 | 65536 | 325865663 | 2695953 | 0 | 1305328292 | 12076135939501080 | 12076135939687765 | 12076135939915924 | 12076135939961827 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f173da09800 | 0x7f1631e36b80 | 221596 | 221596 | 26483 | 1772776 | 65536 | 157677697 | 1582120 | 0 | 632571872 | 12076135940005578 | 12076135940181523 | 12076135940318802 | 12076135940322136 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f173da09700 | 0x7f1631e36bc0 | 547565 | 547565 | 16239 | 4380528 | 65536 | 472405577 | 3866623 | 0 | 1891442848 | 12076135940384622 | 12076135940575121 | 12076135940933200 | 12076135940978726 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f173da09600 | 0x7f1631e36c00 | 394509 | 394509 | 35185 | 3156080 | 65536 | 353614703 | 2979157 | 0 | 1416300872 | 12076135941007660 | 12076135941208559 | 12076135941462798 | 12076135941510635 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f173da09500 | 0x7f1631e36c40 | 231645 | 231645 | 24861 | 1853168 | 65536 | 149862564 | 1680595 | 0 | 601289828 | 12076135941536243 | 12076135941733516 | 12076135941876876 | 12076135941880162 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f173da09400 | 0x7f1631e36c80 | 382517 | 382517 | 37690 | 3060144 | 65536 | 351156251 | 2890526 | 0 | 1406469800 | 12076135941942077 | 12076135942134475 | 12076135942379434 | 12076135942427509 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f173da09300 | 0x7f1631e36cc0 | 223053 | 223053 | 29391 | 1784432 | 65536 | 160871228 | 1594550 | 0 | 645358740 | 12076135942467082 | 12076135942643433 | 12076135942781512 | 12076135942784803 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f173da09a00 | 0x7f1631e36d00 | 576516 | 576516 | 48860 | 4612136 | 65536 | 543045725 | 4447484 | 0 | 2174001600 | 12076135942844243 | 12076135943037831 | 12076135943415750 | 12076135943461961 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f173da09900 | 0x7f1631e36d40 | 408757 | 408757 | 31972 | 3270064 | 65536 | 298655669 | 3097434 | 0 | 1196437100 | 12076135943487429 | 12076135943692229 | 12076135943957348 | 12076135944005892 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f173da09800 | 0x7f1631e36d80 | 219941 | 219941 | 28289 | 1759536 | 65536 | 170945297 | 1576055 | 0 | 685664364 | 12076135944039004 | 12076135944228707 | 12076135944364866 | 12076135944368416 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f173da09700 | 0x7f1631e36dc0 | 433644 | 433644 | 39108 | 3469160 | 65536 | 402529939 | 3309153 | 0 | 1611969960 | 12076135944427005 | 12076135944613825 | 12076135944893664 | 12076135944912948 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f173da09600 | 0x7f1631e36e00 | 229092 | 229092 | 27867 | 1832744 | 65536 | 183223889 | 1658367 | 0 | 734793500 | 12076135944979622 | 12076135945160543 | 12076135945303102 | 12076135945306409 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f173da09500 | 0x7f1631e36e40 | 642980 | 642980 | 50441 | 5143848 | 65536 | 611399319 | 4986704 | 0 | 2447418384 | 12076135945366952 | 12076135945553661 | 12076135945977180 | 12076135946024083 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f173da09400 | 0x7f1631e36e80 | 401004 | 401004 | 36411 | 3208040 | 65536 | 321005556 | 2994044 | 0 | 1285863932 | 12076135946049861 | 12076135946247739 | 12076135946508058 | 12076135946552956 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f173da09300 | 0x7f1631e36ec0 | 224892 | 224892 | 26594 | 1799144 | 65536 | 160692309 | 1618416 | 0 | 644660108 | 12076135946577952 | 12076135946772536 | 12076135946912216 | 12076135946915630 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f173da09a00 | 0x7f1631e36f00 | 486916 | 486916 | 42282 | 3895336 | 65536 | 456299460 | 3732954 | 0 | 1827058800 | 12076135946975952 | 12076135947173815 | 12076135947488854 | 12076135947536514 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f173da09900 | 0x7f1631e36f40 | 243861 | 243861 | 27935 | 1950896 | 65536 | 214274314 | 1786827 | 0 | 858990044 | 12076135947579614 | 12076135947753013 | 12076135947906132 | 12076135947909347 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f173da09800 | 0x7f1631e36f80 | 811868 | 811868 | 15266 | 6494952 | 65536 | 750130493 | 6111486 | 0 | 3002343300 | 12076135947969058 | 12076135948167091 | 12076135948701489 | 12076135948747224 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f173da09700 | 0x7f1631e36fc0 | 453725 | 453725 | 39882 | 3629808 | 65536 | 342898212 | 3436387 | 0 | 1373418948 | 12076135948776809 | 12076135948972528 | 12076135949268687 | 12076135949334996 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f173da09600 | 0x7f1631e37000 | 247852 | 247852 | 27756 | 1982824 | 65536 | 220216318 | 1817949 | 0 | 882766008 | 12076135949358860 | 12076135949558125 | 12076135949713965 | 12076135949717246 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f173da09500 | 0x7f1631e37040 | 592852 | 592852 | 48555 | 4742824 | 65536 | 560091378 | 4576005 | 0 | 2242212868 | 12076135949780274 | 12076135949957484 | 12076135950342762 | 12076135950408722 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f173da09400 | 0x7f1631e37080 | 281580 | 281580 | 30184 | 2252648 | 65536 | 247585655 | 2088620 | 0 | 992239392 | 12076135950448005 | 12076135950627081 | 12076135950805320 | 12076135950808675 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f173da09300 | 0x7f1631e370c0 | 925156 | 925156 | 68584 | 7401256 | 65536 | 887122625 | 7232769 | 0 | 3550311012 | 12076135950869027 | 12076135951071719 | 12076135951681317 | 12076135951749353 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f173da09a00 | 0x7f1631e37100 | 498909 | 498909 | 42947 | 3991280 | 65536 | 459214143 | 3825241 | 0 | 1838688700 | 12076135951775452 | 12076135951973156 | 12076135952301315 | 12076135952366320 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f173da09900 | 0x7f1631e37140 | 281181 | 281181 | 29875 | 2249456 | 65536 | 251756305 | 2087022 | 0 | 1008901808 | 12076135952391326 | 12076135952589153 | 12076135952767073 | 12076135952770351 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f173da09800 | 0x7f1631e37180 | 699717 | 699717 | 54711 | 5597744 | 65536 | 666113535 | 5434198 | 0 | 2666310096 | 12076135952830763 | 12076135953024192 | 12076135953481310 | 12076135953548026 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f173da09700 | 0x7f1631e371c0 | 312933 | 312933 | 31904 | 2503472 | 65536 | 283406108 | 2341620 | 0 | 1135510608 | 12076135953584634 | 12076135953766269 | 12076135953965948 | 12076135954032316 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f173da09600 | 0x7f1631e37200 | 1065868 | 1065868 | 76568 | 8526952 | 65536 | 1026834591 | 8356556 | 0 | 4109159364 | 12076135954055449 | 12076135954257787 | 12076135954960024 | 12076135955031964 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f173da09500 | 0x7f1631e37240 | 607821 | 607821 | 15490 | 4862576 | 65536 | 534904924 | 4385961 | 0 | 2141495372 | 12076135955058122 | 12076135955260343 | 12076135955660501 | 12076135955726885 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f173da09400 | 0x7f1631e37280 | 319429 | 319429 | 32688 | 2555440 | 65536 | 288497484 | 2384202 | 0 | 1155890280 | 12076135955750178 | 12076135955948020 | 12076135956151699 | 12076135956218879 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f173da09300 | 0x7f1631e372c0 | 802853 | 802853 | 60356 | 6422832 | 65536 | 768973468 | 6258548 | 0 | 3077743552 | 12076135956243165 | 12076135956439858 | 12076135956965776 | 12076135957030768 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f173da09a00 | 0x7f1631e37300 | 350076 | 350076 | 33759 | 2800616 | 65536 | 321080753 | 2638751 | 0 | 1286216588 | 12076135957069350 | 12076135957250895 | 12076135957475054 | 12076135957539634 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f173da09900 | 0x7f1631e37340 | 1208228 | 1208228 | 15207 | 9665832 | 65536 | 1168069922 | 9486545 | 0 | 4674099608 | 12076135957562026 | 12076135957761613 | 12076135958559370 | 12076135958624921 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f173da09800 | 0x7f1631e37380 | 638244 | 638244 | 52829 | 5105960 | 65536 | 591071574 | 4924449 | 0 | 2366122908 | 12076135958650348 | 12076135958856009 | 12076135959276807 | 12076135959345370 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f173da09700 | 0x7f1631e373c0 | 354629 | 354629 | 34556 | 2837040 | 65536 | 325709234 | 2672662 | 0 | 1304731068 | 12076135959375756 | 12076135959574246 | 12076135959801605 | 12076135959866148 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f173da09600 | 0x7f1631e37400 | 910053 | 910053 | 66233 | 7280432 | 65536 | 875001049 | 7117036 | 0 | 3501854468 | 12076135959892677 | 12076135960095684 | 12076135960693121 | 12076135960757915 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f173da09500 | 0x7f1631e37440 | 416716 | 416716 | 38776 | 3333736 | 65536 | 386281421 | 3171718 | 0 | 1547031200 | 12076135960795024 | 12076135960980160 | 12076135961249119 | 12076135961316213 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f173da09400 | 0x7f1631e37480 | 1487244 | 1487244 | 100773 | 11897960 | 65536 | 1446814948 | 11735305 | 0 | 5789080716 | 12076135961340057 | 12076135961541438 | 12076135962525754 | 12076135962592535 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f173da09300 | 0x7f1631e374c0 | 776445 | 776445 | 59789 | 6211568 | 65536 | 739788968 | 6042986 | 0 | 2961010620 | 12076135962618513 | 12076135962822393 | 12076135963335671 | 12076135963406467 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f173da09a00 | 0x7f1631e37500 | 423077 | 423077 | 40257 | 3384624 | 65536 | 391102994 | 3211673 | 0 | 1566299264 | 12076135963437826 | 12076135963634550 | 12076135963906549 | 12076135963944698 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f173da09900 | 0x7f1631e37540 | 1126957 | 1126957 | 80266 | 9015664 | 65536 | 1090014630 | 8842228 | 0 | 4361884476 | 12076135963986475 | 12076135964165588 | 12076135964903985 | 12076135964943955 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f173da09800 | 0x7f1631e37580 | 491548 | 491548 | 45963 | 3932392 | 65536 | 457575107 | 3751456 | 0 | 1832201660 | 12076135964990411 | 12076135965170704 | 12076135965487662 | 12076135965527279 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f173da09700 | 0x7f1631e375c0 | 1769725 | 1769725 | 119015 | 14157808 | 65536 | 1723531118 | 13986039 | 0 | 6895944028 | 12076135965559128 | 12076135965757421 | 12076135966926697 | 12076135966966303 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f173da09600 | 0x7f1631e37600 | 919469 | 919469 | 65519 | 7355760 | 65536 | 810978053 | 7176625 | 0 | 3245752132 | 12076135967015635 | 12076135967197256 | 12076135967806373 | 12076135967873890 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f173da09500 | 0x7f1631e37640 | 497092 | 497092 | 45515 | 3976744 | 65536 | 462851659 | 3800463 | 0 | 1853321416 | 12076135967903455 | 12076135968111012 | 12076135968432451 | 12076135968499923 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f173da09400 | 0x7f1631e37680 | 1336876 | 1336876 | 21586 | 10695016 | 65536 | 1298085168 | 10530402 | 0 | 5194165244 | 12076135968529869 | 12076135968726850 | 12076135969605086 | 12076135969670559 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f173da09300 | 0x7f1631e376c0 | 627652 | 627652 | 51528 | 5021224 | 65536 | 590843017 | 4848288 | 0 | 2365269652 | 12076135969715302 | 12076135969889885 | 12076135970298043 | 12076135970364749 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f173da09a00 | 0x7f1631e37700 | 2332381 | 2332381 | 150302 | 18659056 | 65536 | 2284130042 | 18493408 | 0 | 9138339756 | 12076135970394234 | 12076135970591322 | 12076135972138516 | 12076135972205770 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f173da09900 | 0x7f1631e37740 | 1202124 | 1202124 | 85971 | 9617000 | 65536 | 1162046467 | 9439911 | 0 | 4650008660 | 12076135972239744 | 12076135972439635 | 12076135973234032 | 12076135973301286 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f173da09800 | 0x7f1631e37780 | 640589 | 640589 | 52095 | 5124720 | 65536 | 608294635 | 4953187 | 0 | 2435011768 | 12076135973326834 | 12076135973530030 | 12076135973944269 | 12076135974021044 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f173da09700 | 0x7f1631e377c0 | 1758292 | 1758292 | 117610 | 14066344 | 65536 | 1714930715 | 13897488 | 0 | 6861550860 | 12076135974053905 | 12076135974251308 | 12076135975410183 | 12076135975477701 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f173da09600 | 0x7f1631e37800 | 1199812 | 1199812 | 84994 | 9598504 | 65536 | 1160567744 | 9419916 | 0 | 4644094668 | 12076135975516113 | 12076135975695302 | 12076135976481059 | 12076135976546988 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f173da09500 | 0x7f1631e37840 | 4585060 | 4585060 | 282544 | 36680488 | 65536 | 4515006619 | 36505283 | 0 | 18061847936 | 12076135976575802 | 12076135976774338 | 12076135979825366 | 12076135979891985 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f173da09400 | 0x7f1631e37880 | 2324701 | 2324701 | 150955 | 18597616 | 65536 | 2276235957 | 18423307 | 0 | 9106766752 | 12076135979922983 | 12076135980130484 | 12076135981671278 | 12076135981740059 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f173da09300 | 0x7f1631e378c0 | 1202700 | 1202700 | 83758 | 9621608 | 65536 | 1165053457 | 9464827 | 0 | 4662038232 | 12076135981763433 | 12076135981967917 | 12076135982757514 | 12076135982824244 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f173da09a00 | 0x7f1631e37900 | 3444068 | 3444068 | 215569 | 27552552 | 65536 | 3384431568 | 27385918 | 0 | 13539553104 | 12076135982849020 | 12076135983073033 | 12076135985355424 | 12076135985421922 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f173da09900 | 0x7f1631e37940 | 2324021 | 2324021 | 148937 | 18592176 | 65536 | 2276603762 | 18424736 | 0 | 9108239516 | 12076135985466906 | 12076135985644382 | 12076135987180856 | 12076135987247565 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f173da09800 | 0x7f1631e37980 | 9087516 | 9087516 | 473103 | 72700136 | 65536 | 8977459751 | 72531136 | 0 | 35911660608 | 12076135987292268 | 12076135987468855 | 12076135993523871 | 12076135993590373 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f173da09700 | 0x7f1631e379c0 | 4573172 | 4573172 | 282265 | 36585384 | 65536 | 4502546623 | 36415583 | 0 | 18012008728 | 12076135993622102 | 12076135993821310 | 12076135996864498 | 12076135996934327 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f173da09600 | 0x7f1631e37a00 | 2329029 | 2329029 | 150414 | 18632240 | 65536 | 2280558940 | 18466206 | 0 | 9124062400 | 12076135996963782 | 12076135997171697 | 12076135998711211 | 12076135998779737 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 911507 | 911514 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f173da09500 | 0x7f1631e37a40 | 6812700 | 6812700 | 341839 | 54501608 | 65536 | 6722245806 | 54335363 | 0 | 26890809756 | 12076135998803792 | 12076135999009889 | 12076136003537232 | 12076136003609252 |