50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 916511 | 916516 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f90e3204180 | 503889 | 503889 | 16730 | 4031120 | 524288 | 372250580 | 3826469 | 0 | 1503761888 | 12076231611374211 | 12076231851829714 | 12076231852153232 | 12076231852264555 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 916511 | 916516 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f90e3235100 | 27559 | 27559 | 20321 | 220480 | 512 | 1112827 | 76221 | 0 | 4465564 | 12076231866848879 | 12076231867193881 | 12076231867200281 | 12076231867210551 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f921323b900 | 0x7f90e3235140 | 216436 | 216436 | 21011 | 1731496 | 65536 | 154594617 | 1558880 | 0 | 620189568 | 12076231867277295 | 12076231867515959 | 12076231867647959 | 12076231867651741 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f921323b800 | 0x7f90e3235180 | 393236 | 393236 | 28182 | 3145896 | 65536 | 245010340 | 2972901 | 0 | 981859692 | 12076231867731039 | 12076231867921077 | 12076231868170836 | 12076231868239293 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f921323b700 | 0x7f90e32351c0 | 390244 | 390244 | 32409 | 3121960 | 65536 | 338721655 | 2944143 | 0 | 1356704028 | 12076231868273567 | 12076231868480915 | 12076231868728913 | 12076231868795156 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f921323b600 | 0x7f90e3235200 | 225708 | 225708 | 22886 | 1805672 | 65536 | 152436055 | 1633502 | 0 | 611552572 | 12076231868819652 | 12076231869042032 | 12076231869179631 | 12076231869183528 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f921323b500 | 0x7f90e3235240 | 226972 | 226972 | 24313 | 1815784 | 65536 | 151226970 | 1626579 | 0 | 606718608 | 12076231869243820 | 12076231869447950 | 12076231869585069 | 12076231869588591 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f921323b400 | 0x7f90e3235280 | 213820 | 213820 | 23937 | 1710568 | 65536 | 164040100 | 1540353 | 0 | 657976812 | 12076231869679460 | 12076231869864268 | 12076231869994668 | 12076231869998502 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f921323b300 | 0x7f90e32352c0 | 400004 | 400004 | 25990 | 3200040 | 65536 | 237013091 | 3012476 | 0 | 949869916 | 12076231870056049 | 12076231870269386 | 12076231870523945 | 12076231870564274 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f921323ba00 | 0x7f90e3235300 | 386764 | 386764 | 32562 | 3094120 | 65536 | 345035886 | 2923894 | 0 | 1381958844 | 12076231870599880 | 12076231870792744 | 12076231871038823 | 12076231871108586 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f921323b900 | 0x7f90e3235340 | 210340 | 210340 | 21015 | 1682728 | 65536 | 157845892 | 1509518 | 0 | 633201152 | 12076231871132871 | 12076231871343461 | 12076231871470821 | 12076231871474405 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f921323b800 | 0x7f90e3235380 | 219924 | 219924 | 23427 | 1759400 | 65536 | 151336414 | 1565574 | 0 | 607159852 | 12076231871533706 | 12076231871725859 | 12076231871858179 | 12076231871861775 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f921323b700 | 0x7f90e32353c0 | 215500 | 215500 | 21853 | 1724008 | 65536 | 159265118 | 1556462 | 0 | 638873952 | 12076231871936715 | 12076231872123778 | 12076231872255137 | 12076231872258803 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f921323b600 | 0x7f90e3235400 | 400740 | 400740 | 28259 | 3205928 | 65536 | 308188634 | 3036002 | 0 | 1234576780 | 12076231872320197 | 12076231872514656 | 12076231872770815 | 12076231872836757 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f921323b500 | 0x7f90e3235440 | 397428 | 397428 | 25153 | 3179432 | 65536 | 290134170 | 3005635 | 0 | 1162354500 | 12076231872861103 | 12076231873075453 | 12076231873329052 | 12076231873394684 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f921323b400 | 0x7f90e3235480 | 214356 | 214356 | 23905 | 1714856 | 65536 | 162508526 | 1547647 | 0 | 651848788 | 12076231873417216 | 12076231873625531 | 12076231873756410 | 12076231873759943 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f921323b300 | 0x7f90e32354c0 | 220820 | 220820 | 24110 | 1766568 | 65536 | 163520467 | 1594481 | 0 | 655897548 | 12076231873818822 | 12076231874010649 | 12076231874145528 | 12076231874149347 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f921323ba00 | 0x7f90e3235500 | 210748 | 210748 | 24194 | 1685992 | 65536 | 154343995 | 1516398 | 0 | 619194240 | 12076231874220819 | 12076231874401367 | 12076231874529526 | 12076231874533070 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f921323b900 | 0x7f90e3235540 | 392164 | 392164 | 34212 | 3137320 | 65536 | 299650599 | 2965496 | 0 | 1200418612 | 12076231874592120 | 12076231874781845 | 12076231875031604 | 12076231875099002 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f921323b800 | 0x7f90e3235580 | 390620 | 390620 | 24695 | 3124968 | 65536 | 289121011 | 2939042 | 0 | 1158301332 | 12076231875124970 | 12076231875328723 | 12076231875577361 | 12076231875644846 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f921323b700 | 0x7f90e32355c0 | 208548 | 208548 | 22666 | 1668392 | 65536 | 161516237 | 1499896 | 0 | 647887068 | 12076231875669592 | 12076231875869680 | 12076231875996559 | 12076231876004945 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f921323b600 | 0x7f90e3235600 | 214124 | 214124 | 24636 | 1713000 | 65536 | 158382307 | 1543906 | 0 | 635347936 | 12076231876058324 | 12076231876257678 | 12076231876388718 | 12076231876392325 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f921323b500 | 0x7f90e3235640 | 209820 | 209820 | 23449 | 1678568 | 65536 | 165272919 | 1510819 | 0 | 662917608 | 12076231876464860 | 12076231876644076 | 12076231876771916 | 12076231876775527 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f921323b400 | 0x7f90e3235680 | 392300 | 392300 | 35370 | 3138408 | 65536 | 354853870 | 2947559 | 0 | 1421232908 | 12076231876827985 | 12076231877028235 | 12076231877277194 | 12076231877345306 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f921323b300 | 0x7f90e32356c0 | 391388 | 391388 | 33253 | 3131112 | 65536 | 353537677 | 2964093 | 0 | 1415968816 | 12076231877369161 | 12076231877569992 | 12076231877819591 | 12076231877885200 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f921323ba00 | 0x7f90e3235700 | 214300 | 214300 | 23405 | 1714408 | 65536 | 158609196 | 1547880 | 0 | 636262676 | 12076231877907361 | 12076231878106630 | 12076231878237829 | 12076231878241472 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f921323b900 | 0x7f90e3235740 | 217404 | 217404 | 25609 | 1739240 | 65536 | 163441238 | 1570987 | 0 | 655578784 | 12076231878300802 | 12076231878491588 | 12076231878624707 | 12076231878628321 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f921323b800 | 0x7f90e3235780 | 210044 | 210044 | 21246 | 1680360 | 65536 | 166828921 | 1517545 | 0 | 669140864 | 12076231878700094 | 12076231878873026 | 12076231879001505 | 12076231879006333 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f921323b700 | 0x7f90e32357c0 | 399324 | 399324 | 31799 | 3194600 | 65536 | 331616212 | 3020527 | 0 | 1328281328 | 12076231879062918 | 12076231879258304 | 12076231879513823 | 12076231879581071 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f921323b600 | 0x7f90e3235800 | 389940 | 389940 | 27333 | 3119528 | 65536 | 302126929 | 2945221 | 0 | 1210326264 | 12076231879606298 | 12076231879805182 | 12076231880054141 | 12076231880121746 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f921323b500 | 0x7f90e3235840 | 216036 | 216036 | 25333 | 1728296 | 65536 | 163097889 | 1558656 | 0 | 654210868 | 12076231880146382 | 12076231880349179 | 12076231880481339 | 12076231880484991 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f921323b400 | 0x7f90e3235880 | 217324 | 217324 | 25771 | 1738600 | 65536 | 167412648 | 1565681 | 0 | 671491920 | 12076231880544221 | 12076231880732217 | 12076231880865177 | 12076231880869045 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f921323b300 | 0x7f90e32358c0 | 212420 | 212420 | 26223 | 1699368 | 65536 | 162346759 | 1523206 | 0 | 651207572 | 12076231880939917 | 12076231881125176 | 12076231881254455 | 12076231881258479 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f921323ba00 | 0x7f90e3235900 | 389884 | 389884 | 28296 | 3119080 | 65536 | 302704839 | 2931734 | 0 | 1212638368 | 12076231881317448 | 12076231881509014 | 12076231881759253 | 12076231881800015 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f921323b900 | 0x7f90e3235940 | 387212 | 387212 | 32439 | 3097704 | 65536 | 349422561 | 2932630 | 0 | 1399507804 | 12076231881836543 | 12076231882035411 | 12076231882282610 | 12076231882350117 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f921323b800 | 0x7f90e3235980 | 212228 | 212228 | 22603 | 1697832 | 65536 | 166326759 | 1524244 | 0 | 667130624 | 12076231882373962 | 12076231882579089 | 12076231882708528 | 12076231882712240 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f921323b700 | 0x7f90e32359c0 | 214428 | 214428 | 26062 | 1715432 | 65536 | 162356258 | 1544539 | 0 | 651274924 | 12076231882772853 | 12076231882957327 | 12076231883088846 | 12076231883092958 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f921323b600 | 0x7f90e3235a00 | 207420 | 207420 | 20347 | 1659368 | 65536 | 165791497 | 1484278 | 0 | 664990676 | 12076231883164801 | 12076231883344205 | 12076231883470445 | 12076231883474086 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f921323b500 | 0x7f90e3235a40 | 444492 | 444492 | 18101 | 3555944 | 65536 | 349566839 | 2928634 | 0 | 1400096984 | 12076231883524470 | 12076231883720523 | 12076231884005642 | 12076231884072679 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f921323b400 | 0x7f90e3235a80 | 391020 | 391020 | 30906 | 3128168 | 65536 | 331891325 | 2948469 | 0 | 1329381800 | 12076231884096844 | 12076231884297321 | 12076231884546919 | 12076231884612933 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f921323b300 | 0x7f90e3235ac0 | 216428 | 216428 | 24873 | 1731432 | 65536 | 160248239 | 1555450 | 0 | 642810652 | 12076231884638069 | 12076231884839718 | 12076231884971718 | 12076231884975406 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f921323ba00 | 0x7f90e3235b00 | 211716 | 211716 | 24110 | 1693736 | 65536 | 174815876 | 1495906 | 0 | 701086040 | 12076231885033404 | 12076231885247716 | 12076231885375556 | 12076231885379247 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f921323b900 | 0x7f90e3235b40 | 214548 | 214548 | 25523 | 1716392 | 65536 | 165139480 | 1545141 | 0 | 662379796 | 12076231885451631 | 12076231885626274 | 12076231885757634 | 12076231885761217 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f921323b800 | 0x7f90e3235b80 | 408052 | 408052 | 30103 | 3264424 | 65536 | 290336806 | 3080833 | 0 | 1163164760 | 12076231885819355 | 12076231886008513 | 12076231886270591 | 12076231886336616 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f921323b700 | 0x7f90e3235bc0 | 398260 | 398260 | 26619 | 3186088 | 65536 | 268963824 | 3011758 | 0 | 1077674328 | 12076231886363015 | 12076231886560670 | 12076231886816189 | 12076231886882410 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f921323b600 | 0x7f90e3235c00 | 218572 | 218572 | 24388 | 1748584 | 65536 | 160343562 | 1584646 | 0 | 643220688 | 12076231886904912 | 12076231887115707 | 12076231887249787 | 12076231887253560 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f921323b500 | 0x7f90e3235c40 | 221044 | 221044 | 28820 | 1768360 | 65536 | 166934082 | 1589175 | 0 | 669558956 | 12076231887311037 | 12076231887499226 | 12076231887635065 | 12076231887638706 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f921323b400 | 0x7f90e3235c80 | 213828 | 213828 | 24849 | 1710632 | 65536 | 164341372 | 1535313 | 0 | 659185660 | 12076231887709588 | 12076231887890584 | 12076231888021463 | 12076231888025585 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f921323b300 | 0x7f90e3235cc0 | 412052 | 412052 | 33944 | 3296424 | 65536 | 350951339 | 3114670 | 0 | 1405623716 | 12076231888083112 | 12076231888282582 | 12076231888547221 | 12076231888611654 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f921323ba00 | 0x7f90e3235d00 | 391972 | 391972 | 31564 | 3135784 | 65536 | 335063255 | 2955778 | 0 | 1342074760 | 12076231888636070 | 12076231888833299 | 12076231889084018 | 12076231889125489 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f921323b900 | 0x7f90e3235d40 | 211268 | 211268 | 26388 | 1690152 | 65536 | 163425388 | 1518927 | 0 | 655550388 | 12076231889168840 | 12076231889346257 | 12076231889475696 | 12076231889479417 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f921323b800 | 0x7f90e3235d80 | 225620 | 225620 | 26765 | 1804968 | 65536 | 175101612 | 1630182 | 0 | 702256672 | 12076231889539228 | 12076231889725455 | 12076231889864975 | 12076231889868781 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f921323b700 | 0x7f90e3235dc0 | 218172 | 218172 | 26196 | 1745384 | 65536 | 169240744 | 1577053 | 0 | 678830104 | 12076231889939572 | 12076231890117293 | 12076231890251373 | 12076231890255329 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f921323b600 | 0x7f90e3235e00 | 407748 | 407748 | 34788 | 3261992 | 65536 | 351974986 | 3077833 | 0 | 1409726352 | 12076231890314469 | 12076231890502892 | 12076231890764490 | 12076231890830939 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f921323b500 | 0x7f90e3235e40 | 396140 | 396140 | 26128 | 3169128 | 65536 | 274045779 | 2964956 | 0 | 1097999384 | 12076231890855094 | 12076231891057929 | 12076231891311528 | 12076231891381211 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f921323b400 | 0x7f90e3235e80 | 211060 | 211060 | 25078 | 1688488 | 65536 | 164141556 | 1521355 | 0 | 658404760 | 12076231891406448 | 12076231891608966 | 12076231891738406 | 12076231891742162 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f921323b300 | 0x7f90e3235ec0 | 224876 | 224876 | 30861 | 1799016 | 65536 | 181708466 | 1599554 | 0 | 728670768 | 12076231891800400 | 12076231891987845 | 12076231892124644 | 12076231892128941 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f921323ba00 | 0x7f90e3235f00 | 215980 | 215980 | 23120 | 1727848 | 65536 | 171090177 | 1551221 | 0 | 686225836 | 12076231892203439 | 12076231892380323 | 12076231892512642 | 12076231892516471 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f921323b900 | 0x7f90e3235f40 | 403124 | 403124 | 37171 | 3225000 | 65536 | 294430551 | 3032257 | 0 | 1179541064 | 12076231892574289 | 12076231892762881 | 12076231893021760 | 12076231893064039 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f921323b800 | 0x7f90e3235f80 | 395924 | 395924 | 30605 | 3167400 | 65536 | 316508958 | 2986761 | 0 | 1267854064 | 12076231893109644 | 12076231893290239 | 12076231893543517 | 12076231893609733 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f921323b700 | 0x7f90e3235fc0 | 215308 | 215308 | 20409 | 1722472 | 65536 | 167112118 | 1554313 | 0 | 670323408 | 12076231893632926 | 12076231893834876 | 12076231893966875 | 12076231893970574 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f921323b600 | 0x7f90e3236000 | 225932 | 225932 | 26404 | 1807464 | 65536 | 183772946 | 1606710 | 0 | 736964832 | 12076231894028892 | 12076231894237594 | 12076231894375514 | 12076231894379393 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f921323b500 | 0x7f90e3236040 | 213012 | 213012 | 27724 | 1704104 | 65536 | 166993967 | 1524205 | 0 | 669813852 | 12076231894453882 | 12076231894628632 | 12076231894758872 | 12076231894762545 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f921323b400 | 0x7f90e3236080 | 412388 | 412388 | 33522 | 3299112 | 65536 | 314624223 | 3105274 | 0 | 1260313276 | 12076231894821525 | 12076231895008311 | 12076231895273589 | 12076231895343094 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f921323b300 | 0x7f90e32360c0 | 388004 | 388004 | 31263 | 3104040 | 65536 | 341579810 | 2930373 | 0 | 1368136080 | 12076231895368572 | 12076231895568148 | 12076231895817267 | 12076231895883248 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f921323ba00 | 0x7f90e3236100 | 215204 | 215204 | 27264 | 1721640 | 65536 | 162639467 | 1545823 | 0 | 652405112 | 12076231895905980 | 12076231896118545 | 12076231896250705 | 12076231896254688 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f921323b900 | 0x7f90e3236140 | 232980 | 232980 | 28944 | 1863848 | 65536 | 196870474 | 1691195 | 0 | 789355492 | 12076231896314910 | 12076231896500464 | 12076231896644783 | 12076231896648490 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f921323b800 | 0x7f90e3236180 | 215652 | 215652 | 23399 | 1725224 | 65536 | 169288132 | 1551402 | 0 | 679019796 | 12076231896720344 | 12076231896896302 | 12076231897028301 | 12076231897032193 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f921323b700 | 0x7f90e32361c0 | 410452 | 410452 | 33213 | 3283624 | 65536 | 358108503 | 3105537 | 0 | 1434256804 | 12076231897088839 | 12076231897287020 | 12076231897551659 | 12076231897616589 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f921323b600 | 0x7f90e3236200 | 391340 | 391340 | 30541 | 3130728 | 65536 | 323491336 | 2961911 | 0 | 1295789352 | 12076231897639853 | 12076231897837097 | 12076231898087976 | 12076231898154379 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f921323b500 | 0x7f90e3236240 | 217444 | 217444 | 25764 | 1739560 | 65536 | 168995367 | 1571393 | 0 | 677812780 | 12076231898177933 | 12076231898379335 | 12076231898512934 | 12076231898516702 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f921323b400 | 0x7f90e3236280 | 242548 | 242548 | 28922 | 1940392 | 65536 | 211879219 | 1766407 | 0 | 849383028 | 12076231898575672 | 12076231898762373 | 12076231898912452 | 12076231898916505 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f921323b300 | 0x7f90e32362c0 | 214652 | 214652 | 27762 | 1717224 | 65536 | 171720457 | 1539295 | 0 | 688714884 | 12076231899010710 | 12076231899186371 | 12076231899318210 | 12076231899322179 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f921323ba00 | 0x7f90e3236300 | 413876 | 413876 | 38752 | 3311016 | 65536 | 315478967 | 3084197 | 0 | 1263733464 | 12076231899373745 | 12076231899570049 | 12076231899835648 | 12076231899901165 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f921323b900 | 0x7f90e3236340 | 394564 | 394564 | 31363 | 3156520 | 65536 | 324389758 | 2988860 | 0 | 1299381036 | 12076231899926562 | 12076231900125567 | 12076231900379485 | 12076231900448042 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f921323b800 | 0x7f90e3236380 | 214772 | 214772 | 25335 | 1718184 | 65536 | 170705090 | 1542095 | 0 | 684703728 | 12076231900466105 | 12076231900672604 | 12076231900804443 | 12076231900808421 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f921323b700 | 0x7f90e32363c0 | 251964 | 251964 | 28544 | 2015720 | 65536 | 221368828 | 1849294 | 0 | 887325512 | 12076231900867251 | 12076231901059482 | 12076231901216922 | 12076231901220908 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f921323b600 | 0x7f90e3236400 | 216260 | 216260 | 26963 | 1730088 | 65536 | 162353338 | 1553710 | 0 | 651270712 | 12076231901291820 | 12076231901468120 | 12076231901600760 | 12076231901604601 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f921323b500 | 0x7f90e3236440 | 408868 | 408868 | 34786 | 3270952 | 65536 | 352285559 | 3098715 | 0 | 1410973424 | 12076231901663100 | 12076231901849239 | 12076231902113717 | 12076231902179539 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f921323b400 | 0x7f90e3236480 | 387420 | 387420 | 34140 | 3099368 | 65536 | 345549815 | 2927381 | 0 | 1384017284 | 12076231902203444 | 12076231902404596 | 12076231902653395 | 12076231902719463 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f921323b300 | 0x7f90e32364c0 | 224172 | 224172 | 26689 | 1793384 | 65536 | 170117887 | 1616250 | 0 | 682350060 | 12076231902743498 | 12076231902941393 | 12076231903079313 | 12076231903083209 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f921323ba00 | 0x7f90e3236500 | 265316 | 265316 | 30088 | 2122536 | 65536 | 234848461 | 1953284 | 0 | 941243104 | 12076231903141928 | 12076231903339152 | 12076231903505391 | 12076231903509221 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f921323b900 | 0x7f90e3236540 | 216500 | 216500 | 26720 | 1732008 | 65536 | 167231965 | 1553615 | 0 | 670756376 | 12076231903580974 | 12076231903758190 | 12076231903891309 | 12076231903894928 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f921323b800 | 0x7f90e3236580 | 404780 | 404780 | 34990 | 3238248 | 65536 | 298742070 | 3021079 | 0 | 1196807292 | 12076231903954468 | 12076231904147948 | 12076231904409707 | 12076231904477140 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f921323b700 | 0x7f90e32365c0 | 385500 | 385500 | 29917 | 3084008 | 65536 | 327472525 | 2895403 | 0 | 1311710800 | 12076231904501845 | 12076231904702185 | 12076231904950344 | 12076231905018035 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f921323b600 | 0x7f90e3236600 | 212116 | 212116 | 21647 | 1696936 | 65536 | 167442840 | 1530569 | 0 | 671610512 | 12076231905041068 | 12076231905241543 | 12076231905371942 | 12076231905375349 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f921323b500 | 0x7f90e3236640 | 276628 | 276628 | 30438 | 2213032 | 65536 | 244234287 | 2041863 | 0 | 978780344 | 12076231905434679 | 12076231905621221 | 12076231905794820 | 12076231905798395 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f921323b400 | 0x7f90e3236680 | 222180 | 222180 | 27382 | 1777448 | 65536 | 175126311 | 1606570 | 0 | 702349764 | 12076231905870148 | 12076231906050339 | 12076231906187298 | 12076231906191195 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f921323b300 | 0x7f90e32366c0 | 402204 | 402204 | 38868 | 3217640 | 65536 | 343303391 | 3015453 | 0 | 1375047096 | 12076231906243222 | 12076231906443617 | 12076231906701856 | 12076231906748330 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f921323ba00 | 0x7f90e3236700 | 391716 | 391716 | 28357 | 3133736 | 65536 | 307026386 | 2953055 | 0 | 1229934904 | 12076231906775501 | 12076231906972735 | 12076231907225533 | 12076231907276612 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f921323b900 | 0x7f90e3236740 | 223020 | 223020 | 26589 | 1784168 | 65536 | 174927215 | 1614355 | 0 | 701575448 | 12076231907299855 | 12076231907498972 | 12076231907636252 | 12076231907640158 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f921323b800 | 0x7f90e3236780 | 291756 | 291756 | 30889 | 2334056 | 65536 | 255429615 | 2141529 | 0 | 1023554924 | 12076231907701281 | 12076231907885530 | 12076231908067609 | 12076231908071700 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f921323b700 | 0x7f90e32367c0 | 218772 | 218772 | 23740 | 1750184 | 65536 | 162113280 | 1577588 | 0 | 650326224 | 12076231908159223 | 12076231908335768 | 12076231908470648 | 12076231908474458 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f921323b600 | 0x7f90e3236800 | 408372 | 408372 | 35875 | 3266984 | 65536 | 374890415 | 3095343 | 0 | 1501382876 | 12076231908532927 | 12076231908718966 | 12076231908983605 | 12076231909032085 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f921323b500 | 0x7f90e3236840 | 396228 | 396228 | 29097 | 3169832 | 65536 | 279160748 | 2986495 | 0 | 1118467876 | 12076231909055829 | 12076231909261204 | 12076231909516723 | 12076231909564715 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f921323b400 | 0x7f90e3236880 | 213988 | 213988 | 26279 | 1711912 | 65536 | 170204780 | 1542288 | 0 | 682658196 | 12076231909587266 | 12076231909784561 | 12076231909916241 | 12076231909919935 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f921323b300 | 0x7f90e32368c0 | 304196 | 304196 | 32741 | 2433576 | 65536 | 268605054 | 2254010 | 0 | 1076263632 | 12076231909978443 | 12076231910170960 | 12076231910363439 | 12076231910412109 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f921323ba00 | 0x7f90e3236900 | 223468 | 223468 | 27558 | 1787752 | 65536 | 174169103 | 1609275 | 0 | 698550524 | 12076231910447986 | 12076231910635277 | 12076231910772717 | 12076231910776847 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f921323b900 | 0x7f90e3236940 | 437972 | 437972 | 40632 | 3503784 | 65536 | 406098341 | 3332083 | 0 | 1626213004 | 12076231910835176 | 12076231911025036 | 12076231911309514 | 12076231911360201 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f921323b800 | 0x7f90e3236980 | 391020 | 391020 | 35034 | 3128168 | 65536 | 338198311 | 2942351 | 0 | 1354636000 | 12076231911384507 | 12076231911582953 | 12076231911834632 | 12076231911882923 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f921323b700 | 0x7f90e32369c0 | 217972 | 217972 | 27828 | 1743784 | 65536 | 162941030 | 1554590 | 0 | 653600068 | 12076231911905926 | 12076231912116071 | 12076231912249990 | 12076231912254083 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f921323b600 | 0x7f90e3236a00 | 331132 | 331132 | 34861 | 2649064 | 65536 | 299989235 | 2475724 | 0 | 1201818428 | 12076231912312832 | 12076231912506789 | 12076231912716708 | 12076231912765243 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f921323b500 | 0x7f90e3236a40 | 223188 | 223188 | 25976 | 1785512 | 65536 | 174304746 | 1614789 | 0 | 699083668 | 12076231912799376 | 12076231912984386 | 12076231913122146 | 12076231913126384 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f921323b400 | 0x7f90e3236a80 | 471372 | 471372 | 42537 | 3770984 | 65536 | 438236492 | 3595112 | 0 | 1754765804 | 12076231913185073 | 12076231913374625 | 12076231913681983 | 12076231913730016 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f921323b300 | 0x7f90e3236ac0 | 398004 | 398004 | 34201 | 3184040 | 65536 | 304164707 | 2970597 | 0 | 1218499192 | 12076231913753850 | 12076231913949982 | 12076231914204541 | 12076231914253138 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f921323ba00 | 0x7f90e3236b00 | 219116 | 219116 | 28566 | 1752936 | 65536 | 171869843 | 1572135 | 0 | 689335288 | 12076231914280479 | 12076231914476540 | 12076231914611579 | 12076231914615571 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f921323b900 | 0x7f90e3236b40 | 359748 | 359748 | 38478 | 2877992 | 65536 | 325584781 | 2687145 | 0 | 1304200692 | 12076231914675924 | 12076231914860811 | 12076231915088010 | 12076231915142781 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f921323b800 | 0x7f90e3236b80 | 226156 | 226156 | 27872 | 1809256 | 65536 | 178178966 | 1626552 | 0 | 714600236 | 12076231915176133 | 12076231915367528 | 12076231915507527 | 12076231915510985 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f921323b700 | 0x7f90e3236bc0 | 547452 | 547452 | 16277 | 4379624 | 65536 | 471732068 | 3865211 | 0 | 1888749296 | 12076231915571247 | 12076231915762246 | 12076231916120005 | 12076231916168958 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f921323b600 | 0x7f90e3236c00 | 396100 | 396100 | 33592 | 3168808 | 65536 | 286883321 | 2975765 | 0 | 1149368112 | 12076231916193354 | 12076231916394884 | 12076231916649923 | 12076231916671232 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f921323b500 | 0x7f90e3236c40 | 227308 | 227308 | 28211 | 1818472 | 65536 | 179633601 | 1637453 | 0 | 720416176 | 12076231916725703 | 12076231916900322 | 12076231917040641 | 12076231917043834 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f921323b400 | 0x7f90e3236c80 | 383836 | 383836 | 37665 | 3070696 | 65536 | 351339674 | 2894854 | 0 | 1407201852 | 12076231917106160 | 12076231917306240 | 12076231917551039 | 12076231917598765 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f921323b300 | 0x7f90e3236cc0 | 221244 | 221244 | 27608 | 1769960 | 65536 | 178257294 | 1597247 | 0 | 714921032 | 12076231917634101 | 12076231917818718 | 12076231917955997 | 12076231917959466 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f921323ba00 | 0x7f90e3236d00 | 575148 | 575148 | 49385 | 4601192 | 65536 | 541174626 | 4423355 | 0 | 2166519248 | 12076231918022864 | 12076231918212156 | 12076231918588955 | 12076231918636173 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f921323b900 | 0x7f90e3236d40 | 404724 | 404724 | 38279 | 3237800 | 65536 | 304400557 | 3047956 | 0 | 1219436236 | 12076231918658445 | 12076231918860474 | 12076231919122233 | 12076231919172781 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f921323b800 | 0x7f90e3236d80 | 228556 | 228556 | 29646 | 1828456 | 65536 | 186585058 | 1646747 | 0 | 748222188 | 12076231919196134 | 12076231919398392 | 12076231919540311 | 12076231919543910 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f921323b700 | 0x7f90e3236dc0 | 434988 | 434988 | 40594 | 3479912 | 65536 | 402477309 | 3306040 | 0 | 1611757108 | 12076231919602710 | 12076231919787510 | 12076231920067349 | 12076231920116615 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f921323b600 | 0x7f90e3236e00 | 222444 | 222444 | 29050 | 1779560 | 65536 | 185192669 | 1602805 | 0 | 742649080 | 12076231920152672 | 12076231920341428 | 12076231920479348 | 12076231920482785 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f921323b500 | 0x7f90e3236e40 | 645828 | 645828 | 52296 | 5166632 | 65536 | 608907256 | 4992501 | 0 | 2437449248 | 12076231920541264 | 12076231920729587 | 12076231921154545 | 12076231921202162 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f921323b400 | 0x7f90e3236e80 | 397500 | 397500 | 38711 | 3180008 | 65536 | 333995342 | 2976636 | 0 | 1337826292 | 12076231921228982 | 12076231921425104 | 12076231921681903 | 12076231921729843 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f921323b300 | 0x7f90e3236ec0 | 229436 | 229436 | 28919 | 1835496 | 65536 | 188599093 | 1647539 | 0 | 756299704 | 12076231921752755 | 12076231921953742 | 12076231922095661 | 12076231922099690 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f921323ba00 | 0x7f90e3236f00 | 488444 | 488444 | 43695 | 3907560 | 65536 | 454422446 | 3735273 | 0 | 1819537584 | 12076231922160243 | 12076231922354380 | 12076231922669419 | 12076231922717008 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f921323b900 | 0x7f90e3236f40 | 243844 | 243844 | 28023 | 1950760 | 65536 | 214659664 | 1784113 | 0 | 860525104 | 12076231922751071 | 12076231922939818 | 12076231923092457 | 12076231923096373 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f921323b800 | 0x7f90e3236f80 | 788308 | 788308 | 63833 | 6306472 | 65536 | 750941066 | 6108037 | 0 | 3005584164 | 12076231923154761 | 12076231923343816 | 12076231923861254 | 12076231923910095 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f921323b700 | 0x7f90e3236fc0 | 460372 | 460372 | 41899 | 3682984 | 65536 | 380467912 | 3486782 | 0 | 1523712884 | 12076231923934079 | 12076231924135173 | 12076231924435332 | 12076231924485414 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f921323b600 | 0x7f90e3237000 | 247332 | 247332 | 28756 | 1978664 | 65536 | 218572441 | 1808278 | 0 | 876171028 | 12076231924508026 | 12076231924708451 | 12076231924863170 | 12076231924866472 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f921323b500 | 0x7f90e3237040 | 595164 | 595164 | 50118 | 4761320 | 65536 | 561201796 | 4583951 | 0 | 2246652128 | 12076231924925823 | 12076231925118049 | 12076231925504448 | 12076231925552858 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f921323b400 | 0x7f90e3237080 | 281876 | 281876 | 31026 | 2255016 | 65536 | 251195278 | 2078094 | 0 | 1006675948 | 12076231925589035 | 12076231925775007 | 12076231925953086 | 12076231925956798 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f921323b300 | 0x7f90e32370c0 | 925404 | 925404 | 68706 | 7403240 | 65536 | 890026304 | 7232511 | 0 | 3561926420 | 12076231926019956 | 12076231926216765 | 12076231926827163 | 12076231926894451 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f921323ba00 | 0x7f90e3237100 | 503300 | 503300 | 45144 | 4026408 | 65536 | 461861349 | 3846022 | 0 | 1849280228 | 12076231926918776 | 12076231927118841 | 12076231927447160 | 12076231927515075 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f921323b900 | 0x7f90e3237140 | 285332 | 285332 | 31046 | 2282664 | 65536 | 255272867 | 2111656 | 0 | 1022985284 | 12076231927540271 | 12076231927736919 | 12076231927917558 | 12076231927921059 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f921323b800 | 0x7f90e3237180 | 698492 | 698492 | 55232 | 5587944 | 65536 | 664922242 | 5418973 | 0 | 2661534732 | 12076231927980960 | 12076231928177717 | 12076231928633556 | 12076231928700708 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f921323b700 | 0x7f90e32371c0 | 314684 | 314684 | 32860 | 2517480 | 65536 | 285355900 | 2345471 | 0 | 1143315464 | 12076231928736615 | 12076231928925234 | 12076231929125394 | 12076231929191350 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f921323b600 | 0x7f90e3237200 | 1075708 | 1075708 | 15991 | 8605672 | 65536 | 1028789433 | 8353693 | 0 | 4116978952 | 12076231929216256 | 12076231929413392 | 12076231930124110 | 12076231930190397 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f921323b500 | 0x7f90e3237240 | 569900 | 569900 | 48630 | 4559208 | 65536 | 531638232 | 4382539 | 0 | 2128395416 | 12076231930215914 | 12076231930418668 | 12076231930793067 | 12076231930858097 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f921323b400 | 0x7f90e3237280 | 319788 | 319788 | 33173 | 2558312 | 65536 | 288889980 | 2379759 | 0 | 1157456352 | 12076231930880389 | 12076231931094666 | 12076231931298025 | 12076231931365811 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f921323b300 | 0x7f90e32372c0 | 830692 | 830692 | 16772 | 6645544 | 65536 | 768833364 | 6263712 | 0 | 3077184860 | 12076231931397199 | 12076231931590664 | 12076231932134662 | 12076231932201895 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f921323ba00 | 0x7f90e3237300 | 350500 | 350500 | 35965 | 2804008 | 65536 | 319134542 | 2622082 | 0 | 1278429768 | 12076231932238763 | 12076231932421701 | 12076231932644900 | 12076231932710871 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f921323b900 | 0x7f90e3237340 | 1211100 | 1211100 | 87667 | 9688808 | 65536 | 1168911486 | 9491007 | 0 | 4677466636 | 12076231932734364 | 12076231932935779 | 12076231933735615 | 12076231933803812 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f921323b800 | 0x7f90e3237380 | 639380 | 639380 | 53458 | 5115048 | 65536 | 601490401 | 4932314 | 0 | 2407819688 | 12076231933834369 | 12076231934041694 | 12076231934462333 | 12076231934529120 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f921323b700 | 0x7f90e32373c0 | 355340 | 355340 | 34831 | 2842728 | 65536 | 325977534 | 2674007 | 0 | 1305798228 | 12076231934552774 | 12076231934751611 | 12076231934979611 | 12076231935047132 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f921323b600 | 0x7f90e3237400 | 913252 | 913252 | 67630 | 7306024 | 65536 | 873353673 | 7111720 | 0 | 3495264728 | 12076231935077589 | 12076231935275289 | 12076231935872727 | 12076231935938509 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f921323b500 | 0x7f90e3237440 | 419092 | 419092 | 39375 | 3352744 | 65536 | 387478807 | 3182077 | 0 | 1551821780 | 12076231935972813 | 12076231936165686 | 12076231936434965 | 12076231936502978 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f921323b400 | 0x7f90e3237480 | 1488116 | 1488116 | 101306 | 11904936 | 65536 | 1447272521 | 11731982 | 0 | 5790910268 | 12076231936528105 | 12076231936724884 | 12076231937710480 | 12076231937777206 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f921323b300 | 0x7f90e32374c0 | 780564 | 780564 | 61408 | 6244520 | 65536 | 743785851 | 6074386 | 0 | 2976989912 | 12076231937800950 | 12076231938005359 | 12076231938522157 | 12076231938589756 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f921323ba00 | 0x7f90e3237500 | 422060 | 422060 | 39642 | 3376488 | 65536 | 390786297 | 3207523 | 0 | 1565029572 | 12076231938620564 | 12076231938822155 | 12076231939094954 | 12076231939164064 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f921323b900 | 0x7f90e3237540 | 1127460 | 1127460 | 80640 | 9019688 | 65536 | 1088944561 | 8846175 | 0 | 4357601656 | 12076231939194320 | 12076231939388713 | 12076231940127750 | 12076231940194008 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f921323b800 | 0x7f90e3237580 | 491524 | 491524 | 45001 | 3932200 | 65536 | 458104942 | 3757848 | 0 | 1834312856 | 12076231940229925 | 12076231940428229 | 12076231940745988 | 12076231940812026 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f921323b700 | 0x7f90e32375c0 | 1769300 | 1769300 | 117483 | 14154408 | 65536 | 1724289476 | 13983857 | 0 | 6898978492 | 12076231940835340 | 12076231941050947 | 12076231942221822 | 12076231942290003 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f921323b600 | 0x7f90e3237600 | 925588 | 925588 | 68101 | 7404712 | 65536 | 802906095 | 7212505 | 0 | 3213471748 | 12076231942315300 | 12076231942526301 | 12076231943138139 | 12076231943205324 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f921323b500 | 0x7f90e3237640 | 491900 | 491900 | 43878 | 3935208 | 65536 | 458729719 | 3763001 | 0 | 1836805772 | 12076231943234979 | 12076231943434937 | 12076231943753176 | 12076231943818914 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f921323b400 | 0x7f90e3237680 | 1337756 | 1337756 | 92589 | 10702056 | 65536 | 1298496330 | 10531728 | 0 | 5195810048 | 12076231943842037 | 12076231944049975 | 12076231944928531 | 12076231944996824 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f921323b300 | 0x7f90e32376c0 | 629316 | 629316 | 52870 | 5034536 | 65536 | 593147166 | 4858222 | 0 | 2374483980 | 12076231945040795 | 12076231945220530 | 12076231945629969 | 12076231945696103 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f921323ba00 | 0x7f90e3237700 | 2334132 | 2334132 | 80444 | 18673064 | 65536 | 2284274524 | 18500488 | 0 | 9138918216 | 12076231945717844 | 12076231945926288 | 12076231947476361 | 12076231947544799 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f921323b900 | 0x7f90e3237740 | 1205092 | 1205092 | 85618 | 9640744 | 65536 | 1163043276 | 9439381 | 0 | 4653996540 | 12076231947567120 | 12076231947780040 | 12076231948576357 | 12076231948643601 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f921323b800 | 0x7f90e3237780 | 642284 | 642284 | 51773 | 5138280 | 65536 | 610020630 | 4959719 | 0 | 2441916164 | 12076231948671863 | 12076231948875236 | 12076231949290914 | 12076231949360393 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f921323b700 | 0x7f90e32377c0 | 1758524 | 1758524 | 117806 | 14068200 | 65536 | 1714404982 | 13898360 | 0 | 6859442952 | 12076231949386221 | 12076231949584353 | 12076231950743229 | 12076231950810197 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f921323b600 | 0x7f90e3237800 | 1198548 | 1198548 | 84117 | 9588392 | 65536 | 1160783717 | 9420479 | 0 | 4644960060 | 12076231950848228 | 12076231951037628 | 12076231951823384 | 12076231951892108 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f921323b500 | 0x7f90e3237840 | 4584324 | 4584324 | 282048 | 36674600 | 65536 | 4515000683 | 36505761 | 0 | 18061823208 | 12076231951920150 | 12076231952127863 | 12076231955179531 | 12076231955248346 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f921323b400 | 0x7f90e3237880 | 2324972 | 2324972 | 150879 | 18599784 | 65536 | 2274108545 | 18429203 | 0 | 9098258420 | 12076231955276077 | 12076231955485450 | 12076231957029764 | 12076231957097803 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f921323b300 | 0x7f90e32378c0 | 1204540 | 1204540 | 85060 | 9636328 | 65536 | 1166566135 | 9461305 | 0 | 4668088820 | 12076231957126406 | 12076231957328323 | 12076231958118720 | 12076231958186376 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f921323ba00 | 0x7f90e3237900 | 3444388 | 3444388 | 215203 | 27555112 | 65536 | 3385941564 | 27381169 | 0 | 13545593912 | 12076231958209589 | 12076231958418559 | 12076231960701270 | 12076231960768795 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f921323b900 | 0x7f90e3237940 | 2323580 | 2323580 | 149918 | 18588648 | 65536 | 2273690728 | 18416492 | 0 | 9096585828 | 12076231960812436 | 12076231960990388 | 12076231962525742 | 12076231962595139 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f921323b800 | 0x7f90e3237980 | 9088412 | 9088412 | 545478 | 72707304 | 65536 | 8976130373 | 72531065 | 0 | 35906342652 | 12076231962624153 | 12076231962827181 | 12076231968885077 | 12076231968953456 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f921323b700 | 0x7f90e32379c0 | 4575932 | 4575932 | 214331 | 36607464 | 65536 | 4502519390 | 36415784 | 0 | 18011900616 | 12076231968985135 | 12076231969190676 | 12076231972237864 | 12076231972308792 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f921323b600 | 0x7f90e3237a00 | 2328900 | 2328900 | 150974 | 18631208 | 65536 | 2280422633 | 18457455 | 0 | 9123513388 | 12076231972335551 | 12076231972540903 | 12076231974079777 | 12076231974147298 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 916511 | 916516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f921323b500 | 0x7f90e3237a40 | 6814196 | 6814196 | 413207 | 54513576 | 65536 | 6723170056 | 54335062 | 0 | 26894504164 | 12076231974175371 | 12076231974377849 | 12076231978905673 | 12076231978976403 |