50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 937062 | 937067 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f30a5604180 | 499674 | 499674 | 16219 | 3997400 | 524288 | 371768834 | 3801967 | 0 | 1501859028 | 12076634246773666 | 12076634492293789 | 12076634492615709 | 12076634492722711 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 937062 | 937067 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f30a5635100 | 27467 | 27467 | 20214 | 219744 | 512 | 1126759 | 75366 | 0 | 4521000 | 12076634507281913 | 12076634507609674 | 12076634507615914 | 12076634507624380 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f31b1191900 | 0x7f30a5635140 | 217853 | 217853 | 21633 | 1742832 | 65536 | 133671007 | 1576099 | 0 | 536498696 | 12076634507695903 | 12076634507934314 | 12076634508067113 | 12076634508071402 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f31b1191800 | 0x7f30a5635180 | 389621 | 389621 | 26804 | 3116976 | 65536 | 253060814 | 2948271 | 0 | 1014058364 | 12076634508150749 | 12076634508354473 | 12076634508602952 | 12076634508672009 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f31b1191700 | 0x7f30a56351c0 | 403772 | 403772 | 26436 | 3230184 | 65536 | 276105211 | 3052243 | 0 | 1106238632 | 12076634508705912 | 12076634508912072 | 12076634509168551 | 12076634509238653 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f31b1191600 | 0x7f30a5635200 | 217957 | 217957 | 19106 | 1743664 | 65536 | 133545621 | 1580634 | 0 | 535998284 | 12076634509266064 | 12076634509473830 | 12076634509607270 | 12076634509610645 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f31b1191500 | 0x7f30a5635240 | 229069 | 229069 | 19969 | 1832560 | 65536 | 121094689 | 1662134 | 0 | 486187148 | 12076634509672260 | 12076634509869669 | 12076634510009989 | 12076634510013484 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f31b1191400 | 0x7f30a5635280 | 212108 | 212108 | 18602 | 1696872 | 65536 | 156194765 | 1531178 | 0 | 626596552 | 12076634510103422 | 12076634510288548 | 12076634510418468 | 12076634510421754 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f31b1191300 | 0x7f30a56352c0 | 395493 | 395493 | 31193 | 3163952 | 65536 | 350214228 | 2994677 | 0 | 1402673352 | 12076634510485032 | 12076634510682148 | 12076634510933987 | 12076634510974372 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f31b1191a00 | 0x7f30a5635300 | 467516 | 467516 | 17173 | 3740136 | 65536 | 304554434 | 3042045 | 0 | 1220032660 | 12076634511021389 | 12076634511221666 | 12076634511520386 | 12076634511586971 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f31b1191900 | 0x7f30a5635340 | 225357 | 225357 | 22606 | 1802864 | 65536 | 126884404 | 1624746 | 0 | 509347128 | 12076634511614202 | 12076634511821665 | 12076634511958625 | 12076634511961738 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f31b1191800 | 0x7f30a5635380 | 214853 | 214853 | 19943 | 1718832 | 65536 | 138156049 | 1552593 | 0 | 554439388 | 12076634512019917 | 12076634512227264 | 12076634512358624 | 12076634512362043 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f31b1191700 | 0x7f30a56353c0 | 212085 | 212085 | 21709 | 1696688 | 65536 | 162677854 | 1518896 | 0 | 652530312 | 12076634512440970 | 12076634512621503 | 12076634512750303 | 12076634512753651 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f31b1191600 | 0x7f30a5635400 | 448589 | 448589 | 14903 | 3588720 | 65536 | 352429805 | 2980195 | 0 | 1411536664 | 12076634512815597 | 12076634513007902 | 12076634513294302 | 12076634513361963 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f31b1191500 | 0x7f30a5635440 | 398365 | 398365 | 30407 | 3186928 | 65536 | 322399376 | 3015454 | 0 | 1291412136 | 12076634513388011 | 12076634513596861 | 12076634513850621 | 12076634513916414 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f31b1191400 | 0x7f30a5635480 | 212733 | 212733 | 21416 | 1701872 | 65536 | 152200936 | 1534437 | 0 | 610620496 | 12076634513941962 | 12076634514154300 | 12076634514284220 | 12076634514287725 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f31b1191300 | 0x7f30a56354c0 | 212636 | 212636 | 21337 | 1701096 | 65536 | 160391048 | 1536825 | 0 | 643379376 | 12076634514348127 | 12076634514545339 | 12076634514675419 | 12076634514678532 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f31b1191a00 | 0x7f30a5635500 | 222621 | 222621 | 21873 | 1780976 | 65536 | 144958356 | 1613854 | 0 | 581645868 | 12076634514752189 | 12076634514932378 | 12076634515069178 | 12076634515072334 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f31b1191900 | 0x7f30a5635540 | 397556 | 397556 | 30136 | 3180456 | 65536 | 341121024 | 3002753 | 0 | 1366301448 | 12076634515133218 | 12076634515327737 | 12076634515581337 | 12076634515646482 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f31b1191800 | 0x7f30a5635580 | 403172 | 403172 | 24087 | 3225384 | 65536 | 224707461 | 3040116 | 0 | 900648000 | 12076634515675667 | 12076634515871416 | 12076634516129336 | 12076634516199712 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f31b1191700 | 0x7f30a56355c0 | 216284 | 216284 | 24062 | 1730280 | 65536 | 126988676 | 1563585 | 0 | 509770336 | 12076634516228164 | 12076634516426135 | 12076634516558295 | 12076634516561545 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f31b1191600 | 0x7f30a5635600 | 219124 | 219124 | 22867 | 1753000 | 65536 | 136862429 | 1589426 | 0 | 549267012 | 12076634516622708 | 12076634516813174 | 12076634516948054 | 12076634516951169 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f31b1191500 | 0x7f30a5635640 | 213709 | 213709 | 22349 | 1709680 | 65536 | 157553694 | 1533157 | 0 | 632031676 | 12076634517030537 | 12076634517211413 | 12076634517341493 | 12076634517344561 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f31b1191400 | 0x7f30a5635680 | 399981 | 399981 | 28230 | 3199856 | 65536 | 292218439 | 3025047 | 0 | 1170691640 | 12076634517402739 | 12076634517594932 | 12076634517850932 | 12076634517917136 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f31b1191300 | 0x7f30a56356c0 | 405413 | 405413 | 28751 | 3243312 | 65536 | 293488110 | 3058758 | 0 | 1175768556 | 12076634517942824 | 12076634518149171 | 12076634518407570 | 12076634518473942 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f31b1191a00 | 0x7f30a5635700 | 210541 | 210541 | 18560 | 1684336 | 65536 | 158231458 | 1524200 | 0 | 634744128 | 12076634518498998 | 12076634518696210 | 12076634518825490 | 12076634518828672 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f31b1191900 | 0x7f30a5635740 | 221165 | 221165 | 25093 | 1769328 | 65536 | 153672197 | 1587171 | 0 | 616501944 | 12076634518889094 | 12076634519077649 | 12076634519212209 | 12076634519215481 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f31b1191800 | 0x7f30a5635780 | 214733 | 214733 | 20244 | 1717872 | 65536 | 146270057 | 1557059 | 0 | 586895320 | 12076634519290751 | 12076634519467568 | 12076634519599568 | 12076634519602501 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f31b1191700 | 0x7f30a56357c0 | 395148 | 395148 | 31161 | 3161192 | 65536 | 348917173 | 2987788 | 0 | 1397488220 | 12076634519664046 | 12076634519856527 | 12076634520109807 | 12076634520179775 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f31b1191600 | 0x7f30a5635800 | 391717 | 391717 | 30699 | 3133744 | 65536 | 352352859 | 2970795 | 0 | 1411229772 | 12076634520210833 | 12076634520405326 | 12076634520656205 | 12076634520723025 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f31b1191500 | 0x7f30a5635840 | 211165 | 211165 | 22356 | 1689328 | 65536 | 159141992 | 1519622 | 0 | 638391120 | 12076634520747351 | 12076634520946445 | 12076634521075405 | 12076634521078877 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f31b1191400 | 0x7f30a5635880 | 222669 | 222669 | 22358 | 1781360 | 65536 | 140744300 | 1613811 | 0 | 564813204 | 12076634521142015 | 12076634521321964 | 12076634521459724 | 12076634521462892 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f31b1191300 | 0x7f30a56358c0 | 210276 | 210276 | 22634 | 1682216 | 65536 | 138811194 | 1514446 | 0 | 557073360 | 12076634521536088 | 12076634521712523 | 12076634521841163 | 12076634521844321 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f31b1191a00 | 0x7f30a5635900 | 410381 | 410381 | 24119 | 3283056 | 65536 | 309213186 | 3095322 | 0 | 1238665668 | 12076634521904974 | 12076634522099402 | 12076634522362922 | 12076634522429620 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f31b1191900 | 0x7f30a5635940 | 397700 | 397700 | 29036 | 3181608 | 65536 | 290946828 | 3013211 | 0 | 1165600192 | 12076634522455268 | 12076634522659721 | 12076634522913640 | 12076634522980154 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f31b1191800 | 0x7f30a5635980 | 222484 | 222484 | 22018 | 1779880 | 65536 | 128990349 | 1612365 | 0 | 517801864 | 12076634523013546 | 12076634523210600 | 12076634523347879 | 12076634523351094 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f31b1191700 | 0x7f30a56359c0 | 222764 | 222764 | 24204 | 1782120 | 65536 | 146331742 | 1615165 | 0 | 587163692 | 12076634523411156 | 12076634523601639 | 12076634523738919 | 12076634523742191 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f31b1191600 | 0x7f30a5635a00 | 212021 | 212021 | 22550 | 1696176 | 65536 | 157151524 | 1525336 | 0 | 630423560 | 12076634523815207 | 12076634523991078 | 12076634524121478 | 12076634524125254 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f31b1191500 | 0x7f30a5635a40 | 401045 | 401045 | 26402 | 3208368 | 65536 | 336390145 | 3004654 | 0 | 1347376740 | 12076634524185225 | 12076634524375077 | 12076634524632037 | 12076634524698570 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f31b1191400 | 0x7f30a5635a80 | 401900 | 401900 | 26390 | 3215208 | 65536 | 295687943 | 3049472 | 0 | 1184568052 | 12076634524722816 | 12076634524918436 | 12076634525176195 | 12076634525244576 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f31b1191300 | 0x7f30a5635ac0 | 211773 | 211773 | 25302 | 1694192 | 65536 | 150606101 | 1519106 | 0 | 604274304 | 12076634525273139 | 12076634525467875 | 12076634525597154 | 12076634525600558 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f31b1191a00 | 0x7f30a5635b00 | 220996 | 220996 | 25671 | 1767976 | 65536 | 155539914 | 1586010 | 0 | 623980732 | 12076634525660279 | 12076634525852834 | 12076634525988034 | 12076634525991365 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f31b1191900 | 0x7f30a5635b40 | 218309 | 218309 | 23155 | 1746480 | 65536 | 153186453 | 1572643 | 0 | 614577888 | 12076634526065333 | 12076634526243073 | 12076634526376673 | 12076634526380178 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f31b1191800 | 0x7f30a5635b80 | 413652 | 413652 | 33352 | 3309224 | 65536 | 288767588 | 3135220 | 0 | 1156887128 | 12076634526440901 | 12076634526625632 | 12076634526892192 | 12076634526958564 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f31b1191700 | 0x7f30a5635bc0 | 398316 | 398316 | 33638 | 3186536 | 65536 | 340782112 | 3013266 | 0 | 1364942872 | 12076634526986887 | 12076634527187231 | 12076634527441950 | 12076634527482839 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f31b1191600 | 0x7f30a5635c00 | 205276 | 205276 | 21119 | 1642216 | 65536 | 171403607 | 1481515 | 0 | 687438856 | 12076634527516492 | 12076634527715070 | 12076634527841149 | 12076634527844392 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f31b1191500 | 0x7f30a5635c40 | 220756 | 220756 | 26419 | 1766056 | 65536 | 159827176 | 1589517 | 0 | 641144504 | 12076634527905185 | 12076634528120349 | 12076634528256349 | 12076634528259995 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f31b1191400 | 0x7f30a5635c80 | 214117 | 214117 | 22063 | 1712944 | 65536 | 148578942 | 1546808 | 0 | 596133776 | 12076634528333842 | 12076634528514748 | 12076634528646268 | 12076634528649429 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f31b1191300 | 0x7f30a5635cc0 | 414036 | 414036 | 32477 | 3312296 | 65536 | 326939111 | 3126424 | 0 | 1309573220 | 12076634528709702 | 12076634528904027 | 12076634529170587 | 12076634529238565 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f31b1191a00 | 0x7f30a5635d00 | 389733 | 389733 | 31059 | 3117872 | 65536 | 341729609 | 2953917 | 0 | 1368736252 | 12076634529266878 | 12076634529463066 | 12076634529713785 | 12076634529779682 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f31b1191900 | 0x7f30a5635d40 | 219156 | 219156 | 22779 | 1753256 | 65536 | 151748826 | 1574610 | 0 | 608822620 | 12076634529804548 | 12076634530000025 | 12076634530134264 | 12076634530137818 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f31b1191800 | 0x7f30a5635d80 | 223341 | 223341 | 26808 | 1786736 | 65536 | 159849644 | 1611787 | 0 | 641246944 | 12076634530199333 | 12076634530387384 | 12076634530525624 | 12076634530529216 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f31b1191700 | 0x7f30a5635dc0 | 214917 | 214917 | 25443 | 1719344 | 65536 | 162346752 | 1524728 | 0 | 651208456 | 12076634530601581 | 12076634530778263 | 12076634530908023 | 12076634530911397 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f31b1191600 | 0x7f30a5635e00 | 408484 | 408484 | 31258 | 3267880 | 65536 | 320329657 | 3092366 | 0 | 1283139824 | 12076634530971860 | 12076634531165142 | 12076634531428822 | 12076634531469916 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f31b1191500 | 0x7f30a5635e40 | 392756 | 392756 | 33027 | 3142056 | 65536 | 334521193 | 2972456 | 0 | 1339901580 | 12076634531509049 | 12076634531687221 | 12076634531939700 | 12076634532015320 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f31b1191400 | 0x7f30a5635e80 | 279308 | 279308 | 14600 | 2234472 | 65536 | 144221695 | 1546141 | 0 | 578708168 | 12076634532041669 | 12076634532250260 | 12076634532424979 | 12076634532428349 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f31b1191300 | 0x7f30a5635ec0 | 226748 | 226748 | 29457 | 1813992 | 65536 | 178093356 | 1602668 | 0 | 714235712 | 12076634532488801 | 12076634532676339 | 12076634532814258 | 12076634532817633 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f31b1191a00 | 0x7f30a5635f00 | 212613 | 212613 | 24369 | 1700912 | 65536 | 161056055 | 1531647 | 0 | 646065348 | 12076634532892883 | 12076634533076338 | 12076634533207058 | 12076634533210664 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f31b1191900 | 0x7f30a5635f40 | 412309 | 412309 | 32858 | 3298480 | 65536 | 334313937 | 3122389 | 0 | 1339107212 | 12076634533272058 | 12076634533460977 | 12076634533726416 | 12076634533793017 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f31b1191800 | 0x7f30a5635f80 | 398371 | 398371 | 32510 | 3186976 | 65536 | 340219487 | 3015834 | 0 | 1362692336 | 12076634533821500 | 12076634534015056 | 12076634534271375 | 12076634534338342 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f31b1191700 | 0x7f30a5635fc0 | 228188 | 228188 | 26741 | 1825512 | 65536 | 149209986 | 1647545 | 0 | 598707016 | 12076634534364611 | 12076634534562895 | 12076634534702894 | 12076634534706386 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f31b1191600 | 0x7f30a5636000 | 225708 | 225708 | 27996 | 1805672 | 65536 | 177250903 | 1624430 | 0 | 710843524 | 12076634534768331 | 12076634534956494 | 12076634535096813 | 12076634535100429 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f31b1191500 | 0x7f30a5636040 | 211220 | 211220 | 22475 | 1689768 | 65536 | 160254032 | 1514284 | 0 | 642854776 | 12076634535174577 | 12076634535353933 | 12076634535483213 | 12076634535486688 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f31b1191400 | 0x7f30a5636080 | 412789 | 412789 | 34054 | 3302320 | 65536 | 296733323 | 3121128 | 0 | 1188766220 | 12076634535547371 | 12076634535732812 | 12076634535999531 | 12076634536067077 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f31b1191300 | 0x7f30a56360c0 | 398948 | 398948 | 26692 | 3191592 | 65536 | 249430262 | 3001335 | 0 | 999539228 | 12076634536092825 | 12076634536295051 | 12076634536551850 | 12076634536615818 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f31b1191a00 | 0x7f30a5636100 | 219732 | 219732 | 21810 | 1757864 | 65536 | 146729488 | 1589753 | 0 | 588782344 | 12076634536640754 | 12076634536837610 | 12076634536973129 | 12076634536976529 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f31b1191900 | 0x7f30a5636140 | 231853 | 231853 | 26909 | 1854832 | 65536 | 196977740 | 1689369 | 0 | 789781756 | 12076634537038584 | 12076634537226409 | 12076634537370248 | 12076634537373768 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f31b1191800 | 0x7f30a5636180 | 217245 | 217245 | 22366 | 1737968 | 65536 | 149848899 | 1578330 | 0 | 601235936 | 12076634537446193 | 12076634537621608 | 12076634537755847 | 12076634537759095 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f31b1191700 | 0x7f30a56361c0 | 412789 | 412789 | 33823 | 3302320 | 65536 | 328378877 | 3136724 | 0 | 1315331968 | 12076634537819016 | 12076634538011047 | 12076634538278246 | 12076634538344273 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f31b1191600 | 0x7f30a5636200 | 443228 | 443228 | 14224 | 3545832 | 65536 | 352134637 | 2957912 | 0 | 1410361884 | 12076634538370261 | 12076634538570246 | 12076634538857285 | 12076634538923270 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f31b1191500 | 0x7f30a5636240 | 222836 | 222836 | 24641 | 1782696 | 65536 | 145710036 | 1601147 | 0 | 584707564 | 12076634538947946 | 12076634539151204 | 12076634539287684 | 12076634539291164 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f31b1191400 | 0x7f30a5636280 | 239925 | 239925 | 25988 | 1919408 | 65536 | 208509762 | 1760336 | 0 | 835909668 | 12076634539352438 | 12076634539539364 | 12076634539689443 | 12076634539692852 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f31b1191300 | 0x7f30a56362c0 | 222461 | 222461 | 24485 | 1779696 | 65536 | 148799551 | 1614425 | 0 | 597037216 | 12076634539780374 | 12076634539956803 | 12076634540093762 | 12076634540097414 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f31b1191a00 | 0x7f30a5636300 | 413860 | 413860 | 32816 | 3310888 | 65536 | 342079203 | 3146167 | 0 | 1370187588 | 12076634540157266 | 12076634540344802 | 12076634540613441 | 12076634540679657 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f31b1191900 | 0x7f30a5636340 | 400508 | 400508 | 30364 | 3204072 | 65536 | 325601236 | 3033729 | 0 | 1304223612 | 12076634540707349 | 12076634540900960 | 12076634541159040 | 12076634541228759 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f31b1191800 | 0x7f30a5636380 | 215420 | 215420 | 22810 | 1723368 | 65536 | 152585967 | 1559429 | 0 | 612212692 | 12076634541255168 | 12076634541456959 | 12076634541590079 | 12076634541593427 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f31b1191700 | 0x7f30a56363c0 | 255541 | 255541 | 30610 | 2044336 | 65536 | 222286816 | 1859187 | 0 | 890991504 | 12076634541652958 | 12076634541839998 | 12076634541998398 | 12076634542006746 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f31b1191600 | 0x7f30a5636400 | 216668 | 216668 | 22070 | 1733352 | 65536 | 155741010 | 1570265 | 0 | 624841512 | 12076634542076425 | 12076634542254077 | 12076634542388157 | 12076634542391301 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f31b1191500 | 0x7f30a5636440 | 417628 | 417628 | 34074 | 3341032 | 65536 | 333216758 | 3118039 | 0 | 1334683008 | 12076634542450792 | 12076634542640317 | 12076634542910716 | 12076634542957294 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f31b1191400 | 0x7f30a5636480 | 397324 | 397324 | 30824 | 3178600 | 65536 | 337128771 | 3018503 | 0 | 1350334456 | 12076634542981539 | 12076634543186555 | 12076634543443355 | 12076634543488522 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f31b1191300 | 0x7f30a56364c0 | 225229 | 225229 | 24290 | 1801840 | 65536 | 142448607 | 1626904 | 0 | 571637828 | 12076634543530180 | 12076634543704954 | 12076634543843994 | 12076634543847129 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f31b1191a00 | 0x7f30a5636500 | 262797 | 262797 | 28081 | 2102384 | 65536 | 233830271 | 1940837 | 0 | 937170592 | 12076634543909976 | 12076634544116153 | 12076634544281593 | 12076634544284773 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f31b1191900 | 0x7f30a5636540 | 221141 | 221141 | 24416 | 1769136 | 65536 | 144513711 | 1597678 | 0 | 579896612 | 12076634544358721 | 12076634544537752 | 12076634544673752 | 12076634544677273 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f31b1191800 | 0x7f30a5636580 | 408485 | 408485 | 36448 | 3267888 | 65536 | 324005800 | 3077103 | 0 | 1297869708 | 12076634544736423 | 12076634544931832 | 12076634545196471 | 12076634545265778 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f31b1191700 | 0x7f30a56365c0 | 398084 | 398084 | 38085 | 3184680 | 65536 | 335870488 | 2989272 | 0 | 1345296084 | 12076634545291696 | 12076634545487190 | 12076634545741750 | 12076634545808177 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f31b1191600 | 0x7f30a5636600 | 225389 | 225389 | 23366 | 1803120 | 65536 | 143201439 | 1633713 | 0 | 574668904 | 12076634545835267 | 12076634546034069 | 12076634546174229 | 12076634546177504 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f31b1191500 | 0x7f30a5636640 | 274788 | 274788 | 30015 | 2198312 | 65536 | 245834907 | 2034390 | 0 | 985205140 | 12076634546240341 | 12076634546426388 | 12076634546599348 | 12076634546602735 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f31b1191400 | 0x7f30a5636680 | 212845 | 212845 | 21768 | 1702768 | 65536 | 154990814 | 1519836 | 0 | 621814824 | 12076634546674859 | 12076634546851027 | 12076634546982387 | 12076634546985697 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f31b1191300 | 0x7f30a56366c0 | 406924 | 406924 | 36478 | 3255400 | 65536 | 315620737 | 3056185 | 0 | 1264314736 | 12076634547046330 | 12076634547239506 | 12076634547502386 | 12076634547567880 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f31b1191a00 | 0x7f30a5636700 | 395605 | 395605 | 32088 | 3164848 | 65536 | 318104197 | 2984256 | 0 | 1274235812 | 12076634547596163 | 12076634547788305 | 12076634548044305 | 12076634548112944 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f31b1191900 | 0x7f30a5636740 | 228092 | 228092 | 25336 | 1824744 | 65536 | 143630960 | 1655089 | 0 | 576381056 | 12076634548138692 | 12076634548339344 | 12076634548480304 | 12076634548483744 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f31b1191800 | 0x7f30a5636780 | 288765 | 288765 | 29964 | 2310128 | 65536 | 259925815 | 2149141 | 0 | 1041559524 | 12076634548543705 | 12076634548728143 | 12076634548911663 | 12076634548915016 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f31b1191700 | 0x7f30a56367c0 | 226421 | 226421 | 25398 | 1811376 | 65536 | 145020314 | 1625891 | 0 | 581981220 | 12076634549021955 | 12076634549203822 | 12076634549343342 | 12076634549347000 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f31b1191600 | 0x7f30a5636800 | 411885 | 411885 | 36923 | 3295088 | 65536 | 377379464 | 3118343 | 0 | 1511338396 | 12076634549406931 | 12076634549594221 | 12076634549861421 | 12076634549902813 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f31b1191500 | 0x7f30a5636840 | 400772 | 400772 | 29019 | 3206184 | 65536 | 270661366 | 3012149 | 0 | 1084466864 | 12076634549939552 | 12076634550131340 | 12076634550390059 | 12076634550456674 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f31b1191400 | 0x7f30a5636880 | 223493 | 223493 | 28641 | 1787952 | 65536 | 156747337 | 1596842 | 0 | 628841760 | 12076634550483403 | 12076634550681099 | 12076634550818218 | 12076634550821683 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f31b1191300 | 0x7f30a56368c0 | 300748 | 300748 | 31432 | 2405992 | 65536 | 272500640 | 2248458 | 0 | 1091849984 | 12076634550885241 | 12076634551073738 | 12076634551264457 | 12076634551332072 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f31b1191a00 | 0x7f30a5636900 | 221837 | 221837 | 24841 | 1774704 | 65536 | 154654268 | 1601667 | 0 | 620482812 | 12076634551376685 | 12076634551554377 | 12076634551691497 | 12076634551694897 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f31b1191900 | 0x7f30a5636940 | 436893 | 436893 | 39784 | 3495152 | 65536 | 402285022 | 3324829 | 0 | 1610960776 | 12076634551755520 | 12076634551941416 | 12076634552225095 | 12076634552292028 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f31b1191800 | 0x7f30a5636980 | 400429 | 400429 | 33584 | 3203440 | 65536 | 329960243 | 3008473 | 0 | 1321683344 | 12076634552320531 | 12076634552515655 | 12076634552773094 | 12076634552839366 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f31b1191700 | 0x7f30a56369c0 | 220708 | 220708 | 27469 | 1765672 | 65536 | 151431805 | 1583632 | 0 | 607560160 | 12076634552864162 | 12076634553076133 | 12076634553212453 | 12076634553215966 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f31b1191600 | 0x7f30a5636a00 | 330308 | 330308 | 32883 | 2642472 | 65536 | 300457787 | 2479819 | 0 | 1203689148 | 12076634553276469 | 12076634553467493 | 12076634553678052 | 12076634553744239 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f31b1191500 | 0x7f30a5636a40 | 223524 | 223524 | 23556 | 1788200 | 65536 | 157776974 | 1611436 | 0 | 632997452 | 12076634553780316 | 12076634553964291 | 12076634554102531 | 12076634554106102 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f31b1191400 | 0x7f30a5636a80 | 470173 | 470173 | 40408 | 3761392 | 65536 | 437901688 | 3595251 | 0 | 1753426996 | 12076634554166254 | 12076634554354531 | 12076634554661730 | 12076634554727688 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f31b1191300 | 0x7f30a5636ac0 | 405245 | 405245 | 34161 | 3241968 | 65536 | 335140714 | 3063259 | 0 | 1342389428 | 12076634554754929 | 12076634554951169 | 12076634555213249 | 12076634555279745 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f31b1191a00 | 0x7f30a5636b00 | 225836 | 225836 | 22182 | 1806696 | 65536 | 154006034 | 1634745 | 0 | 617898908 | 12076634555304291 | 12076634555503466 | 12076634555643946 | 12076634555651266 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f31b1191900 | 0x7f30a5636b40 | 356653 | 356653 | 35781 | 2853232 | 65536 | 325389722 | 2683365 | 0 | 1303415364 | 12076634555706769 | 12076634555893704 | 12076634556121223 | 12076634556189638 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f31b1191800 | 0x7f30a5636b80 | 218700 | 218700 | 25714 | 1749608 | 65536 | 154046907 | 1576350 | 0 | 618063224 | 12076634556229913 | 12076634556409383 | 12076634556544902 | 12076634556548375 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f31b1191700 | 0x7f30a5636bc0 | 503797 | 503797 | 43958 | 4030384 | 65536 | 471894300 | 3865747 | 0 | 1889397832 | 12076634556611292 | 12076634556801222 | 12076634557130821 | 12076634557200308 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f31b1191600 | 0x7f30a5636c00 | 407340 | 407340 | 28640 | 3258728 | 65536 | 312702803 | 3052368 | 0 | 1252629116 | 12076634557226436 | 12076634557424421 | 12076634557688420 | 12076634557729282 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f31b1191500 | 0x7f30a5636c40 | 226132 | 226132 | 25877 | 1809064 | 65536 | 153078598 | 1635564 | 0 | 614185492 | 12076634557772462 | 12076634557950339 | 12076634558090179 | 12076634558093649 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f31b1191400 | 0x7f30a5636c80 | 381924 | 381924 | 36449 | 3055400 | 65536 | 349844373 | 2887948 | 0 | 1401225252 | 12076634558156556 | 12076634558340738 | 12076634558586178 | 12076634558653100 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f31b1191300 | 0x7f30a5636cc0 | 225636 | 225636 | 28452 | 1805096 | 65536 | 163004818 | 1620463 | 0 | 653901864 | 12076634558689598 | 12076634558868257 | 12076634559008577 | 12076634559012278 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f31b1191a00 | 0x7f30a5636d00 | 575053 | 575053 | 46909 | 4600432 | 65536 | 542304783 | 4444623 | 0 | 2171037532 | 12076634559063984 | 12076634559260256 | 12076634559638336 | 12076634559704396 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f31b1191900 | 0x7f30a5636d40 | 407580 | 407580 | 33723 | 3260648 | 65536 | 300540763 | 3069434 | 0 | 1203998740 | 12076634559729943 | 12076634559931455 | 12076634560195934 | 12076634560262333 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f31b1191800 | 0x7f30a5636d80 | 221692 | 221692 | 25854 | 1773544 | 65536 | 161817687 | 1597624 | 0 | 649149348 | 12076634560287300 | 12076634560486494 | 12076634560624573 | 12076634560628034 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f31b1191700 | 0x7f30a5636dc0 | 479652 | 479652 | 16068 | 3837224 | 65536 | 400919451 | 3297056 | 0 | 1605528676 | 12076634560680361 | 12076634560872413 | 12076634561182652 | 12076634561253146 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f31b1191600 | 0x7f30a5636e00 | 226229 | 226229 | 27604 | 1809840 | 65536 | 171830292 | 1631065 | 0 | 689219544 | 12076634561291798 | 12076634561482171 | 12076634561622011 | 12076634561625890 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f31b1191500 | 0x7f30a5636e40 | 642308 | 642308 | 49986 | 5138472 | 65536 | 610109508 | 4982908 | 0 | 2442259024 | 12076634561686523 | 12076634561873050 | 12076634562297370 | 12076634562320632 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f31b1191400 | 0x7f30a5636e80 | 405085 | 405085 | 38149 | 3240688 | 65536 | 316171611 | 3009971 | 0 | 1266529976 | 12076634562375825 | 12076634562555289 | 12076634562816408 | 12076634562837233 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f31b1191300 | 0x7f30a5636ec0 | 224084 | 224084 | 25472 | 1792680 | 65536 | 181419219 | 1619155 | 0 | 727554372 | 12076634562893057 | 12076634563075768 | 12076634563215447 | 12076634563219103 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f31b1191a00 | 0x7f30a5636f00 | 486588 | 486588 | 42862 | 3892712 | 65536 | 454898081 | 3728515 | 0 | 1821450016 | 12076634563280988 | 12076634563480887 | 12076634563795926 | 12076634563817416 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f31b1191900 | 0x7f30a5636f40 | 242541 | 242541 | 26466 | 1940336 | 65536 | 213332854 | 1779150 | 0 | 855228180 | 12076634563892296 | 12076634564077046 | 12076634564229205 | 12076634564232879 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f31b1191800 | 0x7f30a5636f80 | 784108 | 784108 | 60015 | 6272872 | 65536 | 750338005 | 6105956 | 0 | 3003173004 | 12076634564285056 | 12076634564488245 | 12076634565005523 | 12076634565048666 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f31b1191700 | 0x7f30a5636fc0 | 462325 | 462325 | 41381 | 3698608 | 65536 | 395240212 | 3527463 | 0 | 1582802604 | 12076634565094482 | 12076634565274643 | 12076634565577202 | 12076634565643924 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f31b1191600 | 0x7f30a5637000 | 246109 | 246109 | 27784 | 1968880 | 65536 | 217168109 | 1806572 | 0 | 870553892 | 12076634565678829 | 12076634565865202 | 12076634566019761 | 12076634566023700 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f31b1191500 | 0x7f30a5637040 | 592933 | 592933 | 48021 | 4743472 | 65536 | 560063963 | 4582944 | 0 | 2242112928 | 12076634566085235 | 12076634566269841 | 12076634566656240 | 12076634566723241 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f31b1191400 | 0x7f30a5637080 | 280285 | 280285 | 32285 | 2242288 | 65536 | 249208230 | 2057641 | 0 | 998719992 | 12076634566764228 | 12076634566942639 | 12076634567118159 | 12076634567121853 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f31b1191300 | 0x7f30a56370c0 | 941756 | 941756 | 14101 | 7534056 | 65536 | 890432974 | 7231462 | 0 | 3563553184 | 12076634567184139 | 12076634567381838 | 12076634568006157 | 12076634568057143 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f31b1191a00 | 0x7f30a5637100 | 507660 | 507660 | 43040 | 4061288 | 65536 | 461305602 | 3890339 | 0 | 1847067668 | 12076634568083071 | 12076634568279756 | 12076634568611435 | 12076634568675082 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f31b1191900 | 0x7f30a5637140 | 280405 | 280405 | 28596 | 2243248 | 65536 | 253336450 | 2087346 | 0 | 1015233064 | 12076634568699477 | 12076634568894155 | 12076634569072874 | 12076634569076559 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f31b1191800 | 0x7f30a5637180 | 697597 | 697597 | 54537 | 5580784 | 65536 | 664191411 | 5417658 | 0 | 2658618900 | 12076634569137282 | 12076634569324714 | 12076634569780713 | 12076634569820142 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f31b1191700 | 0x7f30a56371c0 | 311908 | 311908 | 30116 | 2495272 | 65536 | 284878944 | 2343878 | 0 | 1141402000 | 12076634569871588 | 12076634570055592 | 12076634570255592 | 12076634570322897 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f31b1191600 | 0x7f30a5637200 | 1074565 | 1074565 | 15319 | 8596528 | 65536 | 1026825089 | 8361806 | 0 | 4109121200 | 12076634570348595 | 12076634570543431 | 12076634571254629 | 12076634571321024 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f31b1191500 | 0x7f30a5637240 | 569653 | 569653 | 48021 | 4557232 | 65536 | 530371934 | 4383864 | 0 | 2123328560 | 12076634571346331 | 12076634571549829 | 12076634571923908 | 12076634571989848 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f31b1191400 | 0x7f30a5637280 | 317245 | 317245 | 31036 | 2537968 | 65536 | 289326641 | 2381524 | 0 | 1159204808 | 12076634572019904 | 12076634572227907 | 12076634572431907 | 12076634572500057 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f31b1191300 | 0x7f30a56372c0 | 802213 | 802213 | 60272 | 6417712 | 65536 | 768040034 | 6253577 | 0 | 3074015504 | 12076634572524202 | 12076634572719586 | 12076634573245505 | 12076634573312158 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f31b1191a00 | 0x7f30a5637300 | 347797 | 347797 | 32627 | 2782384 | 65536 | 318789642 | 2623936 | 0 | 1277049252 | 12076634573353665 | 12076634573529664 | 12076634573753184 | 12076634573818800 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f31b1191900 | 0x7f30a5637340 | 1208453 | 1208453 | 86374 | 9667632 | 65536 | 1168332366 | 9488416 | 0 | 4675150496 | 12076634573846241 | 12076634574053983 | 12076634574853821 | 12076634574922784 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f31b1191800 | 0x7f30a5637380 | 638796 | 638796 | 50862 | 5110376 | 65536 | 604054320 | 4952300 | 0 | 2418060228 | 12076634574954663 | 12076634575159741 | 12076634575582300 | 12076634575648644 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f31b1191700 | 0x7f30a56373c0 | 353580 | 353580 | 33948 | 2828648 | 65536 | 324112464 | 2665953 | 0 | 1298340420 | 12076634575673781 | 12076634575870139 | 12076634576097178 | 12076634576165074 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f31b1191600 | 0x7f30a5637400 | 907972 | 907972 | 66668 | 7263784 | 65536 | 871808204 | 7100172 | 0 | 3489082580 | 12076634576192956 | 12076634576392698 | 12076634576989176 | 12076634577055932 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f31b1191500 | 0x7f30a5637440 | 420029 | 420029 | 39580 | 3360240 | 65536 | 386607378 | 3170786 | 0 | 1548333812 | 12076634577092850 | 12076634577279576 | 12076634577549015 | 12076634577615372 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f31b1191400 | 0x7f30a5637480 | 1486188 | 1486188 | 99240 | 11889512 | 65536 | 1446440968 | 11735967 | 0 | 5787585524 | 12076634577639457 | 12076634577835895 | 12076634578822292 | 12076634578892327 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f31b1191300 | 0x7f30a56374c0 | 779469 | 779469 | 62018 | 6235760 | 65536 | 741190979 | 6050695 | 0 | 2966600364 | 12076634578918406 | 12076634579119252 | 12076634579634931 | 12076634579684581 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f31b1191a00 | 0x7f30a5637500 | 421452 | 421452 | 38444 | 3371624 | 65536 | 390033242 | 3211048 | 0 | 1562019048 | 12076634579709507 | 12076634579904850 | 12076634580177649 | 12076634580247919 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f31b1191900 | 0x7f30a5637540 | 1125589 | 1125589 | 79521 | 9004720 | 65536 | 1088541766 | 8842982 | 0 | 4355992024 | 12076634580276031 | 12076634580473649 | 12076634581212847 | 12076634581268868 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f31b1191800 | 0x7f30a5637580 | 487197 | 487197 | 42516 | 3897584 | 65536 | 454902748 | 3736289 | 0 | 1821503676 | 12076634581309874 | 12076634581489006 | 12076634581805326 | 12076634581854848 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f31b1191700 | 0x7f30a56375c0 | 1768453 | 1768453 | 117917 | 14147632 | 65536 | 1724990770 | 13979184 | 0 | 6901783928 | 12076634581883371 | 12076634582092845 | 12076634583267082 | 12076634583563887 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f31b1191600 | 0x7f30a5637600 | 918692 | 918692 | 64768 | 7349544 | 65536 | 807675935 | 7172718 | 0 | 3232557552 | 12076634583598371 | 12076634583798721 | 12076634584410400 | 12076634584459012 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f31b1191500 | 0x7f30a5637640 | 495188 | 495188 | 44861 | 3961512 | 65536 | 462565391 | 3791743 | 0 | 1852172552 | 12076634584494178 | 12076634584679519 | 12076634585000798 | 12076634585094735 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f31b1191400 | 0x7f30a5637680 | 1336324 | 1336324 | 91200 | 10690600 | 65536 | 1298621629 | 10526806 | 0 | 5196313352 | 12076634585122847 | 12076634585329598 | 12076634586208316 | 12076634586259401 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f31b1191300 | 0x7f30a56376c0 | 630380 | 630380 | 53869 | 5043048 | 65536 | 590581195 | 4863819 | 0 | 2364224928 | 12076634586306950 | 12076634586488795 | 12076634586899354 | 12076634586946790 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f31b1191a00 | 0x7f30a5637700 | 2331940 | 2331940 | 149601 | 18655528 | 65536 | 2284557933 | 18495718 | 0 | 9140053388 | 12076634586979761 | 12076634587190394 | 12076634588740950 | 12076634588791461 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f31b1191900 | 0x7f30a5637740 | 1199868 | 1199868 | 85074 | 9598952 | 65536 | 1161498246 | 9433358 | 0 | 4647813612 | 12076634588825635 | 12076634589030229 | 12076634589826548 | 12076634589875217 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f31b1191800 | 0x7f30a5637780 | 639124 | 639124 | 50051 | 5113000 | 65536 | 608084295 | 4952297 | 0 | 2434171872 | 12076634589900133 | 12076634590105267 | 12076634590520306 | 12076634590569148 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f31b1191700 | 0x7f30a56377c0 | 1757892 | 1757892 | 117158 | 14063144 | 65536 | 1714873517 | 13896555 | 0 | 6861319168 | 12076634590598723 | 12076634590790705 | 12076634591950063 | 12076634592004989 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f31b1191600 | 0x7f30a5637800 | 1197780 | 1197780 | 82504 | 9582248 | 65536 | 1162231712 | 9420247 | 0 | 4650752948 | 12076634592045584 | 12076634592226382 | 12076634593012780 | 12076634593059921 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f31b1191500 | 0x7f30a5637840 | 4583812 | 4583812 | 211273 | 36670504 | 65536 | 4515494636 | 36504397 | 0 | 18063799832 | 12076634593107028 | 12076634593282380 | 12076634596337893 | 12076634596388553 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f31b1191400 | 0x7f30a5637880 | 2324436 | 2324436 | 151493 | 18595496 | 65536 | 2275127854 | 18421263 | 0 | 9102333000 | 12076634596420652 | 12076634596617732 | 12076634598164129 | 12076634598235348 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f31b1191300 | 0x7f30a56378c0 | 1201508 | 1201508 | 82495 | 9612072 | 65536 | 1164122494 | 9454092 | 0 | 4658313948 | 12076634598267928 | 12076634598462688 | 12076634599253246 | 12076634599321308 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f31b1191a00 | 0x7f30a5637900 | 3443268 | 3443268 | 214620 | 27546152 | 65536 | 3385357309 | 27381420 | 0 | 13543259488 | 12076634599342307 | 12076634599558206 | 12076634601841081 | 12076634601909913 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f31b1191900 | 0x7f30a5637940 | 2323188 | 2323188 | 149197 | 18585512 | 65536 | 2274824901 | 18413418 | 0 | 9101128852 | 12076634601955628 | 12076634602146200 | 12076634603681556 | 12076634603750837 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f31b1191800 | 0x7f30a5637980 | 9085596 | 9085596 | 472686 | 72684776 | 65536 | 8976560945 | 72523826 | 0 | 35908064460 | 12076634603782336 | 12076634603982356 | 12076634610042502 | 12076634610114464 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f31b1191700 | 0x7f30a56379c0 | 4572572 | 4572572 | 211846 | 36580584 | 65536 | 4502571004 | 36416224 | 0 | 18012108720 | 12076634610150401 | 12076634610350021 | 12076634613397535 | 12076634613464596 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f31b1191600 | 0x7f30a5637a00 | 2327092 | 2327092 | 149864 | 18616744 | 65536 | 2280060719 | 18455117 | 0 | 9122067764 | 12076634613497297 | 12076634613694974 | 12076634615234810 | 12076634615302405 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 937062 | 937067 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f31b1191500 | 0x7f30a5637a40 | 6811828 | 6811828 | 339834 | 54494632 | 65536 | 6722926938 | 54334488 | 0 | 26893535824 | 12076634615338832 | 12076634615533667 | 12076634620063098 | 12076634620137478 |