50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 926521 | 926526 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7fce83a04180 | 503699 | 503699 | 17355 | 4029600 | 524288 | 366928723 | 3829704 | 0 | 1482516916 | 12076421986929943 | 12076422230152268 | 12076422230476426 | 12076422230582908 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 926521 | 926526 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7fce83a35100 | 27109 | 27109 | 19731 | 216880 | 512 | 1101208 | 73761 | 0 | 4419008 | 12076422245266393 | 12076422245592863 | 12076422245598943 | 12076422245607868 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7fcfb3b9d900 | 0x7fce83a35140 | 225780 | 225780 | 18089 | 1806248 | 65536 | 121244525 | 1637237 | 0 | 486787396 | 12076422245672528 | 12076422245913181 | 12076422246051260 | 12076422246055560 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7fcfb3b9d800 | 0x7fce83a35180 | 398716 | 398716 | 25976 | 3189736 | 65536 | 266456349 | 3024405 | 0 | 1067638612 | 12076422246135248 | 12076422246327739 | 12076422246581337 | 12076422246650806 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7fcfb3b9d700 | 0x7fce83a351c0 | 403333 | 403333 | 34035 | 3226672 | 65536 | 354894109 | 3067944 | 0 | 1421388888 | 12076422246685390 | 12076422246892536 | 12076422247149334 | 12076422247218571 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7fcfb3b9d600 | 0x7fce83a35200 | 227348 | 227348 | 20828 | 1818792 | 65536 | 125536939 | 1651001 | 0 | 503954456 | 12076422247241934 | 12076422247460533 | 12076422247599412 | 12076422247603316 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7fcfb3b9d500 | 0x7fce83a35240 | 229261 | 229261 | 21467 | 1834096 | 65536 | 123251360 | 1671416 | 0 | 494816324 | 12076422247662826 | 12076422247855090 | 12076422247995890 | 12076422247999492 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7fcfb3b9d400 | 0x7fce83a35280 | 216188 | 216188 | 21598 | 1729512 | 65536 | 145299013 | 1563103 | 0 | 583013608 | 12076422248090922 | 12076422248277008 | 12076422248408847 | 12076422248412529 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7fcfb3b9d300 | 0x7fce83a352c0 | 400557 | 400557 | 25434 | 3204464 | 65536 | 292496940 | 3032207 | 0 | 1171806292 | 12076422248470136 | 12076422248678126 | 12076422248933165 | 12076422248974774 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7fcfb3b9da00 | 0x7fce83a35300 | 396692 | 396692 | 33873 | 3173544 | 65536 | 335003606 | 3013158 | 0 | 1341828416 | 12076422249014638 | 12076422249212683 | 12076422249465162 | 12076422249531549 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7fcfb3b9d900 | 0x7fce83a35340 | 227253 | 227253 | 23237 | 1818032 | 65536 | 134136813 | 1649495 | 0 | 538355480 | 12076422249555864 | 12076422249761640 | 12076422249900519 | 12076422249904331 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7fcfb3b9d800 | 0x7fce83a35380 | 226692 | 226692 | 21732 | 1813544 | 65536 | 120696594 | 1647371 | 0 | 484596232 | 12076422249962099 | 12076422250165318 | 12076422250304037 | 12076422250307861 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7fcfb3b9d700 | 0x7fce83a353c0 | 214389 | 214389 | 21576 | 1715120 | 65536 | 137992393 | 1551183 | 0 | 553787104 | 12076422250381688 | 12076422250562276 | 12076422250693315 | 12076422250696974 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7fcfb3b9d600 | 0x7fce83a35400 | 414773 | 414773 | 24896 | 3318192 | 65536 | 254332233 | 3139003 | 0 | 1019146680 | 12076422250755613 | 12076422250948994 | 12076422251212992 | 12076422251280318 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7fcfb3b9d500 | 0x7fce83a35440 | 402948 | 402948 | 31100 | 3223592 | 65536 | 345311611 | 3060797 | 0 | 1383060980 | 12076422251306146 | 12076422251518591 | 12076422251775389 | 12076422251840920 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7fcfb3b9d400 | 0x7fce83a35480 | 215981 | 215981 | 22105 | 1727856 | 65536 | 147538715 | 1562274 | 0 | 591974556 | 12076422251866167 | 12076422252081787 | 12076422252213787 | 12076422252217640 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7fcfb3b9d300 | 0x7fce83a354c0 | 227301 | 227301 | 21853 | 1818416 | 65536 | 129902661 | 1652435 | 0 | 521419652 | 12076422252277842 | 12076422252471865 | 12076422252610905 | 12076422252614548 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7fcfb3b9da00 | 0x7fce83a35500 | 214676 | 214676 | 24068 | 1717416 | 65536 | 151564306 | 1542217 | 0 | 608077676 | 12076422252688154 | 12076422252865143 | 12076422252995703 | 12076422252999373 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7fcfb3b9d900 | 0x7fce83a35540 | 407717 | 407717 | 30268 | 3261744 | 65536 | 271756611 | 3087787 | 0 | 1088842252 | 12076422253058502 | 12076422253257781 | 12076422253518100 | 12076422253584109 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7fcfb3b9d800 | 0x7fce83a35580 | 401724 | 401724 | 31754 | 3213800 | 65536 | 319868522 | 3053462 | 0 | 1281290176 | 12076422253608695 | 12076422253810898 | 12076422254067217 | 12076422254136005 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7fcfb3b9d700 | 0x7fce83a355c0 | 208093 | 208093 | 22267 | 1664752 | 65536 | 158231025 | 1498711 | 0 | 634748272 | 12076422254161963 | 12076422254367855 | 12076422254494734 | 12076422254498488 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7fcfb3b9d600 | 0x7fce83a35600 | 214588 | 214588 | 23034 | 1716712 | 65536 | 159211605 | 1544541 | 0 | 638671756 | 12076422254559091 | 12076422254757453 | 12076422254888652 | 12076422254892280 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7fcfb3b9d500 | 0x7fce83a35640 | 221509 | 221509 | 23535 | 1772080 | 65536 | 140129597 | 1602848 | 0 | 562331880 | 12076422254964895 | 12076422255150571 | 12076422255286090 | 12076422255289959 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7fcfb3b9d400 | 0x7fce83a35680 | 409749 | 409749 | 25632 | 3278000 | 65536 | 260767049 | 3102581 | 0 | 1044886584 | 12076422255349269 | 12076422255540329 | 12076422255801127 | 12076422255866781 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7fcfb3b9d300 | 0x7fce83a356c0 | 397045 | 397045 | 32320 | 3176368 | 65536 | 354677251 | 3014095 | 0 | 1420525360 | 12076422255890675 | 12076422256096646 | 12076422256349924 | 12076422256415911 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7fcfb3b9da00 | 0x7fce83a35700 | 215413 | 215413 | 22699 | 1723312 | 65536 | 157612957 | 1556141 | 0 | 632267272 | 12076422256440236 | 12076422256640483 | 12076422256771842 | 12076422256775660 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7fcfb3b9d900 | 0x7fce83a35740 | 216860 | 216860 | 22611 | 1734888 | 65536 | 145357817 | 1570009 | 0 | 583244976 | 12076422256836152 | 12076422257030241 | 12076422257163200 | 12076422257166957 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7fcfb3b9d800 | 0x7fce83a35780 | 213045 | 213045 | 23853 | 1704368 | 65536 | 153537369 | 1537736 | 0 | 615969060 | 12076422257239451 | 12076422257415838 | 12076422257546078 | 12076422257549848 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7fcfb3b9d700 | 0x7fce83a357c0 | 391252 | 391252 | 31855 | 3130024 | 65536 | 347972870 | 2968370 | 0 | 1393710848 | 12076422257606954 | 12076422257799996 | 12076422258050555 | 12076422258120719 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7fcfb3b9d600 | 0x7fce83a35800 | 391917 | 391917 | 32586 | 3135344 | 65536 | 357088180 | 2974460 | 0 | 1430171152 | 12076422258145435 | 12076422258346713 | 12076422258597112 | 12076422258663908 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7fcfb3b9d500 | 0x7fce83a35840 | 219165 | 219165 | 23836 | 1753328 | 65536 | 147783216 | 1582085 | 0 | 592948824 | 12076422258686580 | 12076422258885590 | 12076422259019990 | 12076422259023757 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7fcfb3b9d400 | 0x7fce83a35880 | 230476 | 230476 | 22194 | 1843816 | 65536 | 139690028 | 1682228 | 0 | 560580832 | 12076422259081945 | 12076422259272788 | 12076422259414868 | 12076422259418511 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7fcfb3b9d300 | 0x7fce83a358c0 | 224397 | 224397 | 24500 | 1795184 | 65536 | 139326035 | 1628540 | 0 | 559121308 | 12076422259490574 | 12076422259667506 | 12076422259804785 | 12076422259808826 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7fcfb3b9da00 | 0x7fce83a35900 | 406772 | 406772 | 34419 | 3254184 | 65536 | 334317077 | 3081931 | 0 | 1339083372 | 12076422259869829 | 12076422260058704 | 12076422260318543 | 12076422260386028 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7fcfb3b9d900 | 0x7fce83a35940 | 404924 | 404924 | 31362 | 3239400 | 65536 | 230520085 | 3053461 | 0 | 923897596 | 12076422260411085 | 12076422260613261 | 12076422260871660 | 12076422260938565 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7fcfb3b9d800 | 0x7fce83a35980 | 212860 | 212860 | 22476 | 1702888 | 65536 | 161996899 | 1533777 | 0 | 649810240 | 12076422260962009 | 12076422261166858 | 12076422261296937 | 12076422261300798 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7fcfb3b9d700 | 0x7fce83a359c0 | 223429 | 223429 | 25313 | 1787440 | 65536 | 146066002 | 1619218 | 0 | 586102212 | 12076422261361050 | 12076422261549256 | 12076422261686855 | 12076422261690703 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7fcfb3b9d600 | 0x7fce83a35a00 | 215341 | 215341 | 22757 | 1722736 | 65536 | 150797450 | 1555333 | 0 | 605010484 | 12076422261762406 | 12076422261935814 | 12076422262067653 | 12076422262071470 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7fcfb3b9d500 | 0x7fce83a35a40 | 474748 | 474748 | 16975 | 3797992 | 65536 | 293534388 | 3130580 | 0 | 1175955920 | 12076422262124910 | 12076422262319012 | 12076422262623810 | 12076422262691583 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7fcfb3b9d400 | 0x7fce83a35a80 | 403108 | 403108 | 33932 | 3224872 | 65536 | 347697388 | 3043075 | 0 | 1392606100 | 12076422262714906 | 12076422262912929 | 12076422263169887 | 12076422263238118 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7fcfb3b9d300 | 0x7fce83a35ac0 | 218532 | 218532 | 22305 | 1748264 | 65536 | 148062250 | 1575543 | 0 | 594095208 | 12076422263262604 | 12076422263464126 | 12076422263597885 | 12076422263601684 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7fcfb3b9da00 | 0x7fce83a35b00 | 285396 | 285396 | 15738 | 2283176 | 65536 | 149822683 | 1619413 | 0 | 601120376 | 12076422263661806 | 12076422263852443 | 12076422264031162 | 12076422264035350 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7fcfb3b9d900 | 0x7fce83a35b40 | 215340 | 215340 | 22956 | 1722728 | 65536 | 162021071 | 1545606 | 0 | 649905944 | 12076422264108095 | 12076422264284601 | 12076422264415640 | 12076422264419594 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7fcfb3b9d800 | 0x7fce83a35b80 | 425685 | 425685 | 31955 | 3405488 | 65536 | 325827843 | 3227397 | 0 | 1305138592 | 12076422264478834 | 12076422264668279 | 12076422264941077 | 12076422265013087 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7fcfb3b9d700 | 0x7fce83a35bc0 | 399117 | 399117 | 23963 | 3192944 | 65536 | 292223838 | 2999341 | 0 | 1170709396 | 12076422265040618 | 12076422265237236 | 12076422265492594 | 12076422265534015 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7fcfb3b9d600 | 0x7fce83a35c00 | 210661 | 210661 | 21983 | 1685296 | 65536 | 147076810 | 1508233 | 0 | 590129484 | 12076422265571003 | 12076422265749873 | 12076422265878512 | 12076422265882252 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7fcfb3b9d500 | 0x7fce83a35c40 | 222613 | 222613 | 26266 | 1780912 | 65536 | 155333837 | 1609408 | 0 | 623161660 | 12076422265941261 | 12076422266135471 | 12076422266272590 | 12076422266276755 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7fcfb3b9d400 | 0x7fce83a35c80 | 211892 | 211892 | 24480 | 1695144 | 65536 | 166751152 | 1529483 | 0 | 668832212 | 12076422266348718 | 12076422266524909 | 12076422266654828 | 12076422266658835 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7fcfb3b9d300 | 0x7fce83a35cc0 | 420621 | 420621 | 30732 | 3364976 | 65536 | 311354590 | 3185080 | 0 | 1247235472 | 12076422266715470 | 12076422266911147 | 12076422267181385 | 12076422267250594 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7fcfb3b9da00 | 0x7fce83a35d00 | 403556 | 403556 | 33342 | 3228456 | 65536 | 346750767 | 3057790 | 0 | 1388818196 | 12076422267278276 | 12076422267486184 | 12076422267744582 | 12076422267811507 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7fcfb3b9d900 | 0x7fce83a35d40 | 225501 | 225501 | 23326 | 1804016 | 65536 | 145719608 | 1633466 | 0 | 584723516 | 12076422267834439 | 12076422268039301 | 12076422268177700 | 12076422268181765 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7fcfb3b9d800 | 0x7fce83a35d80 | 235212 | 235212 | 27732 | 1881704 | 65536 | 151182254 | 1700055 | 0 | 606561024 | 12076422268240013 | 12076422268431138 | 12076422268576418 | 12076422268580295 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7fcfb3b9d700 | 0x7fce83a35dc0 | 222764 | 222764 | 23545 | 1782120 | 65536 | 136559206 | 1610562 | 0 | 548058776 | 12076422268651117 | 12076422268826496 | 12076422268962976 | 12076422268966924 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7fcfb3b9d600 | 0x7fce83a35e00 | 419660 | 419660 | 32755 | 3357288 | 65536 | 312862054 | 3171275 | 0 | 1253263824 | 12076422269031493 | 12076422269226014 | 12076422269495773 | 12076422269562801 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7fcfb3b9d500 | 0x7fce83a35e40 | 397188 | 397188 | 31242 | 3177512 | 65536 | 332538084 | 3011258 | 0 | 1331967048 | 12076422269586845 | 12076422269782011 | 12076422270036730 | 12076422270107533 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7fcfb3b9d400 | 0x7fce83a35e80 | 216437 | 216437 | 25021 | 1731504 | 65536 | 157726815 | 1558570 | 0 | 632727716 | 12076422270130025 | 12076422270336248 | 12076422270468727 | 12076422270472732 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7fcfb3b9d300 | 0x7fce83a35ec0 | 229452 | 229452 | 27876 | 1835624 | 65536 | 162600824 | 1664094 | 0 | 652248928 | 12076422270532372 | 12076422270718646 | 12076422270860885 | 12076422270864730 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7fcfb3b9da00 | 0x7fce83a35f00 | 217388 | 217388 | 22921 | 1739112 | 65536 | 153502619 | 1572871 | 0 | 615838496 | 12076422270935682 | 12076422271114644 | 12076422271248083 | 12076422271252220 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7fcfb3b9d900 | 0x7fce83a35f40 | 400429 | 400429 | 34517 | 3203440 | 65536 | 312053086 | 3016277 | 0 | 1250046964 | 12076422271311761 | 12076422271498002 | 12076422271755280 | 12076422271821979 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7fcfb3b9d800 | 0x7fce83a35f80 | 395852 | 395852 | 31261 | 3166824 | 65536 | 341590869 | 2995699 | 0 | 1368181768 | 12076422271845212 | 12076422272050639 | 12076422272303757 | 12076422272344580 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7fcfb3b9d700 | 0x7fce83a35fc0 | 219084 | 219084 | 24215 | 1752680 | 65536 | 156295132 | 1584241 | 0 | 627013864 | 12076422272388431 | 12076422272566156 | 12076422272700555 | 12076422272704449 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7fcfb3b9d600 | 0x7fce83a36000 | 226477 | 226477 | 29923 | 1811824 | 65536 | 181410308 | 1615662 | 0 | 727484396 | 12076422272763348 | 12076422272953034 | 12076422273093673 | 12076422273097789 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7fcfb3b9d500 | 0x7fce83a36040 | 219013 | 219013 | 23835 | 1752112 | 65536 | 157447610 | 1578787 | 0 | 631630232 | 12076422273169833 | 12076422273350312 | 12076422273484871 | 12076422273489297 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7fcfb3b9d400 | 0x7fce83a36080 | 419132 | 419132 | 30206 | 3353064 | 65536 | 329234179 | 3172116 | 0 | 1318751924 | 12076422273549499 | 12076422273735430 | 12076422274005188 | 12076422274072410 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7fcfb3b9d300 | 0x7fce83a360c0 | 398684 | 398684 | 32485 | 3189480 | 65536 | 335509694 | 3011973 | 0 | 1343854716 | 12076422274097307 | 12076422274300547 | 12076422274556065 | 12076422274622783 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7fcfb3b9da00 | 0x7fce83a36100 | 209485 | 209485 | 24938 | 1675888 | 65536 | 160253623 | 1499799 | 0 | 642853828 | 12076422274645155 | 12076422274847104 | 12076422274974943 | 12076422274979236 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7fcfb3b9d900 | 0x7fce83a36140 | 231717 | 231717 | 27158 | 1853744 | 65536 | 198813574 | 1680703 | 0 | 797119676 | 12076422275038686 | 12076422275235581 | 12076422275379101 | 12076422275383136 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7fcfb3b9d800 | 0x7fce83a36180 | 214548 | 214548 | 24407 | 1716392 | 65536 | 161752299 | 1541490 | 0 | 648831784 | 12076422275454649 | 12076422275631899 | 12076422275763419 | 12076422275767380 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7fcfb3b9d700 | 0x7fce83a361c0 | 412860 | 412860 | 36234 | 3302888 | 65536 | 350456912 | 3124991 | 0 | 1403653908 | 12076422275823875 | 12076422276022937 | 12076422276289016 | 12076422276330767 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7fcfb3b9d600 | 0x7fce83a36200 | 402453 | 402453 | 31680 | 3219632 | 65536 | 338781063 | 3027344 | 0 | 1356939772 | 12076422276366083 | 12076422276552694 | 12076422276810773 | 12076422276876942 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7fcfb3b9d500 | 0x7fce83a36240 | 220708 | 220708 | 24329 | 1765672 | 65536 | 157002495 | 1591501 | 0 | 629870192 | 12076422276901968 | 12076422277118931 | 12076422277254610 | 12076422277258801 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7fcfb3b9d400 | 0x7fce83a36280 | 240668 | 240668 | 27135 | 1925352 | 65536 | 210606440 | 1759383 | 0 | 844293336 | 12076422277318783 | 12076422277506929 | 12076422277657168 | 12076422277661059 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7fcfb3b9d300 | 0x7fce83a362c0 | 223356 | 223356 | 20513 | 1786856 | 65536 | 138893136 | 1601355 | 0 | 557411936 | 12076422277751397 | 12076422277926447 | 12076422278064206 | 12076422278068306 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7fcfb3b9da00 | 0x7fce83a36300 | 418285 | 418285 | 34889 | 3346288 | 65536 | 355328694 | 3174330 | 0 | 1423132904 | 12076422278127666 | 12076422278315565 | 12076422278585483 | 12076422278652491 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7fcfb3b9d900 | 0x7fce83a36340 | 392637 | 392637 | 33434 | 3141104 | 65536 | 327070314 | 2963136 | 0 | 1310097676 | 12076422278675855 | 12076422278874922 | 12076422279127240 | 12076422279197013 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7fcfb3b9d800 | 0x7fce83a36380 | 219684 | 219684 | 25013 | 1757480 | 65536 | 141765986 | 1587326 | 0 | 568928736 | 12076422279219054 | 12076422279425639 | 12076422279560678 | 12076422279564816 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7fcfb3b9d700 | 0x7fce83a363c0 | 251725 | 251725 | 27930 | 2013808 | 65536 | 219711714 | 1847916 | 0 | 880706740 | 12076422279622433 | 12076422279808837 | 12076422279966276 | 12076422279970410 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7fcfb3b9d600 | 0x7fce83a36400 | 218428 | 218428 | 23741 | 1747432 | 65536 | 146932548 | 1572145 | 0 | 589568572 | 12076422280040330 | 12076422280220994 | 12076422280354914 | 12076422280358872 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7fcfb3b9d500 | 0x7fce83a36440 | 416780 | 416780 | 35702 | 3334248 | 65536 | 313903423 | 3144701 | 0 | 1257430300 | 12076422280417551 | 12076422280602432 | 12076422280871871 | 12076422280938599 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7fcfb3b9d400 | 0x7fce83a36480 | 406884 | 406884 | 27905 | 3255080 | 65536 | 313018011 | 3063698 | 0 | 1253889056 | 12076422280963846 | 12076422281164189 | 12076422281425948 | 12076422281492488 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7fcfb3b9d300 | 0x7fce83a364c0 | 229668 | 229668 | 24801 | 1837352 | 65536 | 150883653 | 1668980 | 0 | 605416868 | 12076422281516673 | 12076422281716666 | 12076422281858425 | 12076422281862326 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7fcfb3b9da00 | 0x7fce83a36500 | 265292 | 265292 | 28678 | 2122344 | 65536 | 210780370 | 1948298 | 0 | 844963224 | 12076422281919572 | 12076422282116504 | 12076422282282903 | 12076422282286814 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7fcfb3b9d900 | 0x7fce83a36540 | 219149 | 219149 | 23954 | 1753200 | 65536 | 160117012 | 1578996 | 0 | 642313620 | 12076422282358077 | 12076422282533942 | 12076422282668501 | 12076422282672892 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7fcfb3b9d800 | 0x7fce83a36580 | 406493 | 406493 | 36509 | 3251952 | 65536 | 321834646 | 3066828 | 0 | 1289154532 | 12076422282730920 | 12076422282918100 | 12076422283180818 | 12076422283249083 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7fcfb3b9d700 | 0x7fce83a365c0 | 393837 | 393837 | 32326 | 3150704 | 65536 | 320648340 | 2973250 | 0 | 1284410676 | 12076422283273468 | 12076422283471377 | 12076422283724655 | 12076422283790519 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7fcfb3b9d600 | 0x7fce83a36600 | 220909 | 220909 | 23280 | 1767280 | 65536 | 146895258 | 1595763 | 0 | 589440376 | 12076422283813271 | 12076422284013614 | 12076422284149453 | 12076422284153694 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7fcfb3b9d500 | 0x7fce83a36640 | 276781 | 276781 | 30766 | 2214256 | 65536 | 246773218 | 2044544 | 0 | 988956324 | 12076422284213204 | 12076422284402732 | 12076422284576651 | 12076422284580877 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7fcfb3b9d400 | 0x7fce83a36680 | 218949 | 218949 | 22994 | 1751600 | 65536 | 154046395 | 1583898 | 0 | 618042152 | 12076422284651709 | 12076422284828809 | 12076422284963849 | 12076422284967876 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7fcfb3b9d300 | 0x7fce83a366c0 | 407669 | 407669 | 36824 | 3261360 | 65536 | 325944167 | 3066396 | 0 | 1305598460 | 12076422285027437 | 12076422285226247 | 12076422285489926 | 12076422285556019 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7fcfb3b9da00 | 0x7fce83a36700 | 406917 | 406917 | 29810 | 3255344 | 65536 | 287675422 | 3068890 | 0 | 1152519268 | 12076422285578441 | 12076422285776164 | 12076422286038083 | 12076422286105520 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7fcfb3b9d900 | 0x7fce83a36740 | 222613 | 222613 | 24660 | 1780912 | 65536 | 146589944 | 1609355 | 0 | 588249056 | 12076422286129665 | 12076422286332001 | 12076422286469280 | 12076422286473514 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7fcfb3b9d800 | 0x7fce83a36780 | 292060 | 292060 | 29662 | 2336488 | 65536 | 224283309 | 2158613 | 0 | 898975508 | 12076422286534377 | 12076422286718879 | 12076422286903198 | 12076422286908943 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7fcfb3b9d700 | 0x7fce83a367c0 | 232965 | 232965 | 24040 | 1863728 | 65536 | 151651181 | 1686866 | 0 | 608475076 | 12076422287008599 | 12076422287186876 | 12076422287330876 | 12076422287334935 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7fcfb3b9d600 | 0x7fce83a36800 | 412629 | 412629 | 35624 | 3301040 | 65536 | 377814396 | 3130284 | 0 | 1513078308 | 12076422287386300 | 12076422287580314 | 12076422287847513 | 12076422287914362 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7fcfb3b9d500 | 0x7fce83a36840 | 388316 | 388316 | 34679 | 3106536 | 65536 | 350624047 | 2937232 | 0 | 1404316396 | 12076422287939699 | 12076422288145751 | 12076422288395830 | 12076422288463312 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7fcfb3b9d400 | 0x7fce83a36880 | 223141 | 223141 | 24762 | 1785136 | 65536 | 149710534 | 1621100 | 0 | 600690168 | 12076422288484752 | 12076422288686708 | 12076422288824467 | 12076422288828661 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7fcfb3b9d300 | 0x7fce83a368c0 | 303668 | 303668 | 31670 | 2429352 | 65536 | 255178334 | 2253448 | 0 | 1022550376 | 12076422288887530 | 12076422289073266 | 12076422289265585 | 12076422289333108 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7fcfb3b9da00 | 0x7fce83a36900 | 229453 | 229453 | 25754 | 1835632 | 65536 | 146779933 | 1655856 | 0 | 589000452 | 12076422289368223 | 12076422289554384 | 12076422289696143 | 12076422289700340 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7fcfb3b9d900 | 0x7fce83a36940 | 438604 | 438604 | 40791 | 3508840 | 65536 | 404317648 | 3332579 | 0 | 1619096328 | 12076422289758028 | 12076422289946221 | 12076422290230860 | 12076422290298482 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7fcfb3b9d800 | 0x7fce83a36980 | 405996 | 405996 | 32539 | 3247976 | 65536 | 298917013 | 3062505 | 0 | 1197493796 | 12076422290323608 | 12076422290523658 | 12076422290785577 | 12076422290852401 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7fcfb3b9d700 | 0x7fce83a369c0 | 228932 | 228932 | 26104 | 1831464 | 65536 | 158591426 | 1657716 | 0 | 636211032 | 12076422290875474 | 12076422291073575 | 12076422291215494 | 12076422291219724 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7fcfb3b9d600 | 0x7fce83a36a00 | 331020 | 331020 | 33377 | 2648168 | 65536 | 287597780 | 2476511 | 0 | 1152241000 | 12076422291281829 | 12076422291473093 | 12076422291683972 | 12076422291750810 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7fcfb3b9d500 | 0x7fce83a36a40 | 220468 | 220468 | 25995 | 1763752 | 65536 | 157891066 | 1593087 | 0 | 633442824 | 12076422291786136 | 12076422291971810 | 12076422292108130 | 12076422292112402 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7fcfb3b9d400 | 0x7fce83a36a80 | 516012 | 516012 | 16131 | 4128104 | 65536 | 440232521 | 3598214 | 0 | 1762749448 | 12076422292171442 | 12076422292359968 | 12076422292697406 | 12076422292764574 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7fcfb3b9d300 | 0x7fce83a36ac0 | 402452 | 402452 | 32041 | 3219624 | 65536 | 301871677 | 3034946 | 0 | 1209305088 | 12076422292787306 | 12076422292988775 | 12076422293248134 | 12076422293319055 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7fcfb3b9da00 | 0x7fce83a36b00 | 231124 | 231124 | 27588 | 1849000 | 65536 | 148823118 | 1654055 | 0 | 597183332 | 12076422293341817 | 12076422293545731 | 12076422293688771 | 12076422293692288 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7fcfb3b9d900 | 0x7fce83a36b40 | 357885 | 357885 | 36548 | 2863088 | 65536 | 325386305 | 2693190 | 0 | 1303392204 | 12076422293750937 | 12076422293939650 | 12076422294167969 | 12076422294234696 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7fcfb3b9d800 | 0x7fce83a36b80 | 222957 | 222957 | 27509 | 1783664 | 65536 | 157196857 | 1609299 | 0 | 630675928 | 12076422294271304 | 12076422294458367 | 12076422294596287 | 12076422294599824 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7fcfb3b9d700 | 0x7fce83a36bc0 | 504589 | 504589 | 43271 | 4036720 | 65536 | 473168996 | 3867344 | 0 | 1894496440 | 12076422294656820 | 12076422294850525 | 12076422295179644 | 12076422295249622 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7fcfb3b9d600 | 0x7fce83a36c00 | 398029 | 398029 | 33682 | 3184240 | 65536 | 325014631 | 3004648 | 0 | 1301891740 | 12076422295274618 | 12076422295478682 | 12076422295736121 | 12076422295776441 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7fcfb3b9d500 | 0x7fce83a36c40 | 224437 | 224437 | 29000 | 1795504 | 65536 | 159150730 | 1611443 | 0 | 638476544 | 12076422295816676 | 12076422295992280 | 12076422296131319 | 12076422296135097 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7fcfb3b9d400 | 0x7fce83a36c80 | 382701 | 382701 | 36565 | 3061616 | 65536 | 340333006 | 2881939 | 0 | 1363176672 | 12076422296194868 | 12076422296383638 | 12076422296628277 | 12076422296676924 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7fcfb3b9d300 | 0x7fce83a36cc0 | 225541 | 225541 | 27570 | 1804336 | 65536 | 164592173 | 1632499 | 0 | 660250964 | 12076422296710957 | 12076422296896116 | 12076422297036115 | 12076422297039838 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7fcfb3b9da00 | 0x7fce83a36d00 | 615581 | 615581 | 15264 | 4924656 | 65536 | 540748228 | 4448258 | 0 | 2164811700 | 12076422297100511 | 12076422297286994 | 12076422297690352 | 12076422297756520 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7fcfb3b9d900 | 0x7fce83a36d40 | 409916 | 409916 | 34853 | 3279336 | 65536 | 325320904 | 3107981 | 0 | 1303129512 | 12076422297778381 | 12076422297979951 | 12076422298245550 | 12076422298311622 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7fcfb3b9d800 | 0x7fce83a36d80 | 222604 | 222604 | 26920 | 1780840 | 65536 | 168663399 | 1598012 | 0 | 676509476 | 12076422298335877 | 12076422298539628 | 12076422298678028 | 12076422298681539 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7fcfb3b9d700 | 0x7fce83a36dc0 | 434093 | 434093 | 39080 | 3472752 | 65536 | 402714776 | 3313431 | 0 | 1612709980 | 12076422298741891 | 12076422298927787 | 12076422299207785 | 12076422299275072 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7fcfb3b9d600 | 0x7fce83a36e00 | 218988 | 218988 | 25587 | 1751912 | 65536 | 176943198 | 1585131 | 0 | 709668124 | 12076422299310287 | 12076422299497384 | 12076422299633543 | 12076422299637065 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7fcfb3b9d500 | 0x7fce83a36e40 | 645285 | 645285 | 51706 | 5162288 | 65536 | 612204362 | 4998865 | 0 | 2450637196 | 12076422299696705 | 12076422299882182 | 12076422300307140 | 12076422300372702 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7fcfb3b9d400 | 0x7fce83a36e80 | 407629 | 407629 | 32895 | 3261040 | 65536 | 262008107 | 3059612 | 0 | 1049860448 | 12076422300395784 | 12076422300597219 | 12076422300862018 | 12076422300927893 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7fcfb3b9d300 | 0x7fce83a36ec0 | 226188 | 226188 | 27465 | 1809512 | 65536 | 186282848 | 1630709 | 0 | 747016968 | 12076422300950976 | 12076422301158816 | 12076422301299136 | 12076422301302840 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7fcfb3b9da00 | 0x7fce83a36f00 | 486692 | 486692 | 42025 | 3893544 | 65536 | 455390925 | 3732269 | 0 | 1823417868 | 12076422301361058 | 12076422301557374 | 12076422301872093 | 12076422301938070 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7fcfb3b9d900 | 0x7fce83a36f40 | 244140 | 244140 | 27861 | 1953128 | 65536 | 214490596 | 1784302 | 0 | 859849684 | 12076422301973025 | 12076422302167452 | 12076422302319931 | 12076422302323507 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7fcfb3b9d800 | 0x7fce83a36f80 | 783356 | 783356 | 59103 | 6266856 | 65536 | 750961899 | 6106899 | 0 | 3005668952 | 12076422302381905 | 12076422302571770 | 12076422303087927 | 12076422303154441 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7fcfb3b9d700 | 0x7fce83a36fc0 | 455197 | 455197 | 37900 | 3641584 | 65536 | 393047895 | 3468051 | 0 | 1574035104 | 12076422303179768 | 12076422303384406 | 12076422303682324 | 12076422303748555 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7fcfb3b9d600 | 0x7fce83a37000 | 249069 | 249069 | 28046 | 1992560 | 65536 | 221114045 | 1830394 | 0 | 886345256 | 12076422303770936 | 12076422303973683 | 12076422304130322 | 12076422304133971 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7fcfb3b9d500 | 0x7fce83a37040 | 593037 | 593037 | 47421 | 4744304 | 65536 | 561202544 | 4585706 | 0 | 2246662192 | 12076422304188021 | 12076422304389681 | 12076422304775599 | 12076422304841666 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7fcfb3b9d400 | 0x7fce83a37080 | 280245 | 280245 | 30218 | 2241968 | 65536 | 251505433 | 2076873 | 0 | 1007912812 | 12076422304876871 | 12076422305075918 | 12076422305253837 | 12076422305257208 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7fcfb3b9d300 | 0x7fce83a370c0 | 925053 | 925053 | 68135 | 7400432 | 65536 | 889025546 | 7230251 | 0 | 3557922404 | 12076422305315406 | 12076422305510476 | 12076422306119273 | 12076422306186234 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7fcfb3b9da00 | 0x7fce83a37100 | 496573 | 496573 | 43215 | 3972592 | 65536 | 464968480 | 3810805 | 0 | 1861719160 | 12076422306210209 | 12076422306421992 | 12076422306747910 | 12076422306813851 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7fcfb3b9d900 | 0x7fce83a37140 | 282573 | 282573 | 30310 | 2260592 | 65536 | 245138361 | 2090515 | 0 | 982424512 | 12076422306836272 | 12076422307050629 | 12076422307229508 | 12076422307233090 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7fcfb3b9d800 | 0x7fce83a37180 | 700404 | 700404 | 54933 | 5603240 | 65536 | 657058131 | 5420548 | 0 | 2630087420 | 12076422307293742 | 12076422307482307 | 12076422307939105 | 12076422308009933 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7fcfb3b9d700 | 0x7fce83a371c0 | 314084 | 314084 | 33081 | 2512680 | 65536 | 284058019 | 2346964 | 0 | 1138112972 | 12076422308049527 | 12076422308229983 | 12076422308429982 | 12076422308497659 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7fcfb3b9d600 | 0x7fce83a37200 | 1074724 | 1074724 | 14778 | 8597800 | 65536 | 1028860015 | 8360439 | 0 | 4117259584 | 12076422308520762 | 12076422308718941 | 12076422309427258 | 12076422309494021 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7fcfb3b9d500 | 0x7fce83a37240 | 569205 | 569205 | 47431 | 4553648 | 65536 | 524902911 | 4378104 | 0 | 2101442328 | 12076422309517655 | 12076422309723257 | 12076422310096855 | 12076422310163966 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7fcfb3b9d400 | 0x7fce83a37280 | 318836 | 318836 | 31286 | 2550696 | 65536 | 290506834 | 2391140 | 0 | 1163918320 | 12076422310190185 | 12076422310392213 | 12076422310595892 | 12076422310661921 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7fcfb3b9d300 | 0x7fce83a372c0 | 805052 | 805052 | 62511 | 6440424 | 65536 | 754774542 | 6260311 | 0 | 3020942204 | 12076422310686136 | 12076422310882931 | 12076422311410449 | 12076422311478378 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7fcfb3b9da00 | 0x7fce83a37300 | 348261 | 348261 | 33375 | 2786096 | 65536 | 319442757 | 2628552 | 0 | 1279662016 | 12076422311515888 | 12076422311703567 | 12076422311926926 | 12076422311992614 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7fcfb3b9d900 | 0x7fce83a37340 | 1206444 | 1206444 | 84335 | 9651560 | 65536 | 1169243555 | 9486012 | 0 | 4678794904 | 12076422312021698 | 12076422312227405 | 12076422313024841 | 12076422313094431 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7fcfb3b9d800 | 0x7fce83a37380 | 641757 | 641757 | 51917 | 5134064 | 65536 | 583511542 | 4954976 | 0 | 2335889284 | 12076422313124838 | 12076422313319720 | 12076422313742918 | 12076422313790014 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7fcfb3b9d700 | 0x7fce83a373c0 | 352420 | 352420 | 33915 | 2819368 | 65536 | 323733204 | 2659514 | 0 | 1296818424 | 12076422313812556 | 12076422314009797 | 12076422314236036 | 12076422314301574 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7fcfb3b9d600 | 0x7fce83a37400 | 910540 | 910540 | 67210 | 7284328 | 65536 | 870689085 | 7116406 | 0 | 3484608212 | 12076422314325248 | 12076422314530754 | 12076422315128511 | 12076422315195836 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7fcfb3b9d500 | 0x7fce83a37440 | 417741 | 417741 | 38318 | 3341936 | 65536 | 386432586 | 3177659 | 0 | 1547633820 | 12076422315239517 | 12076422315419710 | 12076422315688669 | 12076422315755115 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7fcfb3b9d400 | 0x7fce83a37480 | 1487780 | 1487780 | 30186 | 11902248 | 65536 | 1447343369 | 11739321 | 0 | 5791194176 | 12076422315784860 | 12076422315977468 | 12076422316961623 | 12076422317030595 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7fcfb3b9d300 | 0x7fce83a374c0 | 779597 | 779597 | 61696 | 6236784 | 65536 | 736839533 | 6053676 | 0 | 2949208516 | 12076422317054239 | 12076422317258262 | 12076422317772819 | 12076422317839108 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7fcfb3b9da00 | 0x7fce83a37500 | 422236 | 422236 | 40119 | 3377896 | 65536 | 389758743 | 3209413 | 0 | 1560930052 | 12076422317863022 | 12076422318079538 | 12076422318351057 | 12076422318417092 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7fcfb3b9d900 | 0x7fce83a37540 | 1127188 | 1127188 | 80122 | 9017512 | 65536 | 1089844253 | 8849495 | 0 | 4361201344 | 12076422318443952 | 12076422318645295 | 12076422319383852 | 12076422319450482 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7fcfb3b9d800 | 0x7fce83a37580 | 490877 | 490877 | 44160 | 3927024 | 65536 | 458314879 | 3757975 | 0 | 1835155012 | 12076422319486268 | 12076422319676330 | 12076422319993929 | 12076422320060546 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7fcfb3b9d700 | 0x7fce83a375c0 | 1769005 | 1769005 | 118286 | 14152048 | 65536 | 1725546472 | 13978516 | 0 | 6904006968 | 12076422320089119 | 12076422320292808 | 12076422321463042 | 12076422321530437 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7fcfb3b9d600 | 0x7fce83a37600 | 923284 | 923284 | 65617 | 7386280 | 65536 | 808546405 | 7213600 | 0 | 3236041376 | 12076422321561625 | 12076422321758881 | 12076422322369918 | 12076422322437312 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7fcfb3b9d500 | 0x7fce83a37640 | 493629 | 493629 | 44347 | 3949040 | 65536 | 459212720 | 3780176 | 0 | 1838750768 | 12076422322466546 | 12076422322666557 | 12076422322986075 | 12076422323053497 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7fcfb3b9d400 | 0x7fce83a37680 | 1338309 | 1338309 | 22983 | 10706480 | 65536 | 1296308347 | 10533677 | 0 | 5187058948 | 12076422323084925 | 12076422323282874 | 12076422324161430 | 12076422324228170 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7fcfb3b9d300 | 0x7fce83a376c0 | 632516 | 632516 | 55846 | 5060136 | 65536 | 595435278 | 4867539 | 0 | 2383636848 | 12076422324262714 | 12076422324457748 | 12076422324869106 | 12076422324935324 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7fcfb3b9da00 | 0x7fce83a37700 | 2332156 | 2332156 | 78846 | 18657256 | 65536 | 2284864238 | 18490317 | 0 | 9141279164 | 12076422324958106 | 12076422325164625 | 12076422326713738 | 12076422326782717 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7fcfb3b9d900 | 0x7fce83a37740 | 1204364 | 1204364 | 16631 | 9634920 | 65536 | 1163160413 | 9443674 | 0 | 4654463876 | 12076422326811300 | 12076422327012616 | 12076422327807973 | 12076422327876229 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7fcfb3b9d800 | 0x7fce83a37780 | 675220 | 675220 | 15866 | 5401768 | 65536 | 606590856 | 4959542 | 0 | 2428192320 | 12076422327899081 | 12076422328118851 | 12076422328556289 | 12076422328624639 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7fcfb3b9d700 | 0x7fce83a377c0 | 1758765 | 1758765 | 116328 | 14070128 | 65536 | 1715714906 | 13903000 | 0 | 6864685360 | 12076422328648884 | 12076422328847008 | 12076422330006043 | 12076422330075465 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7fcfb3b9d600 | 0x7fce83a37800 | 1198500 | 1198500 | 83581 | 9588008 | 65536 | 1161943075 | 9425060 | 0 | 4649604988 | 12076422330118525 | 12076422330299481 | 12076422331085238 | 12076422331152036 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7fcfb3b9d500 | 0x7fce83a37840 | 4584909 | 4584909 | 281527 | 36679280 | 65536 | 4515029250 | 36509769 | 0 | 18061938388 | 12076422331182893 | 12076422331381716 | 12076422334431622 | 12076422334499416 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7fcfb3b9d400 | 0x7fce83a37880 | 2324645 | 2324645 | 149685 | 18597168 | 65536 | 2276439762 | 18431236 | 0 | 9107582372 | 12076422334530934 | 12076422334729381 | 12076422336270654 | 12076422336338223 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7fcfb3b9d300 | 0x7fce83a378c0 | 1202852 | 1202852 | 84251 | 9622824 | 65536 | 1163701609 | 9464272 | 0 | 4656628068 | 12076422336365734 | 12076422336568572 | 12076422337358008 | 12076422337425443 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7fcfb3b9da00 | 0x7fce83a37900 | 3443580 | 3443580 | 214313 | 27548648 | 65536 | 3385805194 | 27382963 | 0 | 13545047292 | 12076422337454958 | 12076422337658167 | 12076422339940077 | 12076422340012731 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7fcfb3b9d900 | 0x7fce83a37940 | 2322469 | 2322469 | 149479 | 18579760 | 65536 | 2274477463 | 18417030 | 0 | 9099739268 | 12076422340060079 | 12076422340242635 | 12076422341777988 | 12076422341844845 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7fcfb3b9d800 | 0x7fce83a37980 | 9086525 | 9086525 | 472884 | 72692208 | 65536 | 8977180693 | 72529082 | 0 | 35910543404 | 12076422341874500 | 12076422342076547 | 12076422348126599 | 12076422348198492 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7fcfb3b9d700 | 0x7fce83a379c0 | 4572316 | 4572316 | 281374 | 36578536 | 65536 | 4502089425 | 36412870 | 0 | 18010179900 | 12076422348222286 | 12076422348428677 | 12076422351468663 | 12076422351536374 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7fcfb3b9d600 | 0x7fce83a37a00 | 2328845 | 2328845 | 148657 | 18630768 | 65536 | 2282049224 | 18467367 | 0 | 9130023892 | 12076422351563074 | 12076422351767382 | 12076422353306895 | 12076422353374179 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 926521 | 926526 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7fcfb3b9d500 | 0x7fce83a37a40 | 6813101 | 6813101 | 413116 | 54504816 | 65536 | 6723379980 | 54330764 | 0 | 26895347688 | 12076422353404024 | 12076422353601961 | 12076422358129141 | 12076422358202661 |