50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 906511 | 906516 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f4586e04180 | 504246 | 504246 | 17548 | 4033976 | 524288 | 368605679 | 3827741 | 0 | 1489275812 | 12076039738087526 | 12076039982998365 | 12076039983322204 | 12076039983431627 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 906511 | 906516 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f4586e35100 | 28708 | 28708 | 21209 | 229672 | 512 | 1162927 | 78755 | 0 | 4666036 | 12076039998516141 | 12076039998845611 | 12076039998852011 | 12076039998860611 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f46b6ef1900 | 0x7f4586e35140 | 226140 | 226140 | 22430 | 1809128 | 65536 | 121427759 | 1627144 | 0 | 487522204 | 12076039998932284 | 12076039999174090 | 12076039999311369 | 12076039999315967 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f46b6ef1800 | 0x7f4586e35180 | 397141 | 397141 | 33067 | 3177136 | 65536 | 268902429 | 3002458 | 0 | 1077424420 | 12076039999393782 | 12076039999582888 | 12076039999835047 | 12076039999903209 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f46b6ef1700 | 0x7f4586e351c0 | 403852 | 403852 | 34188 | 3230824 | 65536 | 356939146 | 3060834 | 0 | 1429571176 | 12076039999931782 | 12076040000148965 | 12076040000404644 | 12076040000472567 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f46b6ef1600 | 0x7f4586e35200 | 229517 | 229517 | 19317 | 1836144 | 65536 | 114186568 | 1660258 | 0 | 458553652 | 12076040000498194 | 12076040000710403 | 12076040000850562 | 12076040000854346 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f46b6ef1500 | 0x7f4586e35240 | 229461 | 229461 | 20974 | 1835696 | 65536 | 118547639 | 1660132 | 0 | 476003120 | 12076040000914638 | 12076040001108961 | 12076040001248480 | 12076040001252526 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f46b6ef1400 | 0x7f4586e35280 | 215940 | 215940 | 21307 | 1727528 | 65536 | 145803612 | 1557827 | 0 | 585035556 | 12076040001342874 | 12076040001527679 | 12076040001659678 | 12076040001663290 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f46b6ef1300 | 0x7f4586e352c0 | 402453 | 402453 | 32654 | 3219632 | 65536 | 337685742 | 3045890 | 0 | 1352559244 | 12076040001720847 | 12076040001922557 | 12076040002178076 | 12076040002219994 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f46b6ef1a00 | 0x7f4586e35300 | 397661 | 397661 | 32239 | 3181296 | 65536 | 348877103 | 3014906 | 0 | 1397324992 | 12076040002256592 | 12076040002448794 | 12076040002701593 | 12076040002767943 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f46b6ef1900 | 0x7f4586e35340 | 214805 | 214805 | 22800 | 1718448 | 65536 | 156070767 | 1542093 | 0 | 626104280 | 12076040002790675 | 12076040002996472 | 12076040003126551 | 12076040003130587 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f46b6ef1800 | 0x7f4586e35380 | 220980 | 220980 | 21712 | 1767848 | 65536 | 139494488 | 1597248 | 0 | 559792932 | 12076040003190999 | 12076040003382870 | 12076040003517750 | 12076040003521533 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f46b6ef1700 | 0x7f4586e353c0 | 218005 | 218005 | 23411 | 1744048 | 65536 | 146610905 | 1568498 | 0 | 588257348 | 12076040003593567 | 12076040003774388 | 12076040003906388 | 12076040003909995 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f46b6ef1600 | 0x7f4586e35400 | 405061 | 405061 | 24614 | 3240496 | 65536 | 250916459 | 3058603 | 0 | 1005484488 | 12076040003969145 | 12076040004167187 | 12076040004425265 | 12076040004492498 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f46b6ef1500 | 0x7f4586e35440 | 402060 | 402060 | 33437 | 3216488 | 65536 | 246331570 | 3044240 | 0 | 987143108 | 12076040004517554 | 12076040004727024 | 12076040004982223 | 12076040005048842 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f46b6ef1400 | 0x7f4586e35480 | 215428 | 215428 | 21631 | 1723432 | 65536 | 156989391 | 1550840 | 0 | 629771708 | 12076040005072796 | 12076040005275821 | 12076040005407501 | 12076040005411315 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f46b6ef1300 | 0x7f4586e354c0 | 218597 | 218597 | 23469 | 1748784 | 65536 | 140858230 | 1565360 | 0 | 565248308 | 12076040005470866 | 12076040005662700 | 12076040005795499 | 12076040005799066 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f46b6ef1a00 | 0x7f4586e35500 | 215036 | 215036 | 22339 | 1720296 | 65536 | 143345104 | 1550056 | 0 | 575198644 | 12076040005869907 | 12076040006060458 | 12076040006191657 | 12076040006195633 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f46b6ef1900 | 0x7f4586e35540 | 407380 | 407380 | 28968 | 3259048 | 65536 | 303986919 | 3081436 | 0 | 1217763384 | 12076040006247790 | 12076040006446856 | 12076040006706055 | 12076040006772435 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f46b6ef1800 | 0x7f4586e35580 | 400877 | 400877 | 32603 | 3207024 | 65536 | 349956178 | 3039131 | 0 | 1401639288 | 12076040006796309 | 12076040006997733 | 12076040007252932 | 12076040007322517 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f46b6ef1700 | 0x7f4586e355c0 | 214069 | 214069 | 23382 | 1712560 | 65536 | 152569740 | 1541876 | 0 | 612097012 | 12076040007346442 | 12076040007547331 | 12076040007677250 | 12076040007681033 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f46b6ef1600 | 0x7f4586e35600 | 225045 | 225045 | 25351 | 1800368 | 65536 | 130622093 | 1605735 | 0 | 524305336 | 12076040007737889 | 12076040007931169 | 12076040008067328 | 12076040008071369 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f46b6ef1500 | 0x7f4586e35640 | 215429 | 215429 | 20587 | 1723440 | 65536 | 154668393 | 1548040 | 0 | 620489668 | 12076040008144845 | 12076040008323327 | 12076040008454207 | 12076040008457957 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f46b6ef1400 | 0x7f4586e35680 | 413284 | 413284 | 28979 | 3306280 | 65536 | 303928169 | 3125101 | 0 | 1217527876 | 12076040008515845 | 12076040008702046 | 12076040008966044 | 12076040009033838 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f46b6ef1300 | 0x7f4586e356c0 | 399436 | 399436 | 32695 | 3195496 | 65536 | 337428777 | 3023397 | 0 | 1351528840 | 12076040009058403 | 12076040009260763 | 12076040009514522 | 12076040009580944 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f46b6ef1a00 | 0x7f4586e35700 | 213588 | 213588 | 22023 | 1708712 | 65536 | 144771901 | 1543386 | 0 | 580904936 | 12076040009605580 | 12076040009802680 | 12076040009933560 | 12076040009937397 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f46b6ef1900 | 0x7f4586e35740 | 215252 | 215252 | 23952 | 1722024 | 65536 | 145640070 | 1545413 | 0 | 584378760 | 12076040009995665 | 12076040010188439 | 12076040010319798 | 12076040010323645 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f46b6ef1800 | 0x7f4586e35780 | 217021 | 217021 | 23238 | 1736176 | 65536 | 143321140 | 1562050 | 0 | 575102568 | 12076040010393925 | 12076040010572277 | 12076040010704756 | 12076040010708450 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f46b6ef1700 | 0x7f4586e357c0 | 399053 | 399053 | 33617 | 3192432 | 65536 | 315720755 | 3016174 | 0 | 1264699104 | 12076040010767389 | 12076040010965075 | 12076040011219954 | 12076040011288868 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f46b6ef1600 | 0x7f4586e35800 | 398372 | 398372 | 30408 | 3186984 | 65536 | 331769473 | 3016011 | 0 | 1328895156 | 12076040011315718 | 12076040011508913 | 12076040011763151 | 12076040011829944 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f46b6ef1500 | 0x7f4586e35840 | 215068 | 215068 | 20215 | 1720552 | 65536 | 149087096 | 1552098 | 0 | 598167456 | 12076040011853808 | 12076040012055950 | 12076040012186989 | 12076040012190764 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f46b6ef1400 | 0x7f4586e35880 | 221564 | 221564 | 24129 | 1772520 | 65536 | 141315312 | 1600583 | 0 | 567116728 | 12076040012248652 | 12076040012435148 | 12076040012571468 | 12076040012575189 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f46b6ef1300 | 0x7f4586e358c0 | 213725 | 213725 | 23195 | 1709808 | 65536 | 162185688 | 1530612 | 0 | 650561504 | 12076040012645760 | 12076040012825866 | 12076040012956106 | 12076040012959874 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f46b6ef1a00 | 0x7f4586e35900 | 402829 | 402829 | 34801 | 3222640 | 65536 | 304325355 | 3050038 | 0 | 1219116076 | 12076040013017511 | 12076040013210025 | 12076040013467943 | 12076040013534432 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f46b6ef1900 | 0x7f4586e35940 | 402301 | 402301 | 32974 | 3218416 | 65536 | 336533671 | 3038445 | 0 | 1347949324 | 12076040013558657 | 12076040013759622 | 12076040014015781 | 12076040014083292 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f46b6ef1800 | 0x7f4586e35980 | 221189 | 221189 | 21395 | 1769520 | 65536 | 138816453 | 1603172 | 0 | 557109100 | 12076040014106445 | 12076040014311460 | 12076040014447139 | 12076040014450935 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f46b6ef1700 | 0x7f4586e359c0 | 228596 | 228596 | 21313 | 1828776 | 65536 | 147694065 | 1633388 | 0 | 592613200 | 12076040014512189 | 12076040014694498 | 12076040014834657 | 12076040014838525 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f46b6ef1600 | 0x7f4586e35a00 | 217508 | 217508 | 20381 | 1740072 | 65536 | 149659432 | 1571933 | 0 | 600468836 | 12076040014908535 | 12076040015090176 | 12076040015223615 | 12076040015227709 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f46b6ef1500 | 0x7f4586e35a40 | 395085 | 395085 | 34824 | 3160688 | 65536 | 355123117 | 2984658 | 0 | 1422311496 | 12076040015278874 | 12076040015473214 | 12076040015725853 | 12076040015792208 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f46b6ef1400 | 0x7f4586e35a80 | 395460 | 395460 | 29528 | 3163688 | 65536 | 336533184 | 2997445 | 0 | 1347949360 | 12076040015818036 | 12076040016018812 | 12076040016272090 | 12076040016339014 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f46b6ef1300 | 0x7f4586e35ac0 | 213164 | 213164 | 23395 | 1705320 | 65536 | 145411565 | 1525211 | 0 | 583465420 | 12076040016364231 | 12076040016558169 | 12076040016687929 | 12076040016691749 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f46b6ef1a00 | 0x7f4586e35b00 | 216012 | 216012 | 23491 | 1728104 | 65536 | 168110900 | 1549287 | 0 | 674264856 | 12076040016748325 | 12076040016935447 | 12076040017068567 | 12076040017072798 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f46b6ef1900 | 0x7f4586e35b40 | 276405 | 276405 | 17144 | 2211248 | 65536 | 151698861 | 1504142 | 0 | 608632020 | 12076040017144240 | 12076040017320246 | 12076040017491605 | 12076040017495864 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f46b6ef1800 | 0x7f4586e35b80 | 417108 | 417108 | 32922 | 3336872 | 65536 | 346542788 | 3147212 | 0 | 1387986792 | 12076040017547029 | 12076040017737684 | 12076040018004082 | 12076040018071153 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f46b6ef1700 | 0x7f4586e35bc0 | 404172 | 404172 | 27165 | 3233384 | 65536 | 265223938 | 3048556 | 0 | 1062710568 | 12076040018097332 | 12076040018291601 | 12076040018549680 | 12076040018616326 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f46b6ef1600 | 0x7f4586e35c00 | 214917 | 214917 | 25713 | 1719344 | 65536 | 167240005 | 1527358 | 0 | 670780284 | 12076040018641303 | 12076040018843279 | 12076040018972398 | 12076040018976155 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f46b6ef1500 | 0x7f4586e35c40 | 228805 | 228805 | 26601 | 1830448 | 65536 | 139034295 | 1647105 | 0 | 557955984 | 12076040019035515 | 12076040019222797 | 12076040019364396 | 12076040019368604 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f46b6ef1400 | 0x7f4586e35c80 | 213492 | 213492 | 24645 | 1707944 | 65536 | 165662660 | 1532723 | 0 | 664471224 | 12076040019440237 | 12076040019615595 | 12076040019745354 | 12076040019749342 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f46b6ef1300 | 0x7f4586e35cc0 | 422820 | 422820 | 32634 | 3382568 | 65536 | 332517670 | 3202227 | 0 | 1331884936 | 12076040019805747 | 12076040019997993 | 12076040020269352 | 12076040020336563 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f46b6ef1a00 | 0x7f4586e35d00 | 403973 | 403973 | 35907 | 3231792 | 65536 | 268875294 | 3055543 | 0 | 1077316752 | 12076040020361850 | 12076040020559911 | 12076040020817509 | 12076040020884301 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f46b6ef1900 | 0x7f4586e35d40 | 217364 | 217364 | 22212 | 1738920 | 65536 | 143261419 | 1565211 | 0 | 574864052 | 12076040020908376 | 12076040021107588 | 12076040021240547 | 12076040021244521 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f46b6ef1800 | 0x7f4586e35d80 | 219844 | 219844 | 24846 | 1758760 | 65536 | 162878586 | 1574982 | 0 | 653341836 | 12076040021305814 | 12076040021488386 | 12076040021623586 | 12076040021627452 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f46b6ef1700 | 0x7f4586e35dc0 | 211261 | 211261 | 20495 | 1690096 | 65536 | 157804177 | 1522073 | 0 | 633055988 | 12076040021697613 | 12076040021871265 | 12076040022000864 | 12076040022006156 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f46b6ef1600 | 0x7f4586e35e00 | 421829 | 421829 | 31779 | 3374640 | 65536 | 325666640 | 3189574 | 0 | 1304481592 | 12076040022063493 | 12076040022250303 | 12076040022521182 | 12076040022587516 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f46b6ef1500 | 0x7f4586e35e40 | 402612 | 402612 | 34343 | 3220904 | 65536 | 342789483 | 3044975 | 0 | 1372973792 | 12076040022611090 | 12076040022804060 | 12076040023061499 | 12076040023131427 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f46b6ef1400 | 0x7f4586e35e80 | 221061 | 221061 | 24071 | 1768496 | 65536 | 151902772 | 1593952 | 0 | 609492560 | 12076040023156303 | 12076040023356858 | 12076040023492057 | 12076040023495865 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f46b6ef1300 | 0x7f4586e35ec0 | 219973 | 219973 | 27383 | 1759792 | 65536 | 165216878 | 1581038 | 0 | 662720020 | 12076040023555936 | 12076040023738296 | 12076040023874135 | 12076040023878075 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f46b6ef1a00 | 0x7f4586e35f00 | 219285 | 219285 | 22755 | 1754288 | 65536 | 150859612 | 1575077 | 0 | 605260544 | 12076040023949788 | 12076040024133174 | 12076040024267093 | 12076040024271045 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f46b6ef1900 | 0x7f4586e35f40 | 410069 | 410069 | 32895 | 3280560 | 65536 | 302528280 | 3078932 | 0 | 1211934764 | 12076040024320707 | 12076040024515732 | 12076040024779251 | 12076040024845853 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f46b6ef1800 | 0x7f4586e35f80 | 404020 | 404020 | 32260 | 3232168 | 65536 | 341127222 | 3055090 | 0 | 1366324524 | 12076040024871140 | 12076040025067410 | 12076040025325809 | 12076040025366461 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f46b6ef1700 | 0x7f4586e35fc0 | 212733 | 212733 | 21535 | 1701872 | 65536 | 154471952 | 1511540 | 0 | 619738028 | 12076040025410292 | 12076040025585327 | 12076040025715567 | 12076040025719587 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f46b6ef1600 | 0x7f4586e36000 | 227941 | 227941 | 29995 | 1823536 | 65536 | 183609225 | 1636205 | 0 | 736302528 | 12076040025779048 | 12076040025967886 | 12076040026108205 | 12076040026112357 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f46b6ef1500 | 0x7f4586e36040 | 226972 | 226972 | 22345 | 1815784 | 65536 | 136056177 | 1644714 | 0 | 546076656 | 12076040026183389 | 12076040026356204 | 12076040026496043 | 12076040026499927 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f46b6ef1400 | 0x7f4586e36080 | 419660 | 419660 | 36272 | 3357288 | 65536 | 351223801 | 3178328 | 0 | 1406711404 | 12076040026558306 | 12076040026742282 | 12076040027012201 | 12076040027079845 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f46b6ef1300 | 0x7f4586e360c0 | 391068 | 391068 | 33310 | 3128552 | 65536 | 350805882 | 2956206 | 0 | 1405040316 | 12076040027105553 | 12076040027299079 | 12076040027550118 | 12076040027616302 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f46b6ef1a00 | 0x7f4586e36100 | 213036 | 213036 | 23307 | 1704296 | 65536 | 155534833 | 1536208 | 0 | 623966380 | 12076040027641078 | 12076040027842597 | 12076040027972836 | 12076040027976622 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f46b6ef1900 | 0x7f4586e36140 | 233037 | 233037 | 26904 | 1864304 | 65536 | 180497637 | 1685506 | 0 | 723856164 | 12076040028035732 | 12076040028230915 | 12076040028375234 | 12076040028379340 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f46b6ef1800 | 0x7f4586e36180 | 218709 | 218709 | 24363 | 1749680 | 65536 | 144454426 | 1575456 | 0 | 579637756 | 12076040028450182 | 12076040028625473 | 12076040028759553 | 12076040028763424 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f46b6ef1700 | 0x7f4586e361c0 | 415205 | 415205 | 35595 | 3321648 | 65536 | 325617425 | 3132835 | 0 | 1304294048 | 12076040028819518 | 12076040029008352 | 12076040029276190 | 12076040029317544 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f46b6ef1600 | 0x7f4586e36200 | 402261 | 402261 | 29153 | 3218096 | 65536 | 316642048 | 3028011 | 0 | 1268391616 | 12076040029355835 | 12076040029535389 | 12076040029793148 | 12076040029860032 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f46b6ef1500 | 0x7f4586e36240 | 218933 | 218933 | 21869 | 1751472 | 65536 | 154241356 | 1571819 | 0 | 618833404 | 12076040029885980 | 12076040030084187 | 12076040030218906 | 12076040030223117 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f46b6ef1400 | 0x7f4586e36280 | 242196 | 242196 | 28097 | 1937576 | 65536 | 212079256 | 1768719 | 0 | 850175892 | 12076040030282187 | 12076040030467705 | 12076040030617784 | 12076040030621778 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f46b6ef1300 | 0x7f4586e362c0 | 217141 | 217141 | 21715 | 1737136 | 65536 | 145241714 | 1552626 | 0 | 582801008 | 12076040030711184 | 12076040030885303 | 12076040031018582 | 12076040031022593 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f46b6ef1a00 | 0x7f4586e36300 | 425476 | 425476 | 40306 | 3403816 | 65536 | 310779772 | 3214266 | 0 | 1244933184 | 12076040031082464 | 12076040031271861 | 12076040031545940 | 12076040031613210 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f46b6ef1900 | 0x7f4586e36340 | 403556 | 403556 | 26485 | 3228456 | 65536 | 247708998 | 3039591 | 0 | 992653912 | 12076040031638197 | 12076040031834418 | 12076040032093937 | 12076040032163864 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f46b6ef1800 | 0x7f4586e36380 | 221972 | 221972 | 23260 | 1775784 | 65536 | 141702538 | 1596849 | 0 | 568689508 | 12076040032188560 | 12076040032390256 | 12076040032526415 | 12076040032530385 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f46b6ef1700 | 0x7f4586e363c0 | 252564 | 252564 | 27749 | 2020520 | 65536 | 221088296 | 1853491 | 0 | 886210644 | 12076040032589374 | 12076040032772974 | 12076040032930733 | 12076040032934746 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f46b6ef1600 | 0x7f4586e36400 | 221052 | 221052 | 26934 | 1768424 | 65536 | 156486868 | 1561851 | 0 | 627766052 | 12076040033010607 | 12076040033186892 | 12076040033320812 | 12076040033324811 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f46b6ef1500 | 0x7f4586e36440 | 408028 | 408028 | 33794 | 3264232 | 65536 | 312155402 | 3063013 | 0 | 1250440760 | 12076040033383280 | 12076040033575050 | 12076040033838249 | 12076040033904478 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f46b6ef1400 | 0x7f4586e36480 | 399717 | 399717 | 30314 | 3197744 | 65536 | 280640458 | 3008301 | 0 | 1124385644 | 12076040033927511 | 12076040034129928 | 12076040034386407 | 12076040034453369 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f46b6ef1300 | 0x7f4586e364c0 | 225101 | 225101 | 23956 | 1800816 | 65536 | 148479802 | 1635268 | 0 | 595773752 | 12076040034476712 | 12076040034672165 | 12076040034811205 | 12076040034815091 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f46b6ef1a00 | 0x7f4586e36500 | 264941 | 264941 | 29436 | 2119536 | 65536 | 232859789 | 1950786 | 0 | 933291312 | 12076040034874902 | 12076040035067363 | 12076040035233123 | 12076040035237235 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f46b6ef1900 | 0x7f4586e36540 | 225516 | 225516 | 21355 | 1804136 | 65536 | 150857371 | 1630527 | 0 | 605291512 | 12076040035308798 | 12076040035482242 | 12076040035620961 | 12076040035625066 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f46b6ef1800 | 0x7f4586e36580 | 420741 | 420741 | 37334 | 3365936 | 65536 | 311347241 | 3167674 | 0 | 1247209404 | 12076040035682914 | 12076040035870240 | 12076040036140959 | 12076040036212187 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f46b6ef1700 | 0x7f4586e365c0 | 394069 | 394069 | 32427 | 3152560 | 65536 | 347321194 | 2986176 | 0 | 1391103044 | 12076040036235210 | 12076040036435517 | 12076040036689596 | 12076040036756298 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f46b6ef1600 | 0x7f4586e36600 | 225044 | 225044 | 23381 | 1800360 | 65536 | 152728772 | 1617037 | 0 | 612790844 | 12076040036779331 | 12076040036975515 | 12076040037114074 | 12076040037118031 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f46b6ef1500 | 0x7f4586e36640 | 275997 | 275997 | 29399 | 2207984 | 65536 | 246231187 | 2044116 | 0 | 986780556 | 12076040037175437 | 12076040037363993 | 12076040037538072 | 12076040037542009 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f46b6ef1400 | 0x7f4586e36680 | 219420 | 219420 | 24148 | 1755368 | 65536 | 151210687 | 1582495 | 0 | 606670324 | 12076040037612480 | 12076040037785751 | 12076040037920470 | 12076040037924329 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f46b6ef1300 | 0x7f4586e366c0 | 406533 | 406533 | 35689 | 3252272 | 65536 | 327711813 | 3054093 | 0 | 1312676888 | 12076040037984070 | 12076040038177269 | 12076040038439828 | 12076040038506892 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f46b6ef1a00 | 0x7f4586e36700 | 404429 | 404429 | 30468 | 3235440 | 65536 | 261015008 | 3037584 | 0 | 1045887448 | 12076040038533000 | 12076040038726707 | 12076040038986225 | 12076040039053868 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f46b6ef1900 | 0x7f4586e36740 | 223212 | 223212 | 24045 | 1785704 | 65536 | 158160279 | 1609118 | 0 | 634522388 | 12076040039077643 | 12076040039275184 | 12076040039413263 | 12076040039417134 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f46b6ef1800 | 0x7f4586e36780 | 290981 | 290981 | 31446 | 2327856 | 65536 | 257654743 | 2154415 | 0 | 1032466892 | 12076040039475101 | 12076040039659822 | 12076040039843021 | 12076040039849367 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f46b6ef1700 | 0x7f4586e367c0 | 220692 | 220692 | 21916 | 1765544 | 65536 | 152708098 | 1591573 | 0 | 612713352 | 12076040039934205 | 12076040040121740 | 12076040040257899 | 12076040040262194 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f46b6ef1600 | 0x7f4586e36800 | 412140 | 412140 | 36584 | 3297128 | 65536 | 377356614 | 3119848 | 0 | 1511247512 | 12076040040313820 | 12076040040508458 | 12076040040775177 | 12076040040823096 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f46b6ef1500 | 0x7f4586e36840 | 461941 | 461941 | 15840 | 3695536 | 65536 | 292056055 | 3056391 | 0 | 1170042404 | 12076040040846490 | 12076040041048936 | 12076040041347974 | 12076040041414856 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f46b6ef1400 | 0x7f4586e36880 | 221989 | 221989 | 26320 | 1775920 | 65536 | 155996426 | 1601369 | 0 | 625834816 | 12076040041438590 | 12076040041637413 | 12076040041773572 | 12076040041777560 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f46b6ef1300 | 0x7f4586e368c0 | 303277 | 303277 | 30637 | 2426224 | 65536 | 263755855 | 2259514 | 0 | 1056872456 | 12076040041837171 | 12076040042024131 | 12076040042216610 | 12076040042265497 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f46b6ef1a00 | 0x7f4586e36900 | 218364 | 218364 | 26842 | 1746920 | 65536 | 152039770 | 1568317 | 0 | 610002344 | 12076040042301314 | 12076040042478689 | 12076040042613089 | 12076040042616970 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f46b6ef1900 | 0x7f4586e36940 | 437365 | 437365 | 38634 | 3498928 | 65536 | 405527112 | 3334259 | 0 | 1623929224 | 12076040042675229 | 12076040042858847 | 12076040043143486 | 12076040043192660 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f46b6ef1800 | 0x7f4586e36980 | 402477 | 402477 | 34525 | 3219824 | 65536 | 316698012 | 3033706 | 0 | 1268619424 | 12076040043217486 | 12076040043411805 | 12076040043671004 | 12076040043719900 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f46b6ef1700 | 0x7f4586e369c0 | 222028 | 222028 | 23875 | 1776232 | 65536 | 146251165 | 1607047 | 0 | 586877196 | 12076040043744265 | 12076040043937082 | 12076040044074362 | 12076040044078376 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f46b6ef1600 | 0x7f4586e36a00 | 332565 | 332565 | 36293 | 2660528 | 65536 | 299225015 | 2472743 | 0 | 1198749696 | 12076040044137516 | 12076040044331321 | 12076040044541240 | 12076040044565301 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f46b6ef1500 | 0x7f4586e36a40 | 226381 | 226381 | 26159 | 1811056 | 65536 | 156394762 | 1636670 | 0 | 627478820 | 12076040044628428 | 12076040044805558 | 12076040044945398 | 12076040044949305 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f46b6ef1400 | 0x7f4586e36a80 | 516028 | 516028 | 16019 | 4128232 | 65536 | 439141143 | 3597916 | 0 | 1758384352 | 12076040045012142 | 12076040045205717 | 12076040045542835 | 12076040045592891 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f46b6ef1300 | 0x7f4586e36ac0 | 404924 | 404924 | 34142 | 3239400 | 65536 | 306516385 | 3051554 | 0 | 1227885500 | 12076040045618188 | 12076040045820456 | 12076040046081415 | 12076040046133516 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f46b6ef1a00 | 0x7f4586e36b00 | 225188 | 225188 | 26598 | 1801512 | 65536 | 156703650 | 1616290 | 0 | 628690872 | 12076040046154004 | 12076040046356773 | 12076040046496132 | 12076040046499225 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f46b6ef1900 | 0x7f4586e36b40 | 358549 | 358549 | 35848 | 2868400 | 65536 | 327656313 | 2701398 | 0 | 1312481636 | 12076040046557123 | 12076040046742851 | 12076040046971490 | 12076040047018711 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f46b6ef1800 | 0x7f4586e36b80 | 226821 | 226821 | 24572 | 1814576 | 65536 | 156314484 | 1629206 | 0 | 627141252 | 12076040047055289 | 12076040047239170 | 12076040047379649 | 12076040047382747 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f46b6ef1700 | 0x7f4586e36bc0 | 548389 | 548389 | 16025 | 4387120 | 65536 | 471446754 | 3880763 | 0 | 1887607684 | 12076040047439833 | 12076040047633088 | 12076040047991647 | 12076040048040079 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f46b6ef1600 | 0x7f4586e36c00 | 409396 | 409396 | 34508 | 3275176 | 65536 | 335993875 | 3083274 | 0 | 1345801336 | 12076040048063232 | 12076040048261726 | 12076040048526045 | 12076040048573320 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f46b6ef1500 | 0x7f4586e36c40 | 220709 | 220709 | 22709 | 1765680 | 65536 | 157466238 | 1586250 | 0 | 631716152 | 12076040048596132 | 12076040048790684 | 12076040048927163 | 12076040048930353 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f46b6ef1400 | 0x7f4586e36c80 | 382069 | 382069 | 37268 | 3056560 | 65536 | 349748165 | 2887162 | 0 | 1400837600 | 12076040048990084 | 12076040049187963 | 12076040049432602 | 12076040049481868 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f46b6ef1300 | 0x7f4586e36cc0 | 221109 | 221109 | 25350 | 1768880 | 65536 | 161694863 | 1598166 | 0 | 648654712 | 12076040049517885 | 12076040049700921 | 12076040049837880 | 12076040049841356 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f46b6ef1a00 | 0x7f4586e36d00 | 613797 | 613797 | 16879 | 4910384 | 65536 | 542053715 | 4426383 | 0 | 2170036184 | 12076040049899485 | 12076040050098839 | 12076040050500438 | 12076040050548691 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f46b6ef1900 | 0x7f4586e36d40 | 412837 | 412837 | 35670 | 3302704 | 65536 | 324293449 | 3120826 | 0 | 1298989548 | 12076040050573407 | 12076040050779477 | 12076040051045716 | 12076040051095197 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f46b6ef1800 | 0x7f4586e36d80 | 225268 | 225268 | 27905 | 1802152 | 65536 | 167234047 | 1618662 | 0 | 670791952 | 12076040051118620 | 12076040051325715 | 12076040051465554 | 12076040051468821 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f46b6ef1700 | 0x7f4586e36dc0 | 435820 | 435820 | 41994 | 3486568 | 65536 | 402748261 | 3311663 | 0 | 1612841560 | 12076040051527991 | 12076040051723473 | 12076040052003792 | 12076040052052626 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f46b6ef1600 | 0x7f4586e36e00 | 226029 | 226029 | 25603 | 1808240 | 65536 | 187517099 | 1638322 | 0 | 751958772 | 12076040052095706 | 12076040052274831 | 12076040052415471 | 12076040052418676 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f46b6ef1500 | 0x7f4586e36e40 | 647501 | 647501 | 53316 | 5180016 | 65536 | 612768272 | 5003803 | 0 | 2452892660 | 12076040052477155 | 12076040052662830 | 12076040053088108 | 12076040053136671 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f46b6ef1400 | 0x7f4586e36e80 | 410460 | 410460 | 37453 | 3283688 | 65536 | 320600764 | 3085040 | 0 | 1284246744 | 12076040053165484 | 12076040053358667 | 12076040053624907 | 12076040053646749 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f46b6ef1300 | 0x7f4586e36ec0 | 225253 | 225253 | 26504 | 1802032 | 65536 | 184865023 | 1613351 | 0 | 741350856 | 12076040053705768 | 12076040053881706 | 12076040054021385 | 12076040054024791 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f46b6ef1a00 | 0x7f4586e36f00 | 486380 | 486380 | 42148 | 3891048 | 65536 | 455319958 | 3723570 | 0 | 1823125548 | 12076040054080385 | 12076040054282184 | 12076040054596423 | 12076040054645665 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f46b6ef1900 | 0x7f4586e36f40 | 245700 | 245700 | 29672 | 1965608 | 65536 | 213568781 | 1786509 | 0 | 856167524 | 12076040054680700 | 12076040054866022 | 12076040055018662 | 12076040055022435 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f46b6ef1800 | 0x7f4586e36f80 | 787069 | 787069 | 61015 | 6296560 | 65536 | 750078754 | 6122257 | 0 | 3002135224 | 12076040055080273 | 12076040055267461 | 12076040055786339 | 12076040055834324 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f46b6ef1700 | 0x7f4586e36fc0 | 458500 | 458500 | 43750 | 3668008 | 65536 | 378118662 | 3477152 | 0 | 1514305832 | 12076040055857728 | 12076040056059778 | 12076040056357697 | 12076040056405706 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f46b6ef1600 | 0x7f4586e37000 | 246436 | 246436 | 27697 | 1971496 | 65536 | 217071943 | 1807845 | 0 | 870173548 | 12076040056429360 | 12076040056624896 | 12076040056779615 | 12076040056782927 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f46b6ef1500 | 0x7f4586e37040 | 597740 | 597740 | 51179 | 4781928 | 65536 | 556092122 | 4594975 | 0 | 2226222436 | 12076040056843560 | 12076040057035614 | 12076040057422653 | 12076040057473460 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f46b6ef1400 | 0x7f4586e37080 | 277292 | 277292 | 29533 | 2218344 | 65536 | 247985900 | 2053420 | 0 | 993817348 | 12076040057514286 | 12076040057690652 | 12076040057866011 | 12076040057869196 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f46b6ef1300 | 0x7f4586e370c0 | 925940 | 925940 | 68748 | 7407528 | 65536 | 890033257 | 7234455 | 0 | 3561953580 | 12076040057927144 | 12076040058131130 | 12076040058741048 | 12076040058791690 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f46b6ef1a00 | 0x7f4586e37100 | 502389 | 502389 | 43828 | 4019120 | 65536 | 456863999 | 3845192 | 0 | 1829288144 | 12076040058815995 | 12076040059010807 | 12076040059339286 | 12076040059407495 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f46b6ef1900 | 0x7f4586e37140 | 283820 | 283820 | 30641 | 2270568 | 65536 | 254648430 | 2103333 | 0 | 1020491332 | 12076040059431590 | 12076040059627765 | 12076040059807284 | 12076040059810534 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f46b6ef1800 | 0x7f4586e37180 | 698917 | 698917 | 55099 | 5591344 | 65536 | 664666426 | 5423297 | 0 | 2660516384 | 12076040059868842 | 12076040060059283 | 12076040060515282 | 12076040060581747 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f46b6ef1700 | 0x7f4586e371c0 | 314949 | 314949 | 32770 | 2519600 | 65536 | 285340955 | 2349735 | 0 | 1143258716 | 12076040060617704 | 12076040060798481 | 12076040060999120 | 12076040061065526 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f46b6ef1600 | 0x7f4586e37200 | 1067669 | 1067669 | 78092 | 8541360 | 65536 | 1029315946 | 8355328 | 0 | 4119084828 | 12076040061089020 | 12076040061288079 | 12076040061991116 | 12076040062058101 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f46b6ef1500 | 0x7f4586e37240 | 567709 | 567709 | 48268 | 4541680 | 65536 | 528186542 | 4365953 | 0 | 2114582372 | 12076040062084480 | 12076040062282635 | 12076040062655274 | 12076040062722435 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f46b6ef1400 | 0x7f4586e37280 | 318460 | 318460 | 31963 | 2547688 | 65536 | 289606961 | 2378361 | 0 | 1160316940 | 12076040062744056 | 12076040062941193 | 12076040063143912 | 12076040063212486 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f46b6ef1300 | 0x7f4586e372c0 | 804493 | 804493 | 61370 | 6435952 | 65536 | 770078058 | 6268723 | 0 | 3082158068 | 12076040063235008 | 12076040063434311 | 12076040063960549 | 12076040064028603 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f46b6ef1a00 | 0x7f4586e37300 | 348669 | 348669 | 33948 | 2789360 | 65536 | 319644239 | 2625000 | 0 | 1280472808 | 12076040064063538 | 12076040064249828 | 12076040064473187 | 12076040064539582 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f46b6ef1900 | 0x7f4586e37340 | 1208797 | 1208797 | 86053 | 9670384 | 65536 | 1167848186 | 9495118 | 0 | 4673211976 | 12076040064562695 | 12076040064758146 | 12076040065556864 | 12076040065623858 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f46b6ef1800 | 0x7f4586e37380 | 671693 | 671693 | 15698 | 5373552 | 65536 | 601408600 | 4926486 | 0 | 2407483076 | 12076040065647752 | 12076040065849183 | 12076040066291581 | 12076040066361508 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f46b6ef1700 | 0x7f4586e373c0 | 353941 | 353941 | 34968 | 2831536 | 65536 | 323974848 | 2657721 | 0 | 1297791460 | 12076040066392917 | 12076040066590300 | 12076040066815899 | 12076040066883489 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f46b6ef1600 | 0x7f4586e37400 | 910749 | 910749 | 66814 | 7286000 | 65536 | 874749114 | 7118768 | 0 | 3500850804 | 12076040066912322 | 12076040067111258 | 12076040067708696 | 12076040067777470 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f46b6ef1500 | 0x7f4586e37440 | 418749 | 418749 | 41143 | 3350000 | 65536 | 385834707 | 3172230 | 0 | 1545237144 | 12076040067812435 | 12076040067999735 | 12076040068268374 | 12076040068334305 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f46b6ef1400 | 0x7f4586e37480 | 1487405 | 1487405 | 100401 | 11899248 | 65536 | 1448118019 | 11734761 | 0 | 5794292692 | 12076040068358179 | 12076040068555253 | 12076040069539889 | 12076040069607471 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f46b6ef1300 | 0x7f4586e374c0 | 778724 | 778724 | 61133 | 6229800 | 65536 | 740301220 | 6054961 | 0 | 2963055052 | 12076040069638679 | 12076040069839408 | 12076040070353486 | 12076040070420201 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f46b6ef1a00 | 0x7f4586e37500 | 421332 | 421332 | 39769 | 3370664 | 65536 | 388847980 | 3195123 | 0 | 1557277648 | 12076040070444727 | 12076040070648045 | 12076040070918604 | 12076040070985332 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f46b6ef1900 | 0x7f4586e37540 | 1128668 | 1128668 | 81730 | 9029352 | 65536 | 1090254836 | 8850361 | 0 | 4362842032 | 12076040071017071 | 12076040071208683 | 12076040071947401 | 12076040072019263 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f46b6ef1800 | 0x7f4586e37580 | 489364 | 489364 | 43570 | 3914920 | 65536 | 457006460 | 3744426 | 0 | 1829918964 | 12076040072054649 | 12076040072242120 | 12076040072558598 | 12076040072624889 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f46b6ef1700 | 0x7f4586e375c0 | 1769173 | 1769173 | 47913 | 14153392 | 65536 | 1725176960 | 13980415 | 0 | 6902528292 | 12076040072649124 | 12076040072848197 | 12076040074018433 | 12076040074085553 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f46b6ef1600 | 0x7f4586e37600 | 921821 | 921821 | 66731 | 7374576 | 65536 | 806910370 | 7185413 | 0 | 3229484164 | 12076040074111251 | 12076040074315552 | 12076040074926430 | 12076040074992759 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f46b6ef1500 | 0x7f4586e37640 | 494956 | 494956 | 44518 | 3959656 | 65536 | 461474066 | 3788888 | 0 | 1847797124 | 12076040075021463 | 12076040075223389 | 12076040075544028 | 12076040075612641 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f46b6ef1400 | 0x7f4586e37680 | 1337804 | 1337804 | 21438 | 10702440 | 65536 | 1299299561 | 10535104 | 0 | 5199024672 | 12076040075636355 | 12076040075831387 | 12076040076709623 | 12076040076776184 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f46b6ef1300 | 0x7f4586e376c0 | 632413 | 632413 | 55755 | 5059312 | 65536 | 593310764 | 4861437 | 0 | 2375134968 | 12076040076811940 | 12076040076995862 | 12076040077406101 | 12076040077472588 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f46b6ef1a00 | 0x7f4586e37700 | 2333372 | 2333372 | 150564 | 18666984 | 65536 | 2284383508 | 18496506 | 0 | 9139354464 | 12076040077495811 | 12076040077694100 | 12076040079241294 | 12076040079309111 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f46b6ef1900 | 0x7f4586e37740 | 1202484 | 1202484 | 85773 | 9619880 | 65536 | 1162895111 | 9448033 | 0 | 4653405256 | 12076040079338095 | 12076040079543373 | 12076040080337930 | 12076040080405529 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f46b6ef1800 | 0x7f4586e37780 | 641156 | 641156 | 51815 | 5129256 | 65536 | 606988758 | 4961425 | 0 | 2429782584 | 12076040080435795 | 12076040080632969 | 12076040081048008 | 12076040081115528 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f46b6ef1700 | 0x7f4586e377c0 | 1757917 | 1757917 | 117186 | 14063344 | 65536 | 1714655878 | 13895314 | 0 | 6860448616 | 12076040081146566 | 12076040081336327 | 12076040082494723 | 12076040082561846 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f46b6ef1600 | 0x7f4586e37800 | 1201173 | 1201173 | 85559 | 9609392 | 65536 | 1162252225 | 9425960 | 0 | 4650836868 | 12076040082600558 | 12076040082782082 | 12076040083567999 | 12076040083638617 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f46b6ef1500 | 0x7f4586e37840 | 4584461 | 4584461 | 211089 | 36675696 | 65536 | 4515904082 | 36505507 | 0 | 18065438032 | 12076040083670266 | 12076040083866878 | 12076040086917267 | 12076040086985577 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f46b6ef1400 | 0x7f4586e37880 | 2325957 | 2325957 | 152451 | 18607664 | 65536 | 2275707759 | 18431237 | 0 | 9104656164 | 12076040087021314 | 12076040087221426 | 12076040088762860 | 12076040088832309 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f46b6ef1300 | 0x7f4586e378c0 | 1203877 | 1203877 | 84087 | 9631024 | 65536 | 1165663987 | 9463618 | 0 | 4664480200 | 12076040088862886 | 12076040089079179 | 12076040089869256 | 12076040089938545 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f46b6ef1a00 | 0x7f4586e37900 | 3445092 | 3445092 | 216660 | 27560744 | 65536 | 3385931750 | 27384125 | 0 | 13545553880 | 12076040089966658 | 12076040090175015 | 12076040092457087 | 12076040092525754 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f46b6ef1900 | 0x7f4586e37940 | 2324173 | 2324173 | 150367 | 18593392 | 65536 | 2274813929 | 18419327 | 0 | 9101081164 | 12076040092569946 | 12076040092747326 | 12076040094282680 | 12076040094351026 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f46b6ef1800 | 0x7f4586e37980 | 9088668 | 9088668 | 545077 | 72709352 | 65536 | 8977963230 | 72531097 | 0 | 35913674548 | 12076040094382514 | 12076040094577879 | 12076040100631297 | 12076040100702480 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f46b6ef1700 | 0x7f4586e379c0 | 4573549 | 4573549 | 212175 | 36588400 | 65536 | 4502216092 | 36414859 | 0 | 18010687144 | 12076040100729830 | 12076040100929856 | 12076040103971765 | 12076040104042537 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f46b6ef1600 | 0x7f4586e37a00 | 2330860 | 2330860 | 151131 | 18646888 | 65536 | 2281424313 | 18464606 | 0 | 9127522648 | 12076040104072813 | 12076040104272564 | 12076040105812079 | 12076040105879952 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 906511 | 906516 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f46b6ef1500 | 0x7f4586e37a40 | 6814372 | 6814372 | 413964 | 54514984 | 65536 | 6720902779 | 54338031 | 0 | 26885436972 | 12076040105910138 | 12076040106108808 | 12076040110635992 | 12076040110712513 |