48 KiB
48 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | CPF_CPF_STAT_BUSY | CPF_CPF_STAT_STALL | CPF_CPF_TCIU_BUSY | CPF_CPF_TCIU_STALL | CPF_CPF_STAT_IDLE | CPF_CPF_TCIU_IDLE | CPF_CMP_UTCL1_STALL_ON_TRANSLATION | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 162887 | 162887 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f0b90204280 | 3066680 | 2977166 | 524288 | 383334 | 383334 | 383334 | 0 | 2896 | 0 | 0 | 380125 | 0 | 16527598245916 | 16530446836344 | 16530447076822 | 16527746399726 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 162887 | 162887 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f0b90223f80 | 269736 | 161104 | 512 | 33716 | 33716 | 33716 | 0 | 1798 | 0 | 0 | 31489 | 0 | 16527751557277 | 16530451863499 | 16530451877259 | 16527751693352 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f0baea38380 | 0x7f0b90223fc0 | 1323456 | 1214693 | 65536 | 165431 | 165431 | 165431 | 0 | 1855 | 0 | 0 | 162564 | 0 | 16527751728691 | 16530451947178 | 16530452039817 | 16527752064919 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f0baea38200 | 0x7f0b90224000 | 2475992 | 2366960 | 65536 | 309498 | 309498 | 309498 | 0 | 1846 | 0 | 0 | 309104 | 0 | 16527752103057 | 16530452073097 | 16530452252455 | 16527752473314 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f0baea38080 | 0x7f0b90224040 | 2476240 | 2374199 | 65536 | 309529 | 309529 | 309529 | 0 | 1823 | 0 | 0 | 307591 | 0 | 16527752506633 | 16530452277895 | 16530452458533 | 16527752872929 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f0bac409f00 | 0x7f0b90224080 | 1325480 | 1216078 | 65536 | 165684 | 165684 | 165684 | 0 | 1786 | 0 | 0 | 162298 | 0 | 16527752905488 | 16530452482533 | 16530452575972 | 16527753178638 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f0bac409d80 | 0x7f0b902240c0 | 1322120 | 1207782 | 65536 | 165264 | 165264 | 165264 | 0 | 1840 | 0 | 0 | 162240 | 0 | 16527753211147 | 16530452602212 | 16530452694051 | 16527753487487 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f0bac409c00 | 0x7f0b90224100 | 1303480 | 1198691 | 65536 | 162934 | 162934 | 162934 | 0 | 1837 | 0 | 0 | 162983 | 0 | 16527753532295 | 16530452734051 | 16530452826530 | 16527753792776 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f0bac409a80 | 0x7f0b90224140 | 2476304 | 2369047 | 65536 | 309537 | 309537 | 309537 | 0 | 1829 | 0 | 0 | 308583 | 0 | 16527753825304 | 16530452858690 | 16530453039008 | 16527754190011 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f0bac409900 | 0x7f0b90224180 | 2461080 | 2353703 | 65536 | 307634 | 307634 | 307634 | 0 | 1839 | 0 | 0 | 307574 | 0 | 16527754221890 | 16530453071648 | 16530453251806 | 16527754609176 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f0bac409780 | 0x7f0b902241c0 | 1312632 | 1205018 | 65536 | 164078 | 164078 | 164078 | 0 | 1846 | 0 | 0 | 162029 | 0 | 16527754641315 | 16530453290206 | 16530453383645 | 16527754886496 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f0bac409600 | 0x7f0b90224200 | 1318968 | 1212892 | 65536 | 164870 | 164870 | 164870 | 0 | 1798 | 0 | 0 | 162759 | 0 | 16527754919034 | 16530453412605 | 16530453505404 | 16527755163286 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f0baea38480 | 0x7f0b90224240 | 1324320 | 1213026 | 65536 | 165539 | 165539 | 165539 | 0 | 1809 | 0 | 0 | 160902 | 0 | 16527755204774 | 16530453537724 | 16530453630203 | 16527755441545 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f0baea38300 | 0x7f0b90224280 | 2480872 | 2371525 | 65536 | 310108 | 310108 | 310108 | 0 | 1891 | 0 | 0 | 307752 | 0 | 16527755474674 | 16530453659643 | 16530453839321 | 16527755806902 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f0baea38180 | 0x7f0b902242c0 | 2509328 | 2391788 | 65536 | 313665 | 313665 | 313665 | 0 | 1804 | 0 | 0 | 305754 | 0 | 16527755839161 | 16530453864921 | 16530454045879 | 16527756173949 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f0baea38000 | 0x7f0b90224300 | 1311040 | 1208082 | 65536 | 163879 | 163879 | 163879 | 0 | 1799 | 0 | 0 | 162021 | 0 | 16527756206877 | 16530454073879 | 16530454166998 | 16527756450309 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f0bac409e80 | 0x7f0b90224340 | 1316216 | 1207008 | 65536 | 164526 | 164526 | 164526 | 0 | 1839 | 0 | 0 | 161639 | 0 | 16527756481967 | 16530454196118 | 16530454290197 | 16527756739068 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f0bac409d00 | 0x7f0b90224380 | 1305520 | 1197448 | 65536 | 163189 | 163189 | 163189 | 0 | 1844 | 0 | 0 | 160576 | 0 | 16527756779836 | 16530454325077 | 16530454418836 | 16527757015328 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f0bac409b80 | 0x7f0b902243c0 | 2472464 | 2362078 | 65536 | 309057 | 309057 | 309057 | 0 | 1829 | 0 | 0 | 306159 | 0 | 16527757048047 | 16530454474036 | 16530454653874 | 16527757383154 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f0bac409a00 | 0x7f0b90224400 | 2442224 | 2333727 | 65536 | 305277 | 305277 | 305277 | 0 | 1868 | 0 | 0 | 305798 | 0 | 16527757415683 | 16530454687154 | 16530454866512 | 16527757747371 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f0bac409880 | 0x7f0b90224440 | 1308528 | 1202335 | 65536 | 163565 | 163565 | 163565 | 0 | 1874 | 0 | 0 | 160937 | 0 | 16527757779380 | 16530454919472 | 16530455012591 | 16527758025281 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f0bac409700 | 0x7f0b90224480 | 1306448 | 1203391 | 65536 | 163305 | 163305 | 163305 | 0 | 1893 | 0 | 0 | 162104 | 0 | 16527758057670 | 16530455038351 | 16530455131150 | 16527758309631 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f0baea38580 | 0x7f0b902244c0 | 1308584 | 1207858 | 65536 | 163572 | 163572 | 163572 | 0 | 1833 | 0 | 0 | 161344 | 0 | 16527758349409 | 16530455165069 | 16530455257229 | 16527758588150 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f0baea38400 | 0x7f0b90224500 | 2460280 | 2355799 | 65536 | 307534 | 307534 | 307534 | 0 | 1839 | 0 | 0 | 305420 | 0 | 16527758620939 | 16530455282508 | 16530455461387 | 16527758951537 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f0baea38280 | 0x7f0b90224540 | 2444952 | 2343679 | 65536 | 305618 | 305618 | 305618 | 0 | 1853 | 0 | 0 | 305982 | 0 | 16527758984176 | 16530455488107 | 16530455667465 | 16527759322073 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f0baea38100 | 0x7f0b90224580 | 1300344 | 1192507 | 65536 | 162542 | 162542 | 162542 | 0 | 1887 | 0 | 0 | 160334 | 0 | 16527759355632 | 16530455690985 | 16530455782664 | 16527759598493 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f0bac409f80 | 0x7f0b902245c0 | 1289448 | 1188425 | 65536 | 161180 | 161180 | 161180 | 0 | 1844 | 0 | 0 | 160308 | 0 | 16527759631602 | 16530455807464 | 16530455899143 | 16527759875793 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f0bac409e00 | 0x7f0b90224600 | 1296104 | 1192899 | 65536 | 162012 | 162012 | 162012 | 0 | 1824 | 0 | 0 | 161086 | 0 | 16527759914982 | 16530455931463 | 16530456024582 | 16527760156483 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f0bac409c80 | 0x7f0b90224640 | 2450184 | 2339341 | 65536 | 306272 | 306272 | 306272 | 0 | 1813 | 0 | 0 | 304030 | 0 | 16527760190302 | 16530456050022 | 16530456229860 | 16527760521369 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f0bac409b00 | 0x7f0b90224680 | 2457592 | 2354784 | 65536 | 307198 | 307198 | 307198 | 0 | 1826 | 0 | 0 | 304150 | 0 | 16527760553698 | 16530456253860 | 16530456434338 | 16527760887146 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f0bac409980 | 0x7f0b902246c0 | 1311768 | 1205947 | 65536 | 163970 | 163970 | 163970 | 0 | 1795 | 0 | 0 | 162588 | 0 | 16527760919575 | 16530456458658 | 16530456552897 | 16527761164756 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f0bac409800 | 0x7f0b90224700 | 1305640 | 1201221 | 65536 | 163204 | 163204 | 163204 | 0 | 1788 | 0 | 0 | 161868 | 0 | 16527761197965 | 16530456578977 | 16530456673216 | 16527761458495 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f0bac409680 | 0x7f0b90224740 | 1300232 | 1199431 | 65536 | 162528 | 162528 | 162528 | 0 | 1829 | 0 | 0 | 160539 | 0 | 16527761498824 | 16530456705216 | 16530456798335 | 16527761745634 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f0baea38500 | 0x7f0b90224780 | 2453672 | 2347680 | 65536 | 306708 | 306708 | 306708 | 0 | 1817 | 0 | 0 | 304868 | 0 | 16527761778523 | 16530456823295 | 16530457004253 | 16527762111921 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f0baea38380 | 0x7f0b902247c0 | 2446056 | 2341406 | 65536 | 305756 | 305756 | 305756 | 0 | 1755 | 0 | 0 | 303816 | 0 | 16527762145770 | 16530457029533 | 16530457210971 | 16527762482368 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f0baea38200 | 0x7f0b90224800 | 1300024 | 1196357 | 65536 | 162502 | 162502 | 162502 | 0 | 1817 | 0 | 0 | 160414 | 0 | 16527762514946 | 16530457236571 | 16530457330170 | 16527762760747 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f0baea38080 | 0x7f0b90224840 | 1314040 | 1207009 | 65536 | 164254 | 164254 | 164254 | 0 | 1799 | 0 | 0 | 160456 | 0 | 16527762794086 | 16530457355610 | 16530457450489 | 16527763040557 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f0bac409f00 | 0x7f0b90224880 | 1299352 | 1198763 | 65536 | 162418 | 162418 | 162418 | 0 | 1801 | 0 | 0 | 160295 | 0 | 16527763080456 | 16530457483129 | 16530457576888 | 16527763321727 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f0bac409d80 | 0x7f0b902248c0 | 2456408 | 2346224 | 65536 | 307050 | 307050 | 307050 | 0 | 1801 | 0 | 0 | 305336 | 0 | 16527763354456 | 16530457602968 | 16530457781686 | 16527763689323 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f0bac409c00 | 0x7f0b90224900 | 2434344 | 2329888 | 65536 | 304292 | 304292 | 304292 | 0 | 1808 | 0 | 0 | 302129 | 0 | 16527763722262 | 16530457806326 | 16530457987124 | 16527764053390 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f0bac409a80 | 0x7f0b90224940 | 1302520 | 1202231 | 65536 | 162814 | 162814 | 162814 | 0 | 1812 | 0 | 0 | 160737 | 0 | 16527764085569 | 16530458013204 | 16530458107923 | 16527764335470 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f0bac409900 | 0x7f0b90224980 | 1308584 | 1200679 | 65536 | 163572 | 163572 | 163572 | 0 | 1807 | 0 | 0 | 162576 | 0 | 16527764368019 | 16530458131443 | 16530458226162 | 16527764614420 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f0bac409780 | 0x7f0b902249c0 | 1293928 | 1191852 | 65536 | 161740 | 161740 | 161740 | 0 | 1798 | 0 | 0 | 159745 | 0 | 16527764654558 | 16530458258802 | 16530458351761 | 16527764897509 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f0bac409600 | 0x7f0b90224a00 | 2439496 | 2332363 | 65536 | 304936 | 304936 | 304936 | 0 | 1811 | 0 | 0 | 305021 | 0 | 16527764930478 | 16530458375921 | 16530458555279 | 16527765262346 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f0baea38480 | 0x7f0b90224a40 | 2444888 | 2335426 | 65536 | 305610 | 305610 | 305610 | 0 | 1861 | 0 | 0 | 302787 | 0 | 16527765295745 | 16530458580239 | 16530458761997 | 16527765629273 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f0baea38300 | 0x7f0b90224a80 | 1287416 | 1182582 | 65536 | 160926 | 160926 | 160926 | 0 | 1776 | 0 | 0 | 160123 | 0 | 16527765661981 | 16530458786477 | 16530458879116 | 16527765908292 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f0baea38180 | 0x7f0b90224ac0 | 1298208 | 1188991 | 65536 | 162275 | 162275 | 162275 | 0 | 1792 | 0 | 0 | 160392 | 0 | 16527765940891 | 16530458904396 | 16530458998315 | 16527766198182 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f0baea38000 | 0x7f0b90224b00 | 1287728 | 1183940 | 65536 | 160965 | 160965 | 160965 | 0 | 1772 | 0 | 0 | 158806 | 0 | 16527766239560 | 16530459030315 | 16530459122314 | 16527766489251 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f0bac409e80 | 0x7f0b90224b40 | 2424144 | 2312053 | 65536 | 303017 | 303017 | 303017 | 0 | 1802 | 0 | 0 | 300809 | 0 | 16527766521670 | 16530459149834 | 16530459328872 | 16527766853648 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f0bac409d00 | 0x7f0b90224b80 | 2436288 | 2330086 | 65536 | 304535 | 304535 | 304535 | 0 | 1784 | 0 | 0 | 301592 | 0 | 16527766886827 | 16530459353672 | 16530459532871 | 16527767221304 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f0bac409b80 | 0x7f0b90224bc0 | 1305192 | 1195223 | 65536 | 163148 | 163148 | 163148 | 0 | 1784 | 0 | 0 | 159731 | 0 | 16527767253303 | 16530459556070 | 16530459649670 | 16527767501664 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f0bac409a00 | 0x7f0b90224c00 | 1301600 | 1196100 | 65536 | 162699 | 162699 | 162699 | 0 | 1808 | 0 | 0 | 160722 | 0 | 16527767534403 | 16530459674309 | 16530459770308 | 16527767783294 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f0bac409880 | 0x7f0b90224c40 | 1296672 | 1185038 | 65536 | 162083 | 162083 | 162083 | 0 | 1786 | 0 | 0 | 160135 | 0 | 16527767822372 | 16530459803268 | 16530459897347 | 16527768067613 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f0bac409700 | 0x7f0b90224c80 | 2424752 | 2313843 | 65536 | 303093 | 303093 | 303093 | 0 | 1808 | 0 | 0 | 300082 | 0 | 16527768101762 | 16530459921987 | 16530460102466 | 16527768435460 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f0baea38580 | 0x7f0b90224cc0 | 2406624 | 2298964 | 65536 | 300827 | 300827 | 300827 | 0 | 1878 | 0 | 0 | 299240 | 0 | 16527768468869 | 16530460127585 | 16530460309344 | 16527768801597 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f0baea38400 | 0x7f0b90224d00 | 1298144 | 1196196 | 65536 | 162267 | 162267 | 162267 | 0 | 1815 | 0 | 0 | 160829 | 0 | 16527768834105 | 16530460335743 | 16530460430463 | 16527769084166 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f0baea38280 | 0x7f0b90224d40 | 1357536 | 1249293 | 65536 | 169691 | 169691 | 169691 | 0 | 1836 | 0 | 0 | 166679 | 0 | 16527769116815 | 16530460457182 | 16530460556221 | 16527769368206 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f0baea38100 | 0x7f0b90224d80 | 1297296 | 1193160 | 65536 | 162161 | 162161 | 162161 | 0 | 1891 | 0 | 0 | 159698 | 0 | 16527769408555 | 16530460588381 | 16530460681500 | 16527769651296 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f0bac409f80 | 0x7f0b90224dc0 | 2386576 | 2281395 | 65536 | 298321 | 298321 | 298321 | 0 | 1812 | 0 | 0 | 297798 | 0 | 16527769684994 | 16530460709660 | 16530460888539 | 16527770014252 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f0bac409e00 | 0x7f0b90224e00 | 2398512 | 2284666 | 65536 | 299813 | 299813 | 299813 | 0 | 1784 | 0 | 0 | 300425 | 0 | 16527770046771 | 16530460914458 | 16530461092697 | 16527770392109 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f0bac409c80 | 0x7f0b90224e40 | 1313760 | 1198132 | 65536 | 164219 | 164219 | 164219 | 0 | 1838 | 0 | 0 | 161229 | 0 | 16527770424597 | 16530461119416 | 16530461213496 | 16527770674658 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f0bac409b00 | 0x7f0b90224e80 | 1410672 | 1306123 | 65536 | 176333 | 176333 | 176333 | 0 | 1797 | 0 | 0 | 173724 | 0 | 16527770707057 | 16530461238135 | 16530461343254 | 16527770963248 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f0bac409980 | 0x7f0b90224ec0 | 1292032 | 1179503 | 65536 | 161503 | 161503 | 161503 | 0 | 1866 | 0 | 0 | 159945 | 0 | 16527771002916 | 16530461375094 | 16530461468053 | 16527771242777 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f0bac409800 | 0x7f0b90224f00 | 2418552 | 2313489 | 65536 | 302318 | 302318 | 302318 | 0 | 1875 | 0 | 0 | 299507 | 0 | 16527771274496 | 16530461493653 | 16530461673812 | 16527771613604 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f0bac409680 | 0x7f0b90224f40 | 2426008 | 2318916 | 65536 | 303250 | 303250 | 303250 | 0 | 1845 | 0 | 0 | 299777 | 0 | 16527771645493 | 16530461700051 | 16530461880050 | 16527771980790 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f0baea38500 | 0x7f0b90224f80 | 1297888 | 1193606 | 65536 | 162235 | 162235 | 162235 | 0 | 1835 | 0 | 0 | 159013 | 0 | 16527772013349 | 16530461904849 | 16530461998769 | 16527772260420 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f0baea38380 | 0x7f0b90224fc0 | 1453968 | 1346302 | 65536 | 181745 | 181745 | 181745 | 0 | 1803 | 0 | 0 | 180334 | 0 | 16527772292939 | 16530462024528 | 16530462131727 | 16527772552389 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f0baea38200 | 0x7f0b90225000 | 1287360 | 1182226 | 65536 | 160919 | 160919 | 160919 | 0 | 1868 | 0 | 0 | 158917 | 0 | 16527772605657 | 16530462166447 | 16530462259726 | 16527772834279 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f0baea38080 | 0x7f0b90225040 | 2422880 | 2310198 | 65536 | 302859 | 302859 | 302859 | 0 | 1845 | 0 | 0 | 300272 | 0 | 16527772867338 | 16530462283726 | 16530462462925 | 16527773203295 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f0bac409f00 | 0x7f0b90225080 | 2394384 | 2285990 | 65536 | 299297 | 299297 | 299297 | 0 | 1862 | 0 | 0 | 298327 | 0 | 16527773236424 | 16530462486604 | 16530462666283 | 16527773566512 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f0bac409d80 | 0x7f0b902250c0 | 1295656 | 1192793 | 65536 | 161956 | 161956 | 161956 | 0 | 1819 | 0 | 0 | 161158 | 0 | 16527773599161 | 16530462690603 | 16530462785322 | 16527773845552 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f0bac409c00 | 0x7f0b90225100 | 1536336 | 1424919 | 65536 | 192041 | 192041 | 192041 | 0 | 1836 | 0 | 0 | 190563 | 0 | 16527773877371 | 16530462808521 | 16530462923400 | 16527774148981 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f0bac409a80 | 0x7f0b90225140 | 1288256 | 1185834 | 65536 | 161031 | 161031 | 161031 | 0 | 1773 | 0 | 0 | 159140 | 0 | 16527774188629 | 16530462968040 | 16530463061959 | 16527774443830 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f0bac409900 | 0x7f0b90225180 | 2400184 | 2288339 | 65536 | 300022 | 300022 | 300022 | 0 | 1866 | 0 | 0 | 299372 | 0 | 16527774476389 | 16530463087399 | 16530463266917 | 16527774811556 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f0bac409780 | 0x7f0b902251c0 | 2390472 | 2279329 | 65536 | 298808 | 298808 | 298808 | 0 | 1819 | 0 | 0 | 298097 | 0 | 16527774844695 | 16530463291877 | 16530463471236 | 16527775176203 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f0bac409600 | 0x7f0b90225200 | 1301256 | 1194316 | 65536 | 162656 | 162656 | 162656 | 0 | 1833 | 0 | 0 | 161632 | 0 | 16527775208692 | 16530463495875 | 16530463590755 | 16527775456633 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f0baea38480 | 0x7f0b90225240 | 1616728 | 1499954 | 65536 | 202090 | 202090 | 202090 | 0 | 1791 | 0 | 0 | 201367 | 0 | 16527775489042 | 16530463618114 | 16530463738753 | 16527775762582 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f0baea38300 | 0x7f0b90225280 | 1295312 | 1190255 | 65536 | 161913 | 161913 | 161913 | 0 | 1772 | 0 | 0 | 159839 | 0 | 16527775802350 | 16530463771073 | 16530463864032 | 16527776054071 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f0baea38180 | 0x7f0b902252c0 | 2414648 | 2310990 | 65536 | 301830 | 301830 | 301830 | 0 | 1784 | 0 | 0 | 302991 | 0 | 16527776086020 | 16530463889952 | 16530464069470 | 16527776427767 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f0baea38000 | 0x7f0b90225300 | 2385728 | 2281424 | 65536 | 298215 | 298215 | 298215 | 0 | 1841 | 0 | 0 | 297111 | 0 | 16527776461236 | 16530464096030 | 16530464275868 | 16527776794504 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f0bac409e80 | 0x7f0b90225340 | 1296992 | 1190427 | 65536 | 162123 | 162123 | 162123 | 0 | 1850 | 0 | 0 | 160782 | 0 | 16527776827563 | 16530464301468 | 16530464396827 | 16527777075374 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f0bac409d00 | 0x7f0b90225380 | 1683792 | 1571279 | 65536 | 210473 | 210473 | 210473 | 0 | 1821 | 0 | 0 | 207574 | 0 | 16527777107853 | 16530464421947 | 16530464548986 | 16527777387122 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f0bac409b80 | 0x7f0b902253c0 | 1283648 | 1181286 | 65536 | 160455 | 160455 | 160455 | 0 | 1806 | 0 | 0 | 159244 | 0 | 16527777426691 | 16530464582586 | 16530464676665 | 16527777665992 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f0bac409a00 | 0x7f0b90225400 | 2389984 | 2282523 | 65536 | 298747 | 298747 | 298747 | 0 | 1816 | 0 | 0 | 294791 | 0 | 16527777697341 | 16530464702105 | 16530464882263 | 16527778028879 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f0bac409880 | 0x7f0b90225440 | 2421584 | 2307908 | 65536 | 302697 | 302697 | 302697 | 0 | 1789 | 0 | 0 | 296888 | 0 | 16527778061538 | 16530464907863 | 16530465087701 | 16527778402345 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f0bac409700 | 0x7f0b90225480 | 1287152 | 1183733 | 65536 | 160893 | 160893 | 160893 | 0 | 1787 | 0 | 0 | 158991 | 0 | 16527778434734 | 16530465112341 | 16530465206260 | 16527778683165 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f0baea38580 | 0x7f0b902254c0 | 1754744 | 1638861 | 65536 | 219342 | 219342 | 219342 | 0 | 1786 | 0 | 0 | 217257 | 0 | 16527778715344 | 16530465232180 | 16530465362259 | 16527779002903 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f0baea38400 | 0x7f0b90225500 | 1282096 | 1179182 | 65536 | 160261 | 160261 | 160261 | 0 | 1761 | 0 | 0 | 159506 | 0 | 16527779043492 | 16530465394578 | 16530465487698 | 16527779287273 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f0baea38280 | 0x7f0b90225540 | 2379576 | 2277689 | 65536 | 297446 | 297446 | 297446 | 0 | 1807 | 0 | 0 | 297971 | 0 | 16527779320172 | 16530465514257 | 16530465693456 | 16527779654660 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f0baea38100 | 0x7f0b90225580 | 2397664 | 2284647 | 65536 | 299707 | 299707 | 299707 | 0 | 1814 | 0 | 0 | 296523 | 0 | 16527779687118 | 16530465718256 | 16530465897774 | 16527780031636 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f0bac409f80 | 0x7f0b902255c0 | 1301800 | 1199807 | 65536 | 162724 | 162724 | 162724 | 0 | 1797 | 0 | 0 | 160398 | 0 | 16527780064185 | 16530465922094 | 16530466017773 | 16527780314845 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f0bac409e00 | 0x7f0b90225600 | 1825088 | 1719381 | 65536 | 228135 | 228135 | 228135 | 0 | 1821 | 0 | 0 | 227100 | 0 | 16527780346624 | 16530466042093 | 16530466181291 | 16527780636264 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f0bac409c80 | 0x7f0b90225640 | 1297176 | 1193665 | 65536 | 162146 | 162146 | 162146 | 0 | 1824 | 0 | 0 | 159182 | 0 | 16527780675982 | 16530466214411 | 16530466308650 | 16527780922033 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f0bac409b00 | 0x7f0b90225680 | 2373512 | 2266418 | 65536 | 296688 | 296688 | 296688 | 0 | 1847 | 0 | 0 | 296044 | 0 | 16527780954212 | 16530466332650 | 16530466512009 | 16527781288170 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f0bac409980 | 0x7f0b902256c0 | 2370520 | 2267903 | 65536 | 296314 | 296314 | 296314 | 0 | 1800 | 0 | 0 | 295087 | 0 | 16527781320699 | 16530466536488 | 16530466716007 | 16527781655177 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f0bac409800 | 0x7f0b90225700 | 1302136 | 1199830 | 65536 | 162766 | 162766 | 162766 | 0 | 1824 | 0 | 0 | 162654 | 0 | 16527781687785 | 16530466740167 | 16530466836966 | 16527781936346 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f0bac409680 | 0x7f0b90225740 | 1907560 | 1801246 | 65536 | 238444 | 238444 | 238444 | 0 | 1803 | 0 | 0 | 236070 | 0 | 16527781968525 | 16530466862085 | 16530467007364 | 16527782283894 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f0baea38500 | 0x7f0b90225780 | 1291816 | 1182027 | 65536 | 161476 | 161476 | 161476 | 0 | 1810 | 0 | 0 | 159400 | 0 | 16527782324192 | 16530467039364 | 16530467134883 | 16527782574353 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f0baea38380 | 0x7f0b902257c0 | 2385432 | 2278586 | 65536 | 298178 | 298178 | 298178 | 0 | 1778 | 0 | 0 | 295898 | 0 | 16527782607442 | 16530467160003 | 16530467339841 | 16527782940589 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f0baea38200 | 0x7f0b90225800 | 2398312 | 2274055 | 65536 | 299788 | 299788 | 299788 | 0 | 1793 | 0 | 0 | 296195 | 0 | 16527782973558 | 16530467365441 | 16530467546399 | 16527783304896 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f0baea38080 | 0x7f0b90225840 | 1363800 | 1257931 | 65536 | 170474 | 170474 | 170474 | 0 | 1787 | 0 | 0 | 168651 | 0 | 16527783337305 | 16530467570719 | 16530467670558 | 16527783593535 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f0bac409f00 | 0x7f0b90225880 | 2074040 | 1959043 | 65536 | 259254 | 259254 | 259254 | 0 | 1814 | 0 | 0 | 257540 | 0 | 16527783625904 | 16530467694398 | 16530467849757 | 16527783940403 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f0bac409d80 | 0x7f0b902258c0 | 1286080 | 1185026 | 65536 | 160759 | 160759 | 160759 | 0 | 1773 | 0 | 0 | 159913 | 0 | 16527783980611 | 16530467882716 | 16530467977116 | 16527784223562 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f0bac409c00 | 0x7f0b90225900 | 2371760 | 2264359 | 65536 | 296469 | 296469 | 296469 | 0 | 1777 | 0 | 0 | 295219 | 0 | 16527784255221 | 16530468001435 | 16530468180634 | 16527784589799 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f0bac409a80 | 0x7f0b90225940 | 2367600 | 2264092 | 65536 | 295949 | 295949 | 295949 | 0 | 1800 | 0 | 0 | 293234 | 0 | 16527784621608 | 16530468205913 | 16530468386072 | 16527784953836 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f0bac409900 | 0x7f0b90225980 | 1427304 | 1318763 | 65536 | 178412 | 178412 | 178412 | 0 | 1833 | 0 | 0 | 176831 | 0 | 16527784986194 | 16530468411032 | 16530468518551 | 16527785244245 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f0bac409780 | 0x7f0b902259c0 | 2244928 | 2132382 | 65536 | 280615 | 280615 | 280615 | 0 | 1878 | 0 | 0 | 278647 | 0 | 16527785276814 | 16530468542870 | 16530468712789 | 16527785602102 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f0bac409600 | 0x7f0b90225a00 | 1289632 | 1183163 | 65536 | 161203 | 161203 | 161203 | 0 | 1787 | 0 | 0 | 160928 | 0 | 16527785641970 | 16530468745749 | 16530468841908 | 16527785886321 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f0baea38480 | 0x7f0b90225a40 | 2375648 | 2261798 | 65536 | 296955 | 296955 | 296955 | 0 | 1890 | 0 | 0 | 294213 | 0 | 16527785918370 | 16530468866548 | 16530469046866 | 16527786252948 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f0baea38300 | 0x7f0b90225a80 | 2359552 | 2251601 | 65536 | 294943 | 294943 | 294943 | 0 | 1788 | 0 | 0 | 295469 | 0 | 16527786285807 | 16530469071346 | 16530469251504 | 16527786626104 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f0baea38180 | 0x7f0b90225ac0 | 1498584 | 1378958 | 65536 | 187322 | 187322 | 187322 | 0 | 1830 | 0 | 0 | 186225 | 0 | 16527786658603 | 16530469275184 | 16530469385263 | 16527786924873 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f0baea38000 | 0x7f0b90225b00 | 2397552 | 2284041 | 65536 | 299693 | 299693 | 299693 | 0 | 1826 | 0 | 0 | 297343 | 0 | 16527786957782 | 16530469408783 | 16530469590221 | 16527787292000 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f0bac409e80 | 0x7f0b90225b40 | 1374632 | 1268032 | 65536 | 171828 | 171828 | 171828 | 0 | 1844 | 0 | 0 | 170310 | 0 | 16527787331559 | 16530469622701 | 16530469723980 | 16527787585029 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f0bac409d00 | 0x7f0b90225b80 | 2383088 | 2258516 | 65536 | 297885 | 297885 | 297885 | 0 | 1823 | 0 | 0 | 298581 | 0 | 16527787617228 | 16530469748620 | 16530469931818 | 16527787952116 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f0bac409b80 | 0x7f0b90225bc0 | 2386792 | 2267076 | 65536 | 298348 | 298348 | 298348 | 0 | 1815 | 0 | 0 | 296575 | 0 | 16527787984905 | 16530469957738 | 16530470144616 | 16527788324332 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f0bac409a00 | 0x7f0b90225c00 | 1649936 | 1537583 | 65536 | 206241 | 206241 | 206241 | 0 | 1789 | 0 | 0 | 204270 | 0 | 16527788356881 | 16530470170216 | 16530470292935 | 16527788633861 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f0bac409880 | 0x7f0b90225c40 | 2716352 | 2606837 | 65536 | 339543 | 339543 | 339543 | 0 | 1791 | 0 | 0 | 338883 | 0 | 16527788666410 | 16530470320935 | 16530470527653 | 16527789073955 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f0bac409700 | 0x7f0b90225c80 | 1468272 | 1359603 | 65536 | 183533 | 183533 | 183533 | 0 | 1786 | 0 | 0 | 182122 | 0 | 16527789092584 | 16530470601412 | 16530470712931 | 16527789362094 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f0baea38580 | 0x7f0b90225cc0 | 2375816 | 2233445 | 65536 | 296976 | 296976 | 296976 | 0 | 1822 | 0 | 0 | 296645 | 0 | 16527789397663 | 16530470738531 | 16530470923649 | 16527789731861 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f0baea38400 | 0x7f0b90225d00 | 2392576 | 2252765 | 65536 | 299071 | 299071 | 299071 | 0 | 1838 | 0 | 0 | 296127 | 0 | 16527789765360 | 16530470947969 | 16530471133407 | 16527790102707 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f0baea38280 | 0x7f0b90225d40 | 1803048 | 1691804 | 65536 | 225380 | 225380 | 225380 | 0 | 1824 | 0 | 0 | 222786 | 0 | 16527790135636 | 16530471159647 | 16530471294846 | 16527790426805 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f0baea38100 | 0x7f0b90225d80 | 3055616 | 2938337 | 65536 | 381951 | 381951 | 381951 | 0 | 1817 | 0 | 0 | 378293 | 0 | 16527790459204 | 16530471318846 | 16530471551164 | 16527790891388 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f0bac409f80 | 0x7f0b90225dc0 | 1690168 | 1577915 | 65536 | 211270 | 211270 | 211270 | 0 | 1824 | 0 | 0 | 208784 | 0 | 16527790910098 | 16530471622043 | 16530471748762 | 16527791192417 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f0bac409e00 | 0x7f0b90225e00 | 2440592 | 2320828 | 65536 | 305073 | 305073 | 305073 | 0 | 1807 | 0 | 0 | 302885 | 0 | 16527791224616 | 16530471775802 | 16530471967800 | 16527791566194 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f0bac409c80 | 0x7f0b90225e40 | 2474400 | 2345412 | 65536 | 309299 | 309299 | 309299 | 0 | 1836 | 0 | 0 | 307924 | 0 | 16527791597993 | 16530471992440 | 16530472186358 | 16527791993628 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f0bac409b00 | 0x7f0b90225e80 | 2126368 | 2016579 | 65536 | 265795 | 265795 | 265795 | 0 | 1785 | 0 | 0 | 263556 | 0 | 16527792004488 | 16530472209238 | 16530472369716 | 16527792326546 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f0bac409980 | 0x7f0b90225ec0 | 3694000 | 3578192 | 65536 | 461749 | 461749 | 461749 | 0 | 1836 | 0 | 0 | 459963 | 0 | 16527792359445 | 16530472394036 | 16530472677554 | 16527792838327 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f0bac409800 | 0x7f0b90225f00 | 1944848 | 1835500 | 65536 | 243105 | 243105 | 243105 | 0 | 1860 | 0 | 0 | 240909 | 0 | 16527792856837 | 16530472748433 | 16530472897232 | 16527793159766 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f0bac409680 | 0x7f0b90225f40 | 2675128 | 2557948 | 65536 | 334390 | 334390 | 334390 | 0 | 1824 | 0 | 0 | 332608 | 0 | 16527793194324 | 16530472923472 | 16530473131150 | 16527793596190 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f0baea38500 | 0x7f0b90225f80 | 2649760 | 2524762 | 65536 | 331219 | 331219 | 331219 | 0 | 1842 | 0 | 0 | 329142 | 0 | 16527793606679 | 16530473170189 | 16530473380908 | 16527793994145 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f0baea38380 | 0x7f0b90225fc0 | 2455760 | 2345421 | 65536 | 306969 | 306969 | 306969 | 0 | 1824 | 0 | 0 | 304352 | 0 | 16527794010085 | 16530473415787 | 16530473602186 | 16527794360492 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f0baea38200 | 0x7f0b90226000 | 4337792 | 4224233 | 65536 | 542223 | 542223 | 542223 | 0 | 1788 | 0 | 0 | 540587 | 0 | 16527794394110 | 16530473626985 | 16530473961702 | 16527794924281 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f0baea38080 | 0x7f0b90226040 | 2171880 | 2063261 | 65536 | 271484 | 271484 | 271484 | 0 | 1828 | 0 | 0 | 269025 | 0 | 16527794943060 | 16530474005862 | 16530474174500 | 16527795270488 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f0bac409f00 | 0x7f0b90226080 | 2949160 | 2828767 | 65536 | 368644 | 368644 | 368644 | 0 | 1817 | 0 | 0 | 363855 | 0 | 16527795303537 | 16530474199620 | 16530474426338 | 16527795733491 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f0bac409d80 | 0x7f0b902260c0 | 2935368 | 2815450 | 65536 | 366920 | 366920 | 366920 | 0 | 1765 | 0 | 0 | 367015 | 0 | 16527795743551 | 16530474466658 | 16530474695456 | 16527796181145 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f0bac409c00 | 0x7f0b90226100 | 2784136 | 2669586 | 65536 | 348016 | 348016 | 348016 | 0 | 1804 | 0 | 0 | 344553 | 0 | 16527796191305 | 16530474754815 | 16530474966493 | 16527796609679 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f0bac409a80 | 0x7f0b90226140 | 4992792 | 4873732 | 65536 | 624098 | 624098 | 624098 | 0 | 1796 | 0 | 0 | 622147 | 0 | 16527796620189 | 16530475025533 | 16530475411289 | 16527797212437 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f0bac409900 | 0x7f0b90226180 | 2427656 | 2317986 | 65536 | 303456 | 303456 | 303456 | 0 | 1774 | 0 | 0 | 300673 | 0 | 16527797230476 | 16530475482009 | 16530475668887 | 16527797571884 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f0bac409780 | 0x7f0b902261c0 | 3181192 | 3068821 | 65536 | 397648 | 397648 | 397648 | 0 | 1770 | 0 | 0 | 394960 | 0 | 16527797605443 | 16530475693527 | 16530475944405 | 16527798024897 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f0bac409600 | 0x7f0b90226200 | 3140152 | 3034625 | 65536 | 392518 | 392518 | 392518 | 0 | 1792 | 0 | 0 | 389147 | 0 | 16527798038677 | 16530476002804 | 16530476251442 | 16527798503830 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f0baea38480 | 0x7f0b90226240 | 3099872 | 2987292 | 65536 | 387483 | 387483 | 387483 | 0 | 1769 | 0 | 0 | 386874 | 0 | 16527798514919 | 16530476311441 | 16530476548239 | 16527798970833 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f0baea38300 | 0x7f0b90226280 | 5751768 | 5629198 | 65536 | 718970 | 718970 | 718970 | 0 | 1800 | 0 | 0 | 716566 | 0 | 16527798981762 | 16530476587279 | 16530477040395 | 16527799640678 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f0baea38180 | 0x7f0b902262c0 | 2918232 | 2798266 | 65536 | 364778 | 364778 | 364778 | 0 | 1793 | 0 | 0 | 360331 | 0 | 16527799659878 | 16530477082795 | 16530477308873 | 16527800079562 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f0baea38000 | 0x7f0b90226300 | 3750648 | 3640401 | 65536 | 468830 | 468830 | 468830 | 0 | 1773 | 0 | 0 | 466601 | 0 | 16527800090202 | 16530477343432 | 16530477641830 | 16527800593133 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f0bac409e80 | 0x7f0b90226340 | 3761712 | 3660866 | 65536 | 470213 | 470213 | 470213 | 0 | 1808 | 0 | 0 | 469296 | 0 | 16527800603823 | 16530477679429 | 16530477977667 | 16527801107335 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f0bac409d00 | 0x7f0b90226380 | 3866320 | 3752679 | 65536 | 483289 | 483289 | 483289 | 0 | 1807 | 0 | 0 | 481518 | 0 | 16527801117654 | 16530478016866 | 16530478313504 | 16527801619206 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f0bac409b80 | 0x7f0b902263c0 | 7038448 | 6934428 | 65536 | 879805 | 879805 | 879805 | 0 | 1794 | 0 | 0 | 877563 | 0 | 16527801630076 | 16530478353183 | 16530478911418 | 16527802395128 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f0bac409a00 | 0x7f0b90226400 | 3410752 | 3301530 | 65536 | 426343 | 426343 | 426343 | 0 | 1795 | 0 | 0 | 424881 | 0 | 16527802414347 | 16530478959258 | 16530479222456 | 16527802883190 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f0bac409880 | 0x7f0b90226440 | 4393680 | 4290125 | 65536 | 549209 | 549209 | 549209 | 0 | 1782 | 0 | 0 | 547587 | 0 | 16527802893999 | 16530479260375 | 16530479611572 | 16527803456119 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f0bac409700 | 0x7f0b90226480 | 4400648 | 4292908 | 65536 | 550080 | 550080 | 550080 | 0 | 1786 | 0 | 0 | 548758 | 0 | 16527803466878 | 16530479660052 | 16530480010769 | 16527804019498 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f0baea38580 | 0x7f0b902264c0 | 4508200 | 4389280 | 65536 | 563524 | 563524 | 563524 | 0 | 1853 | 0 | 0 | 561768 | 0 | 16527804030308 | 16530480057168 | 16530480404045 | 16527804581378 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f0baea38400 | 0x7f0b90226500 | 8337736 | 8220623 | 65536 | 1042216 | 1042216 | 1042216 | 0 | 1820 | 0 | 0 | 1038552 | 0 | 16527804592037 | 16530480444205 | 16530481105159 | 16527805456526 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f0baea38280 | 0x7f0b90226540 | 5767376 | 5656019 | 65536 | 720921 | 720921 | 720921 | 0 | 1827 | 0 | 0 | 718325 | 0 | 16527805475945 | 16530481154118 | 16530481602754 | 16527806142821 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f0baea38100 | 0x7f0b90226580 | 5787232 | 5671957 | 65536 | 723403 | 723403 | 723403 | 0 | 1871 | 0 | 0 | 721443 | 0 | 16527806154180 | 16530481643234 | 16530482108830 | 16527806954141 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f0bac409f80 | 0x7f0b902265c0 | 5788928 | 5665413 | 65536 | 723615 | 723615 | 723615 | 0 | 1819 | 0 | 0 | 720629 | 0 | 16527806965461 | 16530482152190 | 16530482613625 | 16527807764771 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f0bac409e00 | 0x7f0b90226600 | 5810256 | 5692286 | 65536 | 726281 | 726281 | 726281 | 0 | 1799 | 0 | 0 | 723708 | 0 | 16527807776401 | 16530482656985 | 16530483109141 | 16527808549212 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f0bac409c80 | 0x7f0b90226640 | 10939624 | 10815736 | 65536 | 1367452 | 1367452 | 1367452 | 0 | 1791 | 0 | 0 | 1365108 | 0 | 16527808561152 | 16530483149781 | 16530484018733 | 16527809751118 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f0bac409b00 | 0x7f0b90226680 | 10965032 | 10849810 | 65536 | 1370628 | 1370628 | 1370628 | 0 | 1852 | 0 | 0 | 1368282 | 0 | 16527809770388 | 16530484066893 | 16530484921605 | 16527810974834 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f0bac409980 | 0x7f0b902266c0 | 10991536 | 10862608 | 65536 | 1373941 | 1373941 | 1373941 | 0 | 1815 | 0 | 0 | 1372115 | 0 | 16527810987553 | 16530484983684 | 16530485868797 | 16527812236547 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f0bac409800 | 0x7f0b90226700 | 10958472 | 10844711 | 65536 | 1369808 | 1369808 | 1369808 | 0 | 1776 | 0 | 0 | 1369335 | 0 | 16527812247837 | 16530485931036 | 16530486812148 | 16527813492222 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f0bac409680 | 0x7f0b90226740 | 11021840 | 10886750 | 65536 | 1377729 | 1377729 | 1377729 | 0 | 1813 | 0 | 0 | 1373206 | 0 | 16527813504591 | 16530486880468 | 16530487740460 | 16527814737736 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f0baea38500 | 0x7f0b90226780 | 21309168 | 21182928 | 65536 | 2663645 | 2663645 | 2663645 | 0 | 1835 | 0 | 0 | 2658671 | 0 | 16527814750506 | 16530487805419 | 16530489504604 | 16527816799321 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f0baea38380 | 0x7f0b902267c0 | 21364368 | 21225475 | 65536 | 2670545 | 2670545 | 2670545 | 0 | 1858 | 0 | 0 | 2668092 | 0 | 16527816818000 | 16530489604283 | 16530491270029 | 16527818790778 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f0baea38200 | 0x7f0b90226800 | 21353920 | 21239036 | 65536 | 2669239 | 2669239 | 2669239 | 0 | 1789 | 0 | 0 | 2666863 | 0 | 16527818802708 | 16530491359308 | 16530493085852 | 16527820846263 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f0baea38080 | 0x7f0b90226840 | 21376880 | 21237111 | 65536 | 2672109 | 2672109 | 2672109 | 0 | 1780 | 0 | 0 | 2669988 | 0 | 16527820857503 | 16530493174012 | 16530494890796 | 16527822898268 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f0bac409f00 | 0x7f0b90226880 | 21371392 | 21257472 | 65536 | 2671423 | 2671423 | 2671423 | 0 | 1864 | 0 | 0 | 2669733 | 0 | 16527822910287 | 16530494982316 | 16530496656861 | 16527824898164 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 162887 | 162887 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f0bac409d80 | 0x7f0b902268c0 | 42069472 | 41927427 | 65536 | 5258683 | 5258683 | 5258683 | 0 | 1841 | 0 | 0 | 5256262 | 0 | 16527824909134 | 16530496744540 | 16530500103390 | 16527828581890 |