40 KiB
40 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_INSTS_LDS | SQ_INST_LEVEL_LDS | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 936874 | 936879 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f608a404180 | 0 | 0 | 0 | 12076632988700397 | 12076633233142956 | 12076633233467755 | 12076633233577518 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 936874 | 936879 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f608a435100 | 0 | 0 | 0 | 12076633248282761 | 12076633248580944 | 12076633248587504 | 12076633248592868 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f61960b3900 | 0x7f608a435140 | 0 | 0 | 0 | 12076633248655084 | 12076633248875663 | 12076633249007503 | 12076633249011156 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f61960b3800 | 0x7f608a435180 | 0 | 0 | 0 | 12076633249083771 | 12076633249259982 | 12076633249507821 | 12076633249576949 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f61960b3700 | 0x7f608a4351c0 | 0 | 0 | 0 | 12076633249608708 | 12076633249789420 | 12076633250035179 | 12076633250105001 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f61960b3600 | 0x7f608a435200 | 0 | 0 | 0 | 12076633250132733 | 12076633250316619 | 12076633250448138 | 12076633250451676 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f61960b3500 | 0x7f608a435240 | 0 | 0 | 0 | 12076633250496409 | 12076633250679817 | 12076633250812617 | 12076633250816044 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f61960b3400 | 0x7f608a435280 | 0 | 0 | 0 | 12076633250895321 | 12076633251060296 | 12076633251191976 | 12076633251195409 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f61960b3300 | 0x7f608a4352c0 | 0 | 0 | 0 | 12076633251246094 | 12076633251413895 | 12076633251661094 | 12076633251701931 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f61960b3a00 | 0x7f608a435300 | 0 | 0 | 0 | 12076633251729673 | 12076633251907493 | 12076633252160773 | 12076633252201130 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f61960b3900 | 0x7f608a435340 | 0 | 0 | 0 | 12076633252226157 | 12076633252419172 | 12076633252550691 | 12076633252553866 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f61960b3800 | 0x7f608a435380 | 0 | 0 | 0 | 12076633252603118 | 12076633252768771 | 12076633252906050 | 12076633252909488 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f61960b3700 | 0x7f608a4353c0 | 0 | 0 | 0 | 12076633252972405 | 12076633253146529 | 12076633253279009 | 12076633253282592 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f61960b3600 | 0x7f608a435400 | 0 | 0 | 0 | 12076633253333196 | 12076633253509248 | 12076633253764447 | 12076633253830080 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f61960b3500 | 0x7f608a435440 | 0 | 0 | 0 | 12076633253855728 | 12076633254038846 | 12076633254284126 | 12076633254350308 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f61960b3400 | 0x7f608a435480 | 0 | 0 | 0 | 12076633254374643 | 12076633254569085 | 12076633254696444 | 12076633254699788 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f61960b3300 | 0x7f608a4354c0 | 0 | 0 | 0 | 12076633254755151 | 12076633254917723 | 12076633255047163 | 12076633255050440 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f61960b3a00 | 0x7f608a435500 | 0 | 0 | 0 | 12076633255116212 | 12076633255282362 | 12076633255415482 | 12076633255419256 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f61960b3900 | 0x7f608a435540 | 0 | 0 | 0 | 12076633255468568 | 12076633255632441 | 12076633255880920 | 12076633255946547 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f61960b3800 | 0x7f608a435580 | 0 | 0 | 0 | 12076633255971814 | 12076633256167319 | 12076633256415479 | 12076633256482744 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f61960b3700 | 0x7f608a4355c0 | 0 | 0 | 0 | 12076633256513532 | 12076633256694198 | 12076633256824277 | 12076633256827666 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f61960b3600 | 0x7f608a435600 | 0 | 0 | 0 | 12076633256875695 | 12076633257046196 | 12076633257177396 | 12076633257180993 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f61960b3500 | 0x7f608a435640 | 0 | 0 | 0 | 12076633257242778 | 12076633257409555 | 12076633257538995 | 12076633257542646 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f61960b3400 | 0x7f608a435680 | 0 | 0 | 0 | 12076633257590625 | 12076633257760274 | 12076633258010673 | 12076633258077330 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f61960b3300 | 0x7f608a4356c0 | 0 | 0 | 0 | 12076633258106565 | 12076633258284592 | 12076633258536752 | 12076633258601906 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f61960b3a00 | 0x7f608a435700 | 0 | 0 | 0 | 12076633258628776 | 12076633258812431 | 12076633258942990 | 12076633258946537 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f61960b3900 | 0x7f608a435740 | 0 | 0 | 0 | 12076633258996160 | 12076633259162990 | 12076633259293709 | 12076633259297250 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f61960b3800 | 0x7f608a435780 | 0 | 0 | 0 | 12076633259359465 | 12076633259524268 | 12076633259654668 | 12076633259658061 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f61960b3700 | 0x7f608a4357c0 | 0 | 0 | 0 | 12076633259706872 | 12076633259876267 | 12076633260129866 | 12076633260195971 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f61960b3600 | 0x7f608a435800 | 0 | 0 | 0 | 12076633260219806 | 12076633260405705 | 12076633260656105 | 12076633260721960 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f61960b3500 | 0x7f608a435840 | 0 | 0 | 0 | 12076633260748980 | 12076633260926024 | 12076633261062503 | 12076633261066380 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f61960b3400 | 0x7f608a435880 | 0 | 0 | 0 | 12076633261118247 | 12076633261280263 | 12076633261410822 | 12076633261414207 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f61960b3300 | 0x7f608a4358c0 | 0 | 0 | 0 | 12076633261475622 | 12076633261637701 | 12076633261767301 | 12076633261771041 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f61960b3a00 | 0x7f608a435900 | 0 | 0 | 0 | 12076633261822056 | 12076633261987140 | 12076633262240579 | 12076633262309383 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f61960b3900 | 0x7f608a435940 | 0 | 0 | 0 | 12076633262335331 | 12076633262518499 | 12076633262770338 | 12076633262836934 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f61960b3800 | 0x7f608a435980 | 0 | 0 | 0 | 12076633262862812 | 12076633263045537 | 12076633263174976 | 12076633263179050 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f61960b3700 | 0x7f608a4359c0 | 0 | 0 | 0 | 12076633263228392 | 12076633263397856 | 12076633263532415 | 12076633263536104 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f61960b3600 | 0x7f608a435a00 | 0 | 0 | 0 | 12076633263597128 | 12076633263758014 | 12076633263886494 | 12076633263890213 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f61960b3500 | 0x7f608a435a40 | 0 | 0 | 0 | 12076633263939264 | 12076633264116733 | 12076633264370012 | 12076633264436299 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f61960b3400 | 0x7f608a435a80 | 0 | 0 | 0 | 12076633264461406 | 12076633264642332 | 12076633264895611 | 12076633264936519 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f61960b3300 | 0x7f608a435ac0 | 0 | 0 | 0 | 12076633264961666 | 12076633265147450 | 12076633265280569 | 12076633265284477 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f61960b3a00 | 0x7f608a435b00 | 0 | 0 | 0 | 12076633265328649 | 12076633265503449 | 12076633265636568 | 12076633265640128 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f61960b3900 | 0x7f608a435b40 | 0 | 0 | 0 | 12076633265701512 | 12076633265864248 | 12076633265994967 | 12076633265998645 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f61960b3800 | 0x7f608a435b80 | 0 | 0 | 0 | 12076633266047025 | 12076633266217846 | 12076633266486326 | 12076633266554890 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f61960b3700 | 0x7f608a435bc0 | 0 | 0 | 0 | 12076633266577662 | 12076633266756405 | 12076633267008404 | 12076633267075178 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f61960b3600 | 0x7f608a435c00 | 0 | 0 | 0 | 12076633267102458 | 12076633267288243 | 12076633267415762 | 12076633267419418 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f61960b3500 | 0x7f608a435c40 | 0 | 0 | 0 | 12076633267469291 | 12076633267632402 | 12076633267763441 | 12076633267767095 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f61960b3400 | 0x7f608a435c80 | 0 | 0 | 0 | 12076633267827266 | 12076633267990641 | 12076633268119760 | 12076633268123598 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f61960b3300 | 0x7f608a435cc0 | 0 | 0 | 0 | 12076633268173450 | 12076633268353839 | 12076633268615119 | 12076633268657311 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f61960b3a00 | 0x7f608a435d00 | 0 | 0 | 0 | 12076633268680654 | 12076633268861358 | 12076633269151117 | 12076633269218785 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f61960b3900 | 0x7f608a435d40 | 0 | 0 | 0 | 12076633269246707 | 12076633269422316 | 12076633269592075 | 12076633269595656 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f61960b3800 | 0x7f608a435d80 | 0 | 0 | 0 | 12076633269648084 | 12076633269809515 | 12076633269949354 | 12076633269953011 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f61960b3700 | 0x7f608a435dc0 | 0 | 0 | 0 | 12076633270019164 | 12076633270181353 | 12076633270313833 | 12076633270317358 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f61960b3600 | 0x7f608a435e00 | 0 | 0 | 0 | 12076633270370327 | 12076633270532552 | 12076633270796231 | 12076633270862883 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f61960b3500 | 0x7f608a435e40 | 0 | 0 | 0 | 12076633270898730 | 12076633271061991 | 12076633271308390 | 12076633271374665 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f61960b3400 | 0x7f608a435e80 | 0 | 0 | 0 | 12076633271405943 | 12076633271589509 | 12076633271718628 | 12076633271722212 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f61960b3300 | 0x7f608a435ec0 | 0 | 0 | 0 | 12076633271770912 | 12076633271932868 | 12076633272075747 | 12076633272079266 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f61960b3a00 | 0x7f608a435f00 | 0 | 0 | 0 | 12076633272141782 | 12076633272304866 | 12076633272433666 | 12076633272437141 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f61960b3900 | 0x7f608a435f40 | 0 | 0 | 0 | 12076633272485612 | 12076633272647905 | 12076633272911264 | 12076633272933585 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f61960b3800 | 0x7f608a435f80 | 0 | 0 | 0 | 12076633272975523 | 12076633273140864 | 12076633273395743 | 12076633273445096 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f61960b3700 | 0x7f608a435fc0 | 0 | 0 | 0 | 12076633273473159 | 12076633273648382 | 12076633273782142 | 12076633273785700 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f61960b3600 | 0x7f608a436000 | 0 | 0 | 0 | 12076633273834611 | 12076633274003741 | 12076633274144860 | 12076633274148886 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f61960b3500 | 0x7f608a436040 | 0 | 0 | 0 | 12076633274210490 | 12076633274371260 | 12076633274502299 | 12076633274505970 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f61960b3400 | 0x7f608a436080 | 0 | 0 | 0 | 12076633274555151 | 12076633274717499 | 12076633274978138 | 12076633275025376 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f61960b3300 | 0x7f608a4360c0 | 0 | 0 | 0 | 12076633275051615 | 12076633275229657 | 12076633275477816 | 12076633275524695 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f61960b3a00 | 0x7f608a436100 | 0 | 0 | 0 | 12076633275550663 | 12076633275726775 | 12076633275857175 | 12076633275860810 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f61960b3900 | 0x7f608a436140 | 0 | 0 | 0 | 12076633275909751 | 12076633276086454 | 12076633276231414 | 12076633276235206 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f61960b3800 | 0x7f608a436180 | 0 | 0 | 0 | 12076633276298985 | 12076633276459573 | 12076633276595412 | 12076633276599324 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f61960b3700 | 0x7f608a4361c0 | 0 | 0 | 0 | 12076633276647052 | 12076633276813492 | 12076633277074771 | 12076633277125593 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f61960b3600 | 0x7f608a436200 | 0 | 0 | 0 | 12076633277149186 | 12076633277331890 | 12076633277590129 | 12076633277638687 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f61960b3500 | 0x7f608a436240 | 0 | 0 | 0 | 12076633277665527 | 12076633277840208 | 12076633277972688 | 12076633277976395 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f61960b3400 | 0x7f608a436280 | 0 | 0 | 0 | 12076633278029724 | 12076633278204047 | 12076633278354127 | 12076633278357765 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f61960b3300 | 0x7f608a4362c0 | 0 | 0 | 0 | 12076633278438284 | 12076633278598446 | 12076633278732685 | 12076633278736299 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f61960b3a00 | 0x7f608a436300 | 0 | 0 | 0 | 12076633278786382 | 12076633278948525 | 12076633279214764 | 12076633279262998 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f61960b3900 | 0x7f608a436340 | 0 | 0 | 0 | 12076633279293315 | 12076633279473163 | 12076633279726922 | 12076633279778217 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f61960b3800 | 0x7f608a436380 | 0 | 0 | 0 | 12076633279813532 | 12076633279990601 | 12076633280127081 | 12076633280130863 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f61960b3700 | 0x7f608a4363c0 | 0 | 0 | 0 | 12076633280180215 | 12076633280349320 | 12076633280506760 | 12076633280510399 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f61960b3600 | 0x7f608a436400 | 0 | 0 | 0 | 12076633280571983 | 12076633280734279 | 12076633280864358 | 12076633280867904 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f61960b3500 | 0x7f608a436440 | 0 | 0 | 0 | 12076633280916294 | 12076633281085478 | 12076633281345317 | 12076633281395826 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f61960b3400 | 0x7f608a436480 | 0 | 0 | 0 | 12076633281422776 | 12076633281599716 | 12076633281849155 | 12076633281895325 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f61960b3300 | 0x7f608a4364c0 | 0 | 0 | 0 | 12076633281921594 | 12076633282100194 | 12076633282236994 | 12076633282240898 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f61960b3a00 | 0x7f608a436500 | 0 | 0 | 0 | 12076633282284318 | 12076633282460353 | 12076633282625953 | 12076633282629581 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f61960b3900 | 0x7f608a436540 | 0 | 0 | 0 | 12076633282690083 | 12076633282847872 | 12076633282979391 | 12076633282983018 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f61960b3800 | 0x7f608a436580 | 0 | 0 | 0 | 12076633283031619 | 12076633283211711 | 12076633283472510 | 12076633283519316 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f61960b3700 | 0x7f608a4365c0 | 0 | 0 | 0 | 12076633283545524 | 12076633283720349 | 12076633283975868 | 12076633284022852 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f61960b3600 | 0x7f608a436600 | 0 | 0 | 0 | 12076633284047989 | 12076633284232827 | 12076633284366107 | 12076633284370068 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f61960b3500 | 0x7f608a436640 | 0 | 0 | 0 | 12076633284418819 | 12076633284585946 | 12076633284759866 | 12076633284763480 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f61960b3400 | 0x7f608a436680 | 0 | 0 | 0 | 12076633284826527 | 12076633284988985 | 12076633285125464 | 12076633285129290 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f61960b3300 | 0x7f608a4366c0 | 0 | 0 | 0 | 12076633285181057 | 12076633285342744 | 12076633285599383 | 12076633285625774 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f61960b3a00 | 0x7f608a436700 | 0 | 0 | 0 | 12076633285668022 | 12076633285828182 | 12076633286081141 | 12076633286108352 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f61960b3900 | 0x7f608a436740 | 0 | 0 | 0 | 12076633286153105 | 12076633286312180 | 12076633286445300 | 12076633286449176 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f61960b3800 | 0x7f608a436780 | 0 | 0 | 0 | 12076633286498036 | 12076633286660659 | 12076633286843539 | 12076633286847456 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f61960b3700 | 0x7f608a4367c0 | 0 | 0 | 0 | 12076633286908300 | 12076633287070098 | 12076633287208498 | 12076633287212586 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f61960b3600 | 0x7f608a436800 | 0 | 0 | 0 | 12076633287261647 | 12076633287430097 | 12076633287696336 | 12076633287716503 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f61960b3500 | 0x7f608a436840 | 0 | 0 | 0 | 12076633287761988 | 12076633287923695 | 12076633288174094 | 12076633288199181 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f61960b3400 | 0x7f608a436880 | 0 | 0 | 0 | 12076633288243794 | 12076633288403374 | 12076633288532813 | 12076633288536689 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f61960b3300 | 0x7f608a4368c0 | 0 | 0 | 0 | 12076633288584948 | 12076633288745772 | 12076633288938412 | 12076633288961709 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f61960b3a00 | 0x7f608a436900 | 0 | 0 | 0 | 12076633289024366 | 12076633289187371 | 12076633289317931 | 12076633289321859 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f61960b3900 | 0x7f608a436940 | 0 | 0 | 0 | 12076633289363366 | 12076633289545130 | 12076633289831369 | 12076633289855281 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f61960b3800 | 0x7f608a436980 | 0 | 0 | 0 | 12076633289899614 | 12076633290070728 | 12076633290354727 | 12076633290382903 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f61960b3700 | 0x7f608a4369c0 | 0 | 0 | 0 | 12076633290424430 | 12076633290584806 | 12076633290719366 | 12076633290723366 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f61960b3600 | 0x7f608a436a00 | 0 | 0 | 0 | 12076633290771716 | 12076633290941765 | 12076633291152165 | 12076633291176639 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f61960b3500 | 0x7f608a436a40 | 0 | 0 | 0 | 12076633291242452 | 12076633291407204 | 12076633291545443 | 12076633291549302 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f61960b3400 | 0x7f608a436a80 | 0 | 0 | 0 | 12076633291597883 | 12076633291776163 | 12076633292083522 | 12076633292109204 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f61960b3300 | 0x7f608a436ac0 | 0 | 0 | 0 | 12076633292158676 | 12076633292322561 | 12076633292578880 | 12076633292602492 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f61960b3a00 | 0x7f608a436b00 | 0 | 0 | 0 | 12076633292649299 | 12076633292807839 | 12076633292943679 | 12076633292947423 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f61960b3900 | 0x7f608a436b40 | 0 | 0 | 0 | 12076633292996174 | 12076633293161598 | 12076633293389917 | 12076633293417397 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f61960b3800 | 0x7f608a436b80 | 0 | 0 | 0 | 12076633293470767 | 12076633293631836 | 12076633293769116 | 12076633293772798 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f61960b3700 | 0x7f608a436bc0 | 0 | 0 | 0 | 12076633293822220 | 12076633293993115 | 12076633294324954 | 12076633294353188 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f61960b3600 | 0x7f608a436c00 | 0 | 0 | 0 | 12076633294398242 | 12076633294560153 | 12076633294822233 | 12076633294847177 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f61960b3500 | 0x7f608a436c40 | 0 | 0 | 0 | 12076633294890838 | 12076633295059352 | 12076633295197271 | 12076633295201356 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f61960b3400 | 0x7f608a436c80 | 0 | 0 | 0 | 12076633295249225 | 12076633295419031 | 12076633295665110 | 12076633295685576 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f61960b3300 | 0x7f608a436cc0 | 0 | 0 | 0 | 12076633295742252 | 12076633295904949 | 12076633296045589 | 12076633296049513 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f61960b3a00 | 0x7f608a436d00 | 0 | 0 | 0 | 12076633296097262 | 12076633296280966 | 12076633296657766 | 12076633296685867 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f61960b3900 | 0x7f608a436d40 | 0 | 0 | 0 | 12076633296723918 | 12076633296886884 | 12076633297151364 | 12076633297178453 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f61960b3800 | 0x7f608a436d80 | 0 | 0 | 0 | 12076633297220161 | 12076633297383523 | 12076633297525603 | 12076633297528014 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f61960b3700 | 0x7f608a436dc0 | 0 | 0 | 0 | 12076633297580221 | 12076633297741283 | 12076633298020962 | 12076633298044374 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f61960b3600 | 0x7f608a436e00 | 0 | 0 | 0 | 12076633298107161 | 12076633298273282 | 12076633298413602 | 12076633298416346 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f61960b3500 | 0x7f608a436e40 | 0 | 0 | 0 | 12076633298466800 | 12076633298647681 | 12076633299071201 | 12076633299095800 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f61960b3400 | 0x7f608a436e80 | 0 | 0 | 0 | 12076633299139231 | 12076633299308160 | 12076633299567360 | 12076633299590430 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f61960b3300 | 0x7f608a436ec0 | 0 | 0 | 0 | 12076633299637748 | 12076633299804639 | 12076633299945439 | 12076633299948135 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f61960b3a00 | 0x7f608a436f00 | 0 | 0 | 0 | 12076633299990254 | 12076633300171999 | 12076633300487678 | 12076633300511293 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f61960b3900 | 0x7f608a436f40 | 0 | 0 | 0 | 12076633300570533 | 12076633300734398 | 12076633300887038 | 12076633300889767 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f61960b3800 | 0x7f608a436f80 | 0 | 0 | 0 | 12076633300937165 | 12076633301115037 | 12076633301631676 | 12076633301659378 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f61960b3700 | 0x7f608a436fc0 | 0 | 0 | 0 | 12076633301703430 | 12076633301865276 | 12076633302163515 | 12076633302190136 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f61960b3600 | 0x7f608a437000 | 0 | 0 | 0 | 12076633302234328 | 12076633302396315 | 12076633302551995 | 12076633302554644 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f61960b3500 | 0x7f608a437040 | 0 | 0 | 0 | 12076633302603625 | 12076633302771354 | 12076633303156474 | 12076633303180137 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f61960b3400 | 0x7f608a437080 | 0 | 0 | 0 | 12076633303242233 | 12076633303404793 | 12076633303582393 | 12076633303584940 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f61960b3300 | 0x7f608a4370c0 | 0 | 0 | 0 | 12076633303633100 | 12076633303803993 | 12076633304414232 | 12076633304437426 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f61960b3a00 | 0x7f608a437100 | 0 | 0 | 0 | 12076633304476499 | 12076633304637111 | 12076633304965591 | 12076633304984393 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f61960b3900 | 0x7f608a437140 | 0 | 0 | 0 | 12076633305038173 | 12076633305203350 | 12076633305382550 | 12076633305385199 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f61960b3800 | 0x7f608a437180 | 0 | 0 | 0 | 12076633305433739 | 12076633305614070 | 12076633306070229 | 12076633306094148 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f61960b3700 | 0x7f608a4371c0 | 0 | 0 | 0 | 12076633306154109 | 12076633306318389 | 12076633306519028 | 12076633306564092 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f61960b3600 | 0x7f608a437200 | 0 | 0 | 0 | 12076633306592905 | 12076633306767028 | 12076633307477107 | 12076633307523847 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f61960b3500 | 0x7f608a437240 | 0 | 0 | 0 | 12076633307547892 | 12076633307737106 | 12076633308109426 | 12076633308157085 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f61960b3400 | 0x7f608a437280 | 0 | 0 | 0 | 12076633308188143 | 12076633308370385 | 12076633308573425 | 12076633308619024 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f61960b3300 | 0x7f608a4372c0 | 0 | 0 | 0 | 12076633308647447 | 12076633308822065 | 12076633309347504 | 12076633309396020 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f61960b3a00 | 0x7f608a437300 | 0 | 0 | 0 | 12076633309437276 | 12076633309601423 | 12076633309825103 | 12076633309874319 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f61960b3900 | 0x7f608a437340 | 0 | 0 | 0 | 12076633309899737 | 12076633310079822 | 12076633310878861 | 12076633310926136 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f61960b3800 | 0x7f608a437380 | 0 | 0 | 0 | 12076633310953026 | 12076633311138381 | 12076633311557900 | 12076633311605259 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f61960b3700 | 0x7f608a4373c0 | 0 | 0 | 0 | 12076633311631217 | 12076633311813740 | 12076633312041419 | 12076633312087807 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f61960b3600 | 0x7f608a437400 | 0 | 0 | 0 | 12076633312112462 | 12076633312292139 | 12076633312889738 | 12076633312935874 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f61960b3500 | 0x7f608a437440 | 0 | 0 | 0 | 12076633312975969 | 12076633313151337 | 12076633313419977 | 12076633313467934 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f61960b3400 | 0x7f608a437480 | 0 | 0 | 0 | 12076633313493632 | 12076633313672937 | 12076633314658695 | 12076633314725303 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f61960b3300 | 0x7f608a4374c0 | 0 | 0 | 0 | 12076633314756260 | 12076633314936934 | 12076633315454214 | 12076633315523988 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f61960b3a00 | 0x7f608a437500 | 0 | 0 | 0 | 12076633315542292 | 12076633315737733 | 12076633316011173 | 12076633316058723 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f61960b3900 | 0x7f608a437540 | 0 | 0 | 0 | 12076633316081686 | 12076633316265252 | 12076633317003331 | 12076633317069343 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f61960b3800 | 0x7f608a437580 | 0 | 0 | 0 | 12076633317108786 | 12076633317274851 | 12076633317590690 | 12076633317656675 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f61960b3700 | 0x7f608a4375c0 | 0 | 0 | 0 | 12076633317677955 | 12076633317864450 | 12076633319034368 | 12076633319102575 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f61960b3600 | 0x7f608a437600 | 0 | 0 | 0 | 12076633319135887 | 12076633319321247 | 12076633319932606 | 12076633319998361 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f61960b3500 | 0x7f608a437640 | 0 | 0 | 0 | 12076633320030581 | 12076633320206366 | 12076633320523965 | 12076633320592386 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f61960b3400 | 0x7f608a437680 | 0 | 0 | 0 | 12076633320617833 | 12076633320794685 | 12076633321674043 | 12076633321740522 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f61960b3300 | 0x7f608a4376c0 | 0 | 0 | 0 | 12076633321778643 | 12076633321945723 | 12076633322379322 | 12076633322446986 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f61960b3a00 | 0x7f608a437700 | 0 | 0 | 0 | 12076633322473746 | 12076633322659802 | 12076633324208919 | 12076633324276459 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f61960b3900 | 0x7f608a437740 | 0 | 0 | 0 | 12076633324306204 | 12076633324488439 | 12076633325283157 | 12076633325350296 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f61960b3800 | 0x7f608a437780 | 0 | 0 | 0 | 12076633325379391 | 12076633325562997 | 12076633325977236 | 12076633326043927 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f61960b3700 | 0x7f608a4377c0 | 0 | 0 | 0 | 12076633326068562 | 12076633326254996 | 12076633327413714 | 12076633327480288 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f61960b3600 | 0x7f608a437800 | 0 | 0 | 0 | 12076633327521235 | 12076633327688753 | 12076633328474832 | 12076633328542544 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f61960b3500 | 0x7f608a437840 | 0 | 0 | 0 | 12076633328574043 | 12076633328754672 | 12076633331806507 | 12076633331873751 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f61960b3400 | 0x7f608a437880 | 0 | 0 | 0 | 12076633331903486 | 12076633332089866 | 12076633333631304 | 12076633333699707 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f61960b3300 | 0x7f608a4378c0 | 0 | 0 | 0 | 12076633333718633 | 12076633333917383 | 12076633334706502 | 12076633334775609 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f61960b3a00 | 0x7f608a437900 | 0 | 0 | 0 | 12076633334805665 | 12076633334990981 | 12076633337274018 | 12076633337341361 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f61960b3900 | 0x7f608a437940 | 0 | 0 | 0 | 12076633337386325 | 12076633337554977 | 12076633339089695 | 12076633339156187 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f61960b3800 | 0x7f608a437980 | 0 | 0 | 0 | 12076633339186183 | 12076633339368574 | 12076633345424244 | 12076633345491572 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f61960b3700 | 0x7f608a4379c0 | 0 | 0 | 0 | 12076633345522229 | 12076633345703764 | 12076633348746799 | 12076633348812990 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f61960b3600 | 0x7f608a437a00 | 0 | 0 | 0 | 12076633348844318 | 12076633349024238 | 12076633350563916 | 12076633350634588 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 936874 | 936879 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f61960b3500 | 0x7f608a437a40 | 0 | 0 | 0 | 12076633350663292 | 12076633350846795 | 12076633355372868 | 12076633355441740 |