48 KiB
48 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 175379 | 175379 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7fce3f804280 | 380644 | 380644 | 8666 | 3045160 | 524288 | 238245177 | 2959012 | 0 | 969233460 | 16792691334909 | 16791983173970 | 16792834490537 | 16792834600426 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 175379 | 175379 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7fce3f823f80 | 33369 | 33369 | 29853 | 266960 | 512 | 1661479 | 164195 | 0 | 6659136 | 16792839726115 | 16792834490537 | 16792839860756 | 16792839865120 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7fcf4a0c6380 | 0x7fce3f823fc0 | 168953 | 168953 | 15194 | 1351632 | 65536 | 94817324 | 1245795 | 0 | 381000384 | 16792839908649 | 16792839860756 | 16792840256915 | 16792840259288 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7fcf4a0c6200 | 0x7fce3f824000 | 319429 | 319429 | 24354 | 2555440 | 65536 | 233288207 | 2451793 | 0 | 934882360 | 16792840305987 | 16792840256915 | 16792840690033 | 16792840692284 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7fcf4a0c6080 | 0x7fce3f824040 | 319876 | 319876 | 23907 | 2559016 | 65536 | 235901159 | 2455981 | 0 | 945335812 | 16792840733953 | 16792840690033 | 16792841115632 | 16792841117571 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7fcf401c3f00 | 0x7fce3f824080 | 172748 | 172748 | 15999 | 1381992 | 65536 | 99098189 | 1276923 | 0 | 398115668 | 16792841158950 | 16792841115632 | 16792841454350 | 16792841456360 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7fcf401c3d80 | 0x7fce3f8240c0 | 169846 | 169846 | 15344 | 1358776 | 65536 | 95621491 | 1252978 | 0 | 384211304 | 16792841497389 | 16792841454350 | 16792841781709 | 16792841783830 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7fcf401c3c00 | 0x7fce3f824100 | 169702 | 169702 | 16389 | 1357624 | 65536 | 109155539 | 1257096 | 0 | 438355124 | 16792841837408 | 16792841781709 | 16792842106828 | 16792842108780 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7fcf401c3a80 | 0x7fce3f824140 | 319682 | 319682 | 24981 | 2557464 | 65536 | 237036267 | 2452329 | 0 | 949877172 | 16792842150018 | 16792842106828 | 16792842542346 | 16792842544326 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7fcf401c3900 | 0x7fce3f824180 | 317314 | 317314 | 23872 | 2538520 | 65536 | 232982217 | 2435182 | 0 | 933658588 | 16792842585005 | 16792842542346 | 16792842964105 | 16792842966153 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7fcf401c3780 | 0x7fce3f8241c0 | 167206 | 167206 | 15060 | 1337656 | 65536 | 107948887 | 1238392 | 0 | 433525984 | 16792843006082 | 16792842964105 | 16792843286984 | 16792843289053 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7fcf401c3600 | 0x7fce3f824200 | 168081 | 168081 | 15504 | 1344656 | 65536 | 105617277 | 1239895 | 0 | 424197524 | 16792843328591 | 16792843286984 | 16792843611303 | 16792843613503 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7fcf4a0c6480 | 0x7fce3f824240 | 168194 | 168194 | 16132 | 1345560 | 65536 | 110406495 | 1242867 | 0 | 443354104 | 16792843666801 | 16792843611303 | 16792843938981 | 16792843941112 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7fcf4a0c6300 | 0x7fce3f824280 | 317394 | 317394 | 24550 | 2539160 | 65536 | 237155014 | 2435280 | 0 | 950349808 | 16792843983141 | 16792843938981 | 16792844353220 | 16792844355249 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7fcf4a0c6180 | 0x7fce3f8242c0 | 317416 | 317416 | 24797 | 2539336 | 65536 | 230264381 | 2426582 | 0 | 922789000 | 16792844396058 | 16792844353220 | 16792844770338 | 16792844772566 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7fcf4a0c6000 | 0x7fce3f824300 | 166282 | 166282 | 15737 | 1330264 | 65536 | 104504622 | 1226278 | 0 | 419747140 | 16792844813395 | 16792844770338 | 16792845101697 | 16792845103656 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7fcf401c3e80 | 0x7fce3f824340 | 168038 | 168038 | 15517 | 1344312 | 65536 | 111512917 | 1243154 | 0 | 447782968 | 16792845144405 | 16792845101697 | 16792845425376 | 16792845427386 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7fcf401c3d00 | 0x7fce3f824380 | 166605 | 166605 | 16400 | 1332848 | 65536 | 110514050 | 1229244 | 0 | 443777984 | 16792845476434 | 16792845425376 | 16792845749695 | 16792845751895 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7fcf401c3b80 | 0x7fce3f8243c0 | 315832 | 315832 | 24567 | 2526664 | 65536 | 234164606 | 2420848 | 0 | 938390212 | 16792845793444 | 16792845749695 | 16792846164413 | 16792846166712 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7fcf401c3a00 | 0x7fce3f824400 | 314767 | 314767 | 23957 | 2518144 | 65536 | 227168116 | 2411625 | 0 | 910407996 | 16792846207911 | 16792846164413 | 16792846591452 | 16792846593399 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7fcf401c3880 | 0x7fce3f824440 | 168785 | 168785 | 15713 | 1350288 | 65536 | 110992495 | 1247054 | 0 | 445704444 | 16792846633478 | 16792846591452 | 16792846914170 | 16792846916409 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7fcf401c3700 | 0x7fce3f824480 | 167940 | 167940 | 14652 | 1343528 | 65536 | 109577008 | 1241608 | 0 | 440028964 | 16792846956528 | 16792846914170 | 16792847256249 | 16792847257708 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7fcf4a0c6580 | 0x7fce3f8244c0 | 166697 | 166697 | 15779 | 1333584 | 65536 | 110061716 | 1231074 | 0 | 441977960 | 16792847306637 | 16792847256249 | 16792847559128 | 16792847560759 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7fcf4a0c6400 | 0x7fce3f824500 | 319841 | 319841 | 24440 | 2558736 | 65536 | 236815075 | 2459750 | 0 | 948992600 | 16792847606327 | 16792847559128 | 16792847954487 | 16792847955796 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7fcf4a0c6280 | 0x7fce3f824540 | 314496 | 314496 | 23432 | 2515976 | 65536 | 229823686 | 2410723 | 0 | 921024964 | 16792847998435 | 16792847954487 | 16792848342325 | 16792848343894 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7fcf4a0c6100 | 0x7fce3f824580 | 166832 | 166832 | 15772 | 1334664 | 65536 | 109625625 | 1233269 | 0 | 440238532 | 16792848387102 | 16792848342325 | 16792848642964 | 16792848644364 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7fcf401c3f80 | 0x7fce3f8245c0 | 168643 | 168643 | 14708 | 1349152 | 65536 | 106023020 | 1246751 | 0 | 425816800 | 16792848686633 | 16792848642964 | 16792848942963 | 16792848944565 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7fcf401c3e00 | 0x7fce3f824600 | 166196 | 166196 | 16222 | 1329576 | 65536 | 112911905 | 1225470 | 0 | 453359948 | 16792848993563 | 16792848942963 | 16792849243922 | 16792849245465 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7fcf401c3c80 | 0x7fce3f824640 | 314215 | 314215 | 24612 | 2513728 | 65536 | 232205349 | 2406232 | 0 | 930557152 | 16792849287104 | 16792849243922 | 16792849637361 | 16792849639013 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7fcf401c3b00 | 0x7fce3f824680 | 313140 | 313140 | 23227 | 2505128 | 65536 | 227170080 | 2398851 | 0 | 910411528 | 16792849682222 | 16792849637361 | 16792850023119 | 16792850024691 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7fcf401c3980 | 0x7fce3f8246c0 | 166425 | 166425 | 15570 | 1331408 | 65536 | 107677887 | 1227474 | 0 | 432438708 | 16792850065470 | 16792850023119 | 16792850333198 | 16792850334991 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7fcf401c3800 | 0x7fce3f824700 | 169115 | 169115 | 16072 | 1352928 | 65536 | 100298926 | 1247637 | 0 | 402918424 | 16792850376640 | 16792850333198 | 16792850636557 | 16792850638102 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7fcf401c3680 | 0x7fce3f824740 | 165829 | 165829 | 15458 | 1326640 | 65536 | 105824107 | 1224631 | 0 | 425024436 | 16792850686020 | 16792850636557 | 16792850939436 | 16792850940902 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7fcf4a0c6500 | 0x7fce3f824780 | 313266 | 313266 | 24796 | 2506136 | 65536 | 232213110 | 2399764 | 0 | 930586672 | 16792850987151 | 16792850939436 | 16792851337514 | 16792851338950 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7fcf4a0c6380 | 0x7fce3f8247c0 | 312976 | 312976 | 24950 | 2503816 | 65536 | 227360200 | 2395518 | 0 | 911171780 | 16792851381248 | 16792851337514 | 16792851723593 | 16792851725098 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7fcf4a0c6200 | 0x7fce3f824800 | 167475 | 167475 | 15709 | 1339808 | 65536 | 100626635 | 1236940 | 0 | 404230208 | 16792851766126 | 16792851723593 | 16792852026952 | 16792852028368 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7fcf4a0c6080 | 0x7fce3f824840 | 169643 | 169643 | 15615 | 1357152 | 65536 | 100753010 | 1255043 | 0 | 404730632 | 16792852069757 | 16792852026952 | 16792852324871 | 16792852326219 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7fcf401c3f00 | 0x7fce3f824880 | 168059 | 168059 | 15563 | 1344480 | 65536 | 108961918 | 1242153 | 0 | 437574928 | 16792852374977 | 16792852324871 | 16792852629190 | 16792852630639 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7fcf401c3d80 | 0x7fce3f8248c0 | 314481 | 314481 | 25058 | 2515856 | 65536 | 234007823 | 2412456 | 0 | 937760756 | 16792852671358 | 16792852629190 | 16792853015588 | 16792853017107 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7fcf401c3c00 | 0x7fce3f824900 | 312068 | 312068 | 22944 | 2496552 | 65536 | 227221747 | 2391867 | 0 | 910619860 | 16792853059096 | 16792853015588 | 16792853399747 | 16792853401325 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7fcf401c3a80 | 0x7fce3f824940 | 169121 | 169121 | 15294 | 1352976 | 65536 | 107413387 | 1249941 | 0 | 431379760 | 16792853441874 | 16792853399747 | 16792853694786 | 16792853696346 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7fcf401c3900 | 0x7fce3f824980 | 170513 | 170513 | 16497 | 1364112 | 65536 | 102411818 | 1256688 | 0 | 411370780 | 16792853737854 | 16792853694786 | 16792853994465 | 16792853995916 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7fcf401c3780 | 0x7fce3f8249c0 | 166790 | 166790 | 15337 | 1334328 | 65536 | 108802487 | 1229269 | 0 | 436929784 | 16792854044965 | 16792853994465 | 16792854310143 | 16792854311796 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7fcf401c3600 | 0x7fce3f824a00 | 309952 | 309952 | 25522 | 2479624 | 65536 | 230954271 | 2375957 | 0 | 925547556 | 16792854353285 | 16792854310143 | 16792854696062 | 16792854697404 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7fcf4a0c6480 | 0x7fce3f824a40 | 304008 | 304008 | 21751 | 2432072 | 65536 | 218769920 | 2330678 | 0 | 876811868 | 16792854740703 | 16792854696062 | 16792855076701 | 16792855078142 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7fcf4a0c6300 | 0x7fce3f824a80 | 165946 | 165946 | 15211 | 1327576 | 65536 | 99765597 | 1223525 | 0 | 400792692 | 16792855119821 | 16792855076701 | 16792855372860 | 16792855374433 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7fcf4a0c6180 | 0x7fce3f824ac0 | 167910 | 167910 | 17101 | 1343288 | 65536 | 101021652 | 1239554 | 0 | 405819396 | 16792855416412 | 16792855372860 | 16792855670938 | 16792855672433 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7fcf4a0c6000 | 0x7fce3f824b00 | 167000 | 167000 | 15655 | 1336008 | 65536 | 100913418 | 1225021 | 0 | 405380804 | 16792855721512 | 16792855670938 | 16792855975897 | 16792855977334 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7fcf401c3e80 | 0x7fce3f824b40 | 308360 | 308360 | 24580 | 2466888 | 65536 | 229146392 | 2364573 | 0 | 918315392 | 16792856019083 | 16792855975897 | 16792856366936 | 16792856368352 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7fcf401c3d00 | 0x7fce3f824b80 | 305326 | 305326 | 23003 | 2442616 | 65536 | 220966012 | 2336744 | 0 | 885594684 | 16792856409950 | 16792856366936 | 16792856756054 | 16792856757479 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7fcf401c3b80 | 0x7fce3f824bc0 | 167476 | 167476 | 15322 | 1339816 | 65536 | 97611205 | 1235958 | 0 | 392167156 | 16792856797968 | 16792856756054 | 16792857055253 | 16792857056850 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7fcf401c3a00 | 0x7fce3f824c00 | 169937 | 169937 | 16888 | 1359504 | 65536 | 105339553 | 1254081 | 0 | 423082536 | 16792857098119 | 16792857055253 | 16792857355412 | 16792857356800 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7fcf401c3880 | 0x7fce3f824c40 | 168254 | 168254 | 16347 | 1346040 | 65536 | 100885131 | 1230405 | 0 | 405259660 | 16792857405509 | 16792857355412 | 16792857654131 | 16792857655531 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7fcf401c3700 | 0x7fce3f824c80 | 306169 | 306169 | 24863 | 2449360 | 65536 | 225996492 | 2344113 | 0 | 905720308 | 16792857701580 | 16792857654131 | 16792858042290 | 16792858043879 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7fcf4a0c6580 | 0x7fce3f824cc0 | 306630 | 306630 | 23673 | 2453048 | 65536 | 220300326 | 2344096 | 0 | 882929280 | 16792858093407 | 16792858042290 | 16792858433328 | 16792858435006 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7fcf4a0c6400 | 0x7fce3f824d00 | 170627 | 170627 | 16492 | 1365024 | 65536 | 102290279 | 1255202 | 0 | 410885416 | 16792858477365 | 16792858433328 | 16792858734447 | 16792858735887 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7fcf4a0c6280 | 0x7fce3f824d40 | 173068 | 173068 | 16822 | 1384552 | 65536 | 105239076 | 1280034 | 0 | 422694140 | 16792858776986 | 16792858734447 | 16792859044526 | 16792859046157 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7fcf4a0c6100 | 0x7fce3f824d80 | 167087 | 167087 | 16388 | 1336704 | 65536 | 102332940 | 1226282 | 0 | 411054840 | 16792859095786 | 16792859044526 | 16792859346445 | 16792859347838 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7fcf401c3f80 | 0x7fce3f824dc0 | 304928 | 304928 | 24334 | 2439432 | 65536 | 222187667 | 2333491 | 0 | 890487116 | 16792859392316 | 16792859346445 | 16792859732204 | 16792859733736 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7fcf401c3e00 | 0x7fce3f824e00 | 306957 | 306957 | 23977 | 2455664 | 65536 | 219538623 | 2347655 | 0 | 879890536 | 16792859774764 | 16792859732204 | 16792860115882 | 16792860117314 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7fcf401c3c80 | 0x7fce3f824e40 | 168616 | 168616 | 15685 | 1348936 | 65536 | 97471438 | 1243952 | 0 | 391616288 | 16792860157732 | 16792860115882 | 16792860411401 | 16792860412864 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7fcf401c3b00 | 0x7fce3f824e80 | 176886 | 176886 | 16749 | 1415096 | 65536 | 112154064 | 1311652 | 0 | 450348608 | 16792860455073 | 16792860411401 | 16792860716360 | 16792860717835 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7fcf401c3980 | 0x7fce3f824ec0 | 167346 | 167346 | 16013 | 1338776 | 65536 | 100628721 | 1230038 | 0 | 404241172 | 16792860766733 | 16792860716360 | 16792861019559 | 16792861020925 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7fcf401c3800 | 0x7fce3f824f00 | 303483 | 303483 | 23501 | 2427872 | 65536 | 220292154 | 2316766 | 0 | 882899672 | 16792861062304 | 16792861019559 | 16792861404997 | 16792861406453 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7fcf401c3680 | 0x7fce3f824f40 | 303480 | 303480 | 23579 | 2427848 | 65536 | 222349039 | 2319885 | 0 | 891129332 | 16792861448272 | 16792861404997 | 16792861784996 | 16792861786331 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7fcf4a0c6500 | 0x7fce3f824f80 | 168242 | 168242 | 15525 | 1345944 | 65536 | 98836545 | 1240470 | 0 | 397069812 | 16792861827510 | 16792861784996 | 16792862083075 | 16792862087332 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7fcf4a0c6380 | 0x7fce3f824fc0 | 182736 | 182736 | 17251 | 1461896 | 65536 | 108343629 | 1357734 | 0 | 435120676 | 16792862127401 | 16792862083075 | 16792862397314 | 16792862398742 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7fcf4a0c6200 | 0x7fce3f825000 | 166216 | 166216 | 16281 | 1329736 | 65536 | 102370369 | 1223191 | 0 | 411203244 | 16792862460530 | 16792862397314 | 16792862699393 | 16792862700883 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7fcf4a0c6080 | 0x7fce3f825040 | 305080 | 305080 | 25124 | 2440648 | 65536 | 215980451 | 2328490 | 0 | 865658684 | 16792862746631 | 16792862699393 | 16792863092511 | 16792863094000 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7fcf401c3f00 | 0x7fce3f825080 | 305546 | 305546 | 24385 | 2444376 | 65536 | 220933139 | 2339698 | 0 | 885465324 | 16792863136379 | 16792863092511 | 16792863482750 | 16792863484228 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7fcf401c3d80 | 0x7fce3f8250c0 | 168308 | 168308 | 15395 | 1346472 | 65536 | 99132717 | 1244091 | 0 | 398267344 | 16792863525017 | 16792863482750 | 16792863783389 | 16792863784728 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7fcf401c3c00 | 0x7fce3f825100 | 192876 | 192876 | 18601 | 1543016 | 65536 | 110838677 | 1429368 | 0 | 445096928 | 16792863825607 | 16792863783389 | 16792864097468 | 16792864099049 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7fcf401c3a80 | 0x7fce3f825140 | 167176 | 167176 | 15294 | 1337416 | 65536 | 104241151 | 1232996 | 0 | 418692260 | 16792864147257 | 16792864097468 | 16792864396507 | 16792864398079 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7fcf401c3900 | 0x7fce3f825180 | 302402 | 302402 | 22675 | 2419224 | 65536 | 214067569 | 2315723 | 0 | 858013436 | 16792864443418 | 16792864396507 | 16792864782745 | 16792864784167 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7fcf401c3780 | 0x7fce3f8251c0 | 304465 | 304465 | 24587 | 2435728 | 65536 | 223360030 | 2330002 | 0 | 895170700 | 16792864826196 | 16792864782745 | 16792865167384 | 16792865168905 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7fcf401c3600 | 0x7fce3f825200 | 168817 | 168817 | 15518 | 1350544 | 65536 | 100122664 | 1247832 | 0 | 402217640 | 16792865209574 | 16792865167384 | 16792865468503 | 16792865469955 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7fcf4a0c6480 | 0x7fce3f825240 | 202029 | 202029 | 18194 | 1616240 | 65536 | 117537228 | 1499468 | 0 | 471894004 | 16792865511514 | 16792865468503 | 16792865794261 | 16792865795675 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7fcf4a0c6300 | 0x7fce3f825280 | 168303 | 168303 | 15957 | 1346432 | 65536 | 103874095 | 1240900 | 0 | 417223984 | 16792865844894 | 16792865794261 | 16792866101620 | 16792866103135 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7fcf4a0c6180 | 0x7fce3f8252c0 | 306052 | 306052 | 24347 | 2448424 | 65536 | 226141379 | 2342010 | 0 | 906297352 | 16792866145844 | 16792866101620 | 16792866490259 | 16792866491683 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7fcf4a0c6000 | 0x7fce3f825300 | 301441 | 301441 | 22746 | 2411536 | 65536 | 215276067 | 2303240 | 0 | 862837056 | 16792866535872 | 16792866490259 | 16792866877297 | 16792866878651 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7fcf401c3e80 | 0x7fce3f825340 | 168408 | 168408 | 16309 | 1347272 | 65536 | 101985831 | 1244569 | 0 | 409667216 | 16792866919240 | 16792866877297 | 16792867177776 | 16792867179252 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7fcf401c3d00 | 0x7fce3f825380 | 210184 | 210184 | 18901 | 1681480 | 65536 | 136205249 | 1576007 | 0 | 546561696 | 16792867220500 | 16792867177776 | 16792867507535 | 16792867509061 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7fcf401c3b80 | 0x7fce3f8253c0 | 168153 | 168153 | 15559 | 1345232 | 65536 | 99752652 | 1242641 | 0 | 400734528 | 16792867557860 | 16792867507535 | 16792867809774 | 16792867811422 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7fcf401c3a00 | 0x7fce3f825400 | 304350 | 304350 | 24978 | 2434808 | 65536 | 214953369 | 2328241 | 0 | 861551496 | 16792867852531 | 16792867809774 | 16792868200493 | 16792868201960 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7fcf401c3880 | 0x7fce3f825440 | 306499 | 306499 | 23785 | 2452000 | 65536 | 220559173 | 2348126 | 0 | 883968448 | 16792868243868 | 16792868200493 | 16792868586091 | 16792868587687 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7fcf401c3700 | 0x7fce3f825480 | 168120 | 168120 | 15559 | 1344968 | 65536 | 102093581 | 1243804 | 0 | 410104892 | 16792868628826 | 16792868586091 | 16792868886410 | 16792868887758 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7fcf4a0c6580 | 0x7fce3f8254c0 | 219347 | 219347 | 19299 | 1754784 | 65536 | 136275099 | 1642713 | 0 | 546847784 | 16792868928767 | 16792868886410 | 16792869218569 | 16792869220058 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7fcf4a0c6400 | 0x7fce3f825500 | 167096 | 167096 | 16232 | 1336776 | 65536 | 100663001 | 1233514 | 0 | 404384540 | 16792869269176 | 16792869218569 | 16792869521608 | 16792869523088 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7fcf4a0c6280 | 0x7fce3f825540 | 301461 | 301461 | 23033 | 2411696 | 65536 | 219905824 | 2302501 | 0 | 881360012 | 16792869569407 | 16792869521608 | 16792869911206 | 16792869912536 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7fcf4a0c6100 | 0x7fce3f825580 | 301238 | 301238 | 22498 | 2409912 | 65536 | 217245770 | 2303649 | 0 | 870714832 | 16792869953845 | 16792869911206 | 16792870310245 | 16792870311693 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7fcf401c3f80 | 0x7fce3f8255c0 | 170412 | 170412 | 16033 | 1363304 | 65536 | 103401859 | 1260389 | 0 | 415335336 | 16792870353012 | 16792870310245 | 16792870619684 | 16792870621124 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7fcf401c3e00 | 0x7fce3f825600 | 228675 | 228675 | 20025 | 1829408 | 65536 | 152662663 | 1724149 | 0 | 612392944 | 16792870661982 | 16792870619684 | 16792870959203 | 16792870960813 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7fcf401c3c80 | 0x7fce3f825640 | 168115 | 168115 | 16852 | 1344928 | 65536 | 101784305 | 1236751 | 0 | 408861396 | 16792871008671 | 16792870959203 | 16792871264641 | 16792871265963 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7fcf401c3b00 | 0x7fce3f825680 | 298104 | 298104 | 22251 | 2384840 | 65536 | 205264392 | 2275367 | 0 | 822799076 | 16792871307532 | 16792871264641 | 16792871650880 | 16792871652241 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7fcf401c3980 | 0x7fce3f8256c0 | 302864 | 302864 | 23119 | 2422920 | 65536 | 214983708 | 2313930 | 0 | 861673064 | 16792871693790 | 16792871650880 | 16792872033599 | 16792872035079 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7fcf401c3800 | 0x7fce3f825700 | 170727 | 170727 | 15876 | 1365824 | 65536 | 106069961 | 1263742 | 0 | 426013204 | 16792872075638 | 16792872033599 | 16792872335678 | 16792872337040 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7fcf401c3680 | 0x7fce3f825740 | 238277 | 238277 | 20090 | 1906224 | 65536 | 163273658 | 1802522 | 0 | 654837900 | 16792872377928 | 16792872335678 | 16792872685596 | 16792872686999 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7fcf4a0c6500 | 0x7fce3f825780 | 168360 | 168360 | 16466 | 1346888 | 65536 | 101773100 | 1240656 | 0 | 408822380 | 16792872735177 | 16792872685596 | 16792872988795 | 16792872990369 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7fcf4a0c6380 | 0x7fce3f8257c0 | 301139 | 301139 | 21891 | 2409120 | 65536 | 207780055 | 2301131 | 0 | 832855692 | 16792873033028 | 16792872988795 | 16792873376954 | 16792873378267 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7fcf4a0c6200 | 0x7fce3f825800 | 300792 | 300792 | 22477 | 2406344 | 65536 | 213422598 | 2298839 | 0 | 855424436 | 16792873420415 | 16792873376954 | 16792873761912 | 16792873763455 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7fcf4a0c6080 | 0x7fce3f825840 | 172715 | 172715 | 16484 | 1381728 | 65536 | 107359739 | 1279338 | 0 | 431189832 | 16792873805653 | 16792873761912 | 16792874066231 | 16792874067575 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7fcf401c3f00 | 0x7fce3f825880 | 258582 | 258582 | 21158 | 2068664 | 65536 | 172162749 | 1957131 | 0 | 690401092 | 16792874110024 | 16792874066231 | 16792874424790 | 16792874426184 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7fcf401c3d80 | 0x7fce3f8258c0 | 168175 | 168175 | 16397 | 1345408 | 65536 | 101297272 | 1240261 | 0 | 406920332 | 16792874475742 | 16792874424790 | 16792874730229 | 16792874731564 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7fcf401c3c00 | 0x7fce3f825900 | 299640 | 299640 | 21325 | 2397128 | 65536 | 209383794 | 2294200 | 0 | 839282052 | 16792874772933 | 16792874730229 | 16792875116947 | 16792875118382 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7fcf401c3a80 | 0x7fce3f825940 | 301231 | 301231 | 23181 | 2409856 | 65536 | 212480476 | 2305774 | 0 | 851662644 | 16792875159811 | 16792875116947 | 16792875500946 | 16792875502400 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7fcf401c3900 | 0x7fce3f825980 | 178572 | 178572 | 17101 | 1428584 | 65536 | 103614878 | 1320071 | 0 | 416196692 | 16792875543139 | 16792875500946 | 16792875809745 | 16792875811290 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7fcf401c3780 | 0x7fce3f8259c0 | 281121 | 281121 | 22659 | 2248976 | 65536 | 179163048 | 2131379 | 0 | 718402252 | 16792875852459 | 16792875809745 | 16792876181903 | 16792876183249 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7fcf401c3600 | 0x7fce3f825a00 | 168100 | 168100 | 16840 | 1344808 | 65536 | 102587987 | 1234755 | 0 | 412081760 | 16792876231817 | 16792876181903 | 16792876491502 | 16792876493079 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7fcf4a0c6480 | 0x7fce3f825a40 | 301317 | 301317 | 22948 | 2410544 | 65536 | 209713588 | 2302387 | 0 | 840591648 | 16792876539058 | 16792876491502 | 16792876883341 | 16792876884817 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7fcf4a0c6300 | 0x7fce3f825a80 | 303192 | 303192 | 24683 | 2425544 | 65536 | 215026616 | 2313058 | 0 | 861848412 | 16792876926945 | 16792876883341 | 16792877268939 | 16792877270455 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7fcf4a0c6180 | 0x7fce3f825ac0 | 187445 | 187445 | 18649 | 1499568 | 65536 | 106405206 | 1376994 | 0 | 427367312 | 16792877311643 | 16792877268939 | 16792877592458 | 16792877594034 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7fcf4a0c6000 | 0x7fce3f825b00 | 300214 | 300214 | 23739 | 2401720 | 65536 | 200618477 | 2286537 | 0 | 804222044 | 16792877635103 | 16792877592458 | 16792877979017 | 16792877980422 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7fcf401c3e80 | 0x7fce3f825b40 | 173049 | 173049 | 16989 | 1384400 | 65536 | 103657313 | 1278119 | 0 | 416373800 | 16792878029281 | 16792877979017 | 16792878291016 | 16792878292753 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7fcf401c3d00 | 0x7fce3f825b80 | 302390 | 302390 | 22583 | 2419128 | 65536 | 200150839 | 2309446 | 0 | 802346484 | 16792878334071 | 16792878291016 | 16792878680934 | 16792878682350 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7fcf401c3b80 | 0x7fce3f825bc0 | 303057 | 303057 | 22867 | 2424464 | 65536 | 209112294 | 2319447 | 0 | 838188100 | 16792878723959 | 16792878680934 | 16792879066853 | 16792879068108 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7fcf401c3a00 | 0x7fce3f825c00 | 206533 | 206533 | 18654 | 1652272 | 65536 | 125700958 | 1537662 | 0 | 504557640 | 16792879109227 | 16792879066853 | 16792879390532 | 16792879392018 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7fcf401c3880 | 0x7fce3f825c40 | 341459 | 341459 | 26252 | 2731680 | 65536 | 231248923 | 2613191 | 0 | 926753228 | 16792879433467 | 16792879390532 | 16792879800770 | 16792879846674 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7fcf401c3700 | 0x7fce3f825c80 | 184310 | 184310 | 18212 | 1474488 | 65536 | 123298796 | 1367102 | 0 | 494935296 | 16792879865433 | 16792879800770 | 16792880143329 | 16792880144714 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7fcf4a0c6580 | 0x7fce3f825cc0 | 302003 | 302003 | 22000 | 2416032 | 65536 | 200881418 | 2310773 | 0 | 805276348 | 16792880186553 | 16792880143329 | 16792880531968 | 16792880533372 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7fcf4a0c6400 | 0x7fce3f825d00 | 303498 | 303498 | 22298 | 2427992 | 65536 | 206387070 | 2323427 | 0 | 827302108 | 16792880575791 | 16792880531968 | 16792880922686 | 16792880924070 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7fcf4a0c6280 | 0x7fce3f825d40 | 225491 | 225491 | 19497 | 1803936 | 65536 | 133980159 | 1695018 | 0 | 537675968 | 16792880968448 | 16792880922686 | 16792881266045 | 16792881267519 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7fcf4a0c6100 | 0x7fce3f825d80 | 381268 | 381268 | 27472 | 3050152 | 65536 | 260335783 | 2937418 | 0 | 1043101524 | 16792881308887 | 16792881266045 | 16792881700283 | 16792881719884 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7fcf401c3f80 | 0x7fce3f825dc0 | 211337 | 211337 | 19061 | 1690704 | 65536 | 125622534 | 1580936 | 0 | 504246300 | 16792881750834 | 16792881700283 | 16792882038042 | 16792882039444 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7fcf401c3e00 | 0x7fce3f825e00 | 307553 | 307553 | 24915 | 2460432 | 65536 | 204153098 | 2350385 | 0 | 818388648 | 16792882087913 | 16792882038042 | 16792882455960 | 16792882477671 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7fcf401c3c80 | 0x7fce3f825e40 | 311655 | 311655 | 23893 | 2493248 | 65536 | 194889702 | 2376677 | 0 | 781326576 | 16792882499420 | 16792882455960 | 16792882859319 | 16792882879728 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7fcf401c3b00 | 0x7fce3f825e80 | 267485 | 267485 | 21674 | 2139888 | 65536 | 163042879 | 2029450 | 0 | 653941540 | 16792882901567 | 16792882859319 | 16792883224438 | 16792883225747 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7fcf401c3980 | 0x7fce3f825ec0 | 462383 | 462383 | 31693 | 3699072 | 65536 | 327056354 | 3584539 | 0 | 1309984928 | 16792883266836 | 16792883224438 | 16792883709556 | 16792883730171 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7fcf401c3800 | 0x7fce3f825f00 | 243299 | 243299 | 20662 | 1946400 | 65536 | 169025930 | 1841316 | 0 | 677846876 | 16792883759910 | 16792883709556 | 16792884064435 | 16792884065791 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7fcf401c3680 | 0x7fce3f825f40 | 335723 | 335723 | 23990 | 2685792 | 65536 | 206049218 | 2564367 | 0 | 825970620 | 16792884106310 | 16792884064435 | 16792884476273 | 16792884493757 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7fcf4a0c6500 | 0x7fce3f825f80 | 331041 | 331041 | 23052 | 2648336 | 65536 | 208470257 | 2524982 | 0 | 835641756 | 16792884519057 | 16792884476273 | 16792884885872 | 16792884903265 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7fcf4a0c6380 | 0x7fce3f825fc0 | 306711 | 306711 | 23711 | 2453696 | 65536 | 209013782 | 2344942 | 0 | 837812640 | 16792884927704 | 16792884885872 | 16792885285070 | 16792885286383 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7fcf4a0c6200 | 0x7fce3f826000 | 542641 | 542641 | 36026 | 4341136 | 65536 | 406346614 | 4225868 | 0 | 1627153404 | 16792885327881 | 16792885285070 | 16792885827788 | 16792885846425 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7fcf4a0c6080 | 0x7fce3f826040 | 271773 | 271773 | 22219 | 2174192 | 65536 | 191644590 | 2068246 | 0 | 768336024 | 16792885879944 | 16792885827788 | 16792886207467 | 16792886209014 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7fcf401c3f00 | 0x7fce3f826080 | 364357 | 364357 | 26844 | 2914864 | 65536 | 242701660 | 2795062 | 0 | 972592396 | 16792886251332 | 16792886207467 | 16792886644265 | 16792886686869 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7fcf401c3d80 | 0x7fce3f8260c0 | 365761 | 365761 | 29385 | 2926096 | 65536 | 253129421 | 2810048 | 0 | 1014276892 | 16792886697748 | 16792886644265 | 16792887096264 | 16792887138214 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7fcf401c3c00 | 0x7fce3f826100 | 346692 | 346692 | 25483 | 2773544 | 65536 | 247499627 | 2666126 | 0 | 991769600 | 16792887149754 | 16792887096264 | 16792887529862 | 16792887571731 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7fcf401c3a80 | 0x7fce3f826140 | 624232 | 624232 | 40169 | 4993864 | 65536 | 466516538 | 4875907 | 0 | 1867829176 | 16792887582800 | 16792887529862 | 16792888136420 | 16792888178402 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7fcf401c3900 | 0x7fce3f826180 | 304516 | 304516 | 23481 | 2436136 | 65536 | 196215139 | 2323234 | 0 | 786631012 | 16792888205081 | 16792888136420 | 16792888555458 | 16792888556890 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7fcf401c3780 | 0x7fce3f8261c0 | 399074 | 399074 | 27384 | 3192600 | 65536 | 273385084 | 3081455 | 0 | 1095323732 | 16792888598038 | 16792888555458 | 16792889009697 | 16792889052984 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7fcf401c3600 | 0x7fce3f826200 | 391968 | 391968 | 27877 | 3135752 | 65536 | 279894216 | 3021934 | 0 | 1121352496 | 16792889064094 | 16792889009697 | 16792889480095 | 16792889522119 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7fcf4a0c6480 | 0x7fce3f826240 | 388208 | 388208 | 28006 | 3105672 | 65536 | 280121734 | 2989667 | 0 | 1122253492 | 16792889534119 | 16792889480095 | 16792889939133 | 16792889957106 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7fcf4a0c6300 | 0x7fce3f826280 | 718568 | 718568 | 45022 | 5748552 | 65536 | 533405632 | 5632738 | 0 | 2135363168 | 16792889982145 | 16792889939133 | 16792890602331 | 16792890648274 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7fcf4a0c6180 | 0x7fce3f8262c0 | 365876 | 365876 | 28101 | 2927016 | 65536 | 257319797 | 2804184 | 0 | 1031038976 | 16792890668373 | 16792890602331 | 16792891058329 | 16792891101850 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7fcf4a0c6000 | 0x7fce3f826300 | 468652 | 468652 | 31969 | 3749224 | 65536 | 350229111 | 3640961 | 0 | 1402698156 | 16792891113739 | 16792891058329 | 16792891582647 | 16792891624333 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7fcf401c3e80 | 0x7fce3f826340 | 472850 | 472850 | 31982 | 3782808 | 65536 | 346128776 | 3673445 | 0 | 1386293840 | 16792891636103 | 16792891582647 | 16792892106485 | 16792892150327 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7fcf401c3d00 | 0x7fce3f826380 | 485398 | 485398 | 34402 | 3883192 | 65536 | 342447362 | 3751406 | 0 | 1371530316 | 16792892161516 | 16792892106485 | 16792892625363 | 16792892667311 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7fcf401c3b80 | 0x7fce3f8263c0 | 880234 | 880234 | 53098 | 7041880 | 65536 | 687029505 | 6935648 | 0 | 2749858628 | 16792892678930 | 16792892625363 | 16792893402480 | 16792893444656 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7fcf401c3a00 | 0x7fce3f826400 | 427156 | 427156 | 29858 | 3417256 | 65536 | 300046942 | 3304477 | 0 | 1201959256 | 16792893465106 | 16792893402480 | 16792893888559 | 16792893906642 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7fcf401c3880 | 0x7fce3f826440 | 549334 | 549334 | 36103 | 4394680 | 65536 | 409152819 | 4287584 | 0 | 1638399116 | 16792893931001 | 16792893888559 | 16792894447757 | 16792894492853 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7fcf401c3700 | 0x7fce3f826480 | 551012 | 551012 | 36263 | 4408104 | 65536 | 412553012 | 4299522 | 0 | 1651994332 | 16792894504163 | 16792894447757 | 16792895026155 | 16792895072195 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7fcf4a0c6580 | 0x7fce3f8264c0 | 563472 | 563472 | 36207 | 4507784 | 65536 | 416337020 | 4395302 | 0 | 1667090612 | 16792895084005 | 16792895026155 | 16792895597032 | 16792895639987 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7fcf4a0c6400 | 0x7fce3f826500 | 1042268 | 1042268 | 62431 | 8338152 | 65536 | 799663657 | 8218923 | 0 | 3200395928 | 16792895651107 | 16792895597032 | 16792896485189 | 16792896532659 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7fcf4a0c6280 | 0x7fce3f826540 | 722002 | 722002 | 44875 | 5776024 | 65536 | 539863301 | 5662251 | 0 | 2161193480 | 16792896552539 | 16792896484932 | 16792897159329 | 16792897208678 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7fcf4a0c6100 | 0x7fce3f826580 | 724208 | 724208 | 45893 | 5793672 | 65536 | 558170616 | 5672200 | 0 | 2234423728 | 16792897220718 | 16792897159329 | 16792897876125 | 16792897920086 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7fcf401c3f80 | 0x7fce3f8265c0 | 722702 | 722702 | 44432 | 5781624 | 65536 | 546333997 | 5659822 | 0 | 2187078380 | 16792897931105 | 16792897876125 | 16792898586522 | 16792898632793 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7fcf401c3e00 | 0x7fce3f826600 | 727287 | 727287 | 45298 | 5818304 | 65536 | 543490543 | 5696744 | 0 | 2175704380 | 16792898644643 | 16792898586522 | 16792899283319 | 16792899328901 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7fcf401c3c80 | 0x7fce3f826640 | 1368888 | 1368888 | 77384 | 10951112 | 65536 | 1060573322 | 10814796 | 0 | 4244035420 | 16792899340771 | 16792899283319 | 16792900389074 | 16792900435027 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7fcf401c3b00 | 0x7fce3f826680 | 1371124 | 1371124 | 77764 | 10969000 | 65536 | 1073826064 | 10842553 | 0 | 4297046088 | 16792900455116 | 16792900389074 | 16792901490830 | 16792901556101 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7fcf401c3980 | 0x7fce3f8266c0 | 1373256 | 1373256 | 77548 | 10986056 | 65536 | 1075277290 | 10862240 | 0 | 4302850740 | 16792901568691 | 16792901490830 | 16792902636265 | 16792902706585 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7fcf401c3800 | 0x7fce3f826700 | 1370695 | 1370695 | 77465 | 10965568 | 65536 | 1071497348 | 10843510 | 0 | 4287729800 | 16792902719195 | 16792902636265 | 16792903787140 | 16792903852809 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7fcf401c3680 | 0x7fce3f826740 | 1377912 | 1377912 | 76853 | 11023304 | 65536 | 1074148799 | 10880819 | 0 | 4298337292 | 16792903864509 | 16792903787140 | 16792904910815 | 16792904983954 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7fcf4a0c6500 | 0x7fce3f826780 | 2661612 | 2661612 | 142836 | 21292904 | 65536 | 2128465729 | 21183617 | 0 | 8515604736 | 16792904995333 | 16792904910815 | 16792906877527 | 16792906949282 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7fcf4a0c6380 | 0x7fce3f8267c0 | 2669004 | 2669004 | 142996 | 21352040 | 65536 | 2127225666 | 21225967 | 0 | 8510643816 | 16792906969461 | 16792906877527 | 16792908814959 | 16792908882491 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7fcf4a0c6200 | 0x7fce3f826800 | 2668964 | 2668964 | 143335 | 21351720 | 65536 | 2134640821 | 21237151 | 0 | 8540303824 | 16792908894691 | 16792908814959 | 16792910801671 | 16792910873258 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7fcf4a0c6080 | 0x7fce3f826840 | 2670820 | 2670820 | 143363 | 21366568 | 65536 | 2117835991 | 21229944 | 0 | 8473085140 | 16792910886248 | 16792910801671 | 16792912798943 | 16792912865576 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7fcf401c3f00 | 0x7fce3f826880 | 2673572 | 2673572 | 143579 | 21388584 | 65536 | 2120013542 | 21259021 | 0 | 8481795820 | 16792912877856 | 16792912798943 | 16792914739254 | 16792914809795 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 175379 | 175379 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7fcf401c3d80 | 0x7fce3f8268c0 | 5257836 | 5257836 | 274170 | 42062696 | 65536 | 4232540942 | 41925799 | 0 | 16931905176 | 16792914822314 | 16792914739254 | 16792918372359 | 16792918444090 |