80 KiB
80 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_BUSY_CU_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | GRBM_COUNT | GRBM_GUI_ACTIVE | TCP_GATE_EN1_sum | TCP_GATE_EN2_sum | TCP_TD_TCP_STALL_CYCLES_sum | TCP_TCR_TCP_STALL_CYCLES_sum | TCP_READ_TAGCONFLICT_STALL_CYCLES_sum | TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum | TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum | TCP_TA_TCP_STATE_READ_sum | TCP_VOLATILE_sum | TCP_TOTAL_ACCESSES_sum | TCP_TOTAL_READ_sum | TCP_TOTAL_WRITE_sum | TCP_TOTAL_ATOMIC_WITH_RET_sum | TCP_TOTAL_ATOMIC_WITHOUT_RET_sum | TCP_TOTAL_WRITEBACK_INVALIDATES_sum | TCP_TOTAL_CACHE_ACCESSES_sum | TCP_UTCL1_TRANSLATION_MISS_sum | TCP_UTCL1_TRANSLATION_HIT_sum | TCP_UTCL1_PERMISSION_MISS_sum | TCP_UTCL1_REQUEST_sum | TCP_TCP_LATENCY_sum | TCP_TCC_READ_REQ_LATENCY_sum | TCP_TCC_WRITE_REQ_LATENCY_sum | TCP_TCC_READ_REQ_sum | TCP_TCC_WRITE_REQ_sum | TCP_TCC_ATOMIC_WITH_RET_REQ_sum | TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum | TCP_TCC_NC_READ_REQ_sum | TCP_TCC_NC_WRITE_REQ_sum | TCP_TCC_NC_ATOMIC_REQ_sum | TCP_TCC_UC_READ_REQ_sum | TCP_TCC_UC_WRITE_REQ_sum | TCP_TCC_UC_ATOMIC_REQ_sum | TCP_TCC_CC_READ_REQ_sum | TCP_TCC_CC_WRITE_REQ_sum | TCP_TCC_CC_ATOMIC_REQ_sum | TCP_TCC_RW_READ_REQ_sum | TCP_TCC_RW_WRITE_REQ_sum | TCP_TCC_RW_ATOMIC_REQ_sum | TCP_PENDING_STALL_CYCLES_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 861653 | 861658 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7fa2ed804180 | 4017616 | 3811368 | 57010570 | 524288 | 365706290 | 502201 | 502201 | 57581369 | 55291458 | 172 | 12242759 | 0 | 0 | 0 | 1048576 | 67108864 | 67108864 | 0 | 67108864 | 0 | 0 | 360 | 33554432 | 15326 | 16608774 | 0 | 16777216 | 641000253 | 0 | 2809786101 | 0 | 8388608 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8388608 | 0 | 42066228 | 12075111806057902 | 12075122867722970 | 12075122868048567 | 12075112052782898 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 861653 | 861658 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7fa2ed835100 | 226960 | 75885 | 1058218 | 512 | 1129881 | 28369 | 28369 | 1525247 | 590890 | 223 | 123973 | 0 | 0 | 0 | 4096 | 65536 | 65536 | 65536 | 0 | 0 | 0 | 360 | 16384 | 120 | 15064 | 0 | 16384 | 13651060 | 18072802 | 0 | 8192 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8192 | 0 | 0 | 433075 | 12075112067269329 | 12075122883005681 | 12075122883012241 | 12075112067789165 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7fa3f9496900 | 0x7fa2ed835140 | 1851568 | 1678532 | 24917394 | 65536 | 123263676 | 231445 | 231445 | 25593587 | 24315118 | 28167 | 4305461 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7590 | 2017879 | 0 | 2097152 | 1001448598 | 1625054991 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16241328 | 12075112068132513 | 12075122883073521 | 12075122883205199 | 12075112068769106 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7fa3f9496800 | 0x7fa2ed835180 | 3162416 | 3000988 | 44751928 | 65536 | 254909348 | 395301 | 395301 | 45430547 | 44251996 | 0 | 22969185 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14249 | 8234928 | 0 | 8388608 | 1069438108 | 4162675756 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 32547310 | 12075112069121791 | 12075122883268399 | 12075122883512717 | 12075112069855224 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7fa3f9496700 | 0x7fa2ed8351c0 | 3276264 | 3101254 | 46148283 | 65536 | 276290548 | 409532 | 409532 | 46934417 | 45748304 | 0 | 27018540 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14467 | 8235803 | 0 | 8388608 | 1167116752 | 4249854265 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35332125 | 12075112070119676 | 12075122883646156 | 12075122883890474 | 12075112070807043 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7fa3f9496600 | 0x7fa2ed835200 | 1803328 | 1640662 | 24306130 | 65536 | 127479722 | 225415 | 225415 | 25025567 | 23747921 | 29099 | 6803011 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7610 | 2015864 | 0 | 2097152 | 1162972704 | 2051130222 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16924727 | 12075112071073609 | 12075122884028393 | 12075122884153671 | 12075112071565713 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7fa3f9496500 | 0x7fa2ed835240 | 1795240 | 1623816 | 24132631 | 65536 | 121839867 | 224404 | 224404 | 24772727 | 23579653 | 30336 | 8124052 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7617 | 2017494 | 0 | 2097152 | 1105806377 | 1874276104 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15117074 | 12075112071893623 | 12075122884197831 | 12075122884322950 | 12075112072441671 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7fa3f9496400 | 0x7fa2ed835280 | 1680320 | 1520558 | 22608951 | 65536 | 153330185 | 210039 | 210039 | 23224187 | 22138727 | 31424 | 14139634 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7634 | 2047210 | 0 | 2097152 | 1297240033 | 2436891781 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17330923 | 12075112072801740 | 12075122884391589 | 12075122884515908 | 12075112073323860 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7fa3f9496300 | 0x7fa2ed8352c0 | 3180720 | 3011950 | 44903568 | 65536 | 347628733 | 397589 | 397589 | 45594977 | 44297065 | 0 | 26472482 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13970 | 8239598 | 0 | 8388608 | 1164880024 | 4332181837 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36255632 | 12075112073652141 | 12075122884556868 | 12075122884800706 | 12075112074401283 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7fa3f9496a00 | 0x7fa2ed835300 | 3127528 | 2964380 | 44210207 | 65536 | 343456321 | 390940 | 390940 | 44884277 | 43802374 | 0 | 29790126 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13244 | 8248179 | 0 | 8388608 | 1133401128 | 4082785069 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36153325 | 12075112074666345 | 12075122884929185 | 12075122885173183 | 12075112075328927 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7fa3f9496900 | 0x7fa2ed835340 | 1674880 | 1508428 | 22371912 | 65536 | 160065277 | 209359 | 209359 | 23042267 | 21859763 | 32075 | 14860846 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7635 | 2048646 | 0 | 2097152 | 1144857068 | 2044852390 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 18543442 | 12075112075593749 | 12075122885306142 | 12075122885430621 | 12075112076090722 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7fa3f9496800 | 0x7fa2ed835380 | 1743600 | 1573054 | 23300045 | 65536 | 139922725 | 217949 | 217949 | 24047933 | 22721222 | 32298 | 10187999 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7629 | 2047813 | 0 | 2097152 | 1198723830 | 2197755049 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15794814 | 12075112076418932 | 12075122885471420 | 12075122885595899 | 12075112076954337 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7fa3f9496700 | 0x7fa2ed8353c0 | 1702296 | 1530592 | 22706846 | 65536 | 135855423 | 212786 | 212786 | 23374427 | 22192986 | 32425 | 13333310 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7630 | 2053155 | 0 | 2097152 | 1194073371 | 2242492858 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17635502 | 12075112077302835 | 12075122885647099 | 12075122885771578 | 12075112077832449 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7fa3f9496600 | 0x7fa2ed835400 | 3246312 | 3086049 | 46028980 | 65536 | 322757017 | 405788 | 405788 | 46706147 | 45486152 | 0 | 24800396 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14510 | 8235620 | 0 | 8388608 | 1086083469 | 4066622667 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34847881 | 12075112078162322 | 12075122885811098 | 12075122886059576 | 12075112078885366 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7fa3f9496500 | 0x7fa2ed835440 | 3269744 | 3074113 | 45833301 | 65536 | 311797193 | 408717 | 408717 | 46527197 | 45433894 | 0 | 23997203 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13519 | 8246280 | 0 | 8388608 | 1109243592 | 4073533381 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35880406 | 12075112079154356 | 12075122886191894 | 12075122886439732 | 12075112079840170 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7fa3f9496400 | 0x7fa2ed835480 | 1685464 | 1522773 | 22616440 | 65536 | 137585311 | 210682 | 210682 | 23257907 | 22110261 | 31900 | 14342487 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7644 | 2047315 | 0 | 2097152 | 1193691034 | 2185070869 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16449649 | 12075112080103349 | 12075122886539252 | 12075122886664210 | 12075112080590795 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7fa3f9496300 | 0x7fa2ed8354c0 | 1811560 | 1645203 | 24346599 | 65536 | 130688598 | 226444 | 226444 | 25093607 | 23749594 | 30798 | 6622079 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7623 | 2053388 | 0 | 2097152 | 1212072977 | 2310896087 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15629359 | 12075112080915278 | 12075122886704370 | 12075122886829489 | 12075112081462926 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7fa3f9496a00 | 0x7fa2ed835500 | 1774888 | 1609926 | 23796618 | 65536 | 139566155 | 221860 | 221860 | 24564707 | 23270078 | 30836 | 8149026 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7647 | 2053071 | 0 | 2097152 | 1255329195 | 2360176510 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16671941 | 12075112081796305 | 12075122886880209 | 12075122887004688 | 12075112082353130 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7fa3f9496900 | 0x7fa2ed835540 | 3273704 | 3105425 | 46278220 | 65536 | 307731148 | 409212 | 409212 | 46997237 | 45711915 | 0 | 20885678 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13665 | 8242869 | 0 | 8388608 | 1188974204 | 4726157733 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35826180 | 12075112082672173 | 12075122887046287 | 12075122887293645 | 12075112083403652 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7fa3f9496800 | 0x7fa2ed835580 | 3210368 | 3043977 | 45452004 | 65536 | 274587257 | 401295 | 401295 | 46114571 | 45033360 | 0 | 24386670 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14594 | 8233411 | 0 | 8388608 | 1112961245 | 4253084126 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36243063 | 12075112083666391 | 12075122887393004 | 12075122887638762 | 12075112084357355 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7fa3f9496700 | 0x7fa2ed8355c0 | 1701528 | 1531584 | 22678119 | 65536 | 138223422 | 212690 | 212690 | 23389607 | 22172511 | 31497 | 13415300 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7641 | 2050996 | 0 | 2097152 | 1218537656 | 2211625119 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17669160 | 12075112084624221 | 12075122887735881 | 12075122887860840 | 12075112085111436 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7fa3f9496600 | 0x7fa2ed835600 | 1772416 | 1612033 | 23835457 | 65536 | 134376343 | 221551 | 221551 | 24596237 | 23219729 | 31412 | 9403585 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7619 | 2045468 | 0 | 2097152 | 1293635662 | 2443211687 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15025901 | 12075112085441059 | 12075122887901000 | 12075122888025959 | 12075112085983196 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7fa3f9496500 | 0x7fa2ed835640 | 1692968 | 1523751 | 22599557 | 65536 | 145081924 | 211620 | 211620 | 23271917 | 22080083 | 32866 | 13515915 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7627 | 2054133 | 0 | 2097152 | 1210070050 | 2233094525 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16115971 | 12075112086321595 | 12075122888077799 | 12075122888201798 | 12075112086829469 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7fa3f9496400 | 0x7fa2ed835680 | 3647840 | 3055634 | 45592645 | 65536 | 338973972 | 455979 | 455979 | 46546307 | 45037321 | 0 | 22367802 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14685 | 8233527 | 0 | 8388608 | 1171495682 | 4195228536 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35237866 | 12075112087152249 | 12075122888244197 | 12075122888490755 | 12075112087929904 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7fa3f9496300 | 0x7fa2ed8356c0 | 3288872 | 3114043 | 46471094 | 65536 | 345286587 | 411108 | 411108 | 47126327 | 46065001 | 0 | 26899981 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14591 | 8234566 | 0 | 8388608 | 1179749829 | 4870675947 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35889729 | 12075112088196028 | 12075122888590274 | 12075122888836032 | 12075112088880621 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7fa3f9496a00 | 0x7fa2ed835700 | 1692752 | 1525457 | 22627329 | 65536 | 162121820 | 211593 | 211593 | 23297747 | 22086723 | 32882 | 13024615 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7632 | 2049121 | 0 | 2097152 | 1092346298 | 1919510895 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15955125 | 12075112089146374 | 12075122888960991 | 12075122889085790 | 12075112089626917 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7fa3f9496900 | 0x7fa2ed835740 | 1758888 | 1589270 | 23550033 | 65536 | 141457518 | 219860 | 219860 | 24254807 | 22863661 | 31389 | 10197738 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7613 | 2053200 | 0 | 2097152 | 1168464219 | 2188935612 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14610180 | 12075112089954917 | 12075122889126430 | 12075122889251229 | 12075112090505350 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7fa3f9496800 | 0x7fa2ed835780 | 1763112 | 1566599 | 23244442 | 65536 | 152990180 | 220388 | 220388 | 23914637 | 22749682 | 32419 | 11034271 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7646 | 2048615 | 0 | 2097152 | 1243108054 | 2272133205 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15401420 | 12075112090842857 | 12075122889301468 | 12075122889426267 | 12075112091364697 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7fa3f9496700 | 0x7fa2ed8357c0 | 3206576 | 3037357 | 45275887 | 65536 | 310010671 | 400821 | 400821 | 45976097 | 44694461 | 0 | 22943024 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13871 | 8241694 | 0 | 8388608 | 1160859476 | 4146075527 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36000948 | 12075112091685373 | 12075122889465787 | 12075122889721625 | 12075112092423595 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7fa3f9496600 | 0x7fa2ed835800 | 3661184 | 3054110 | 45550868 | 65536 | 327713596 | 457647 | 457647 | 46476782 | 45119870 | 0 | 25900814 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13895 | 8241350 | 0 | 8388608 | 1155667287 | 3919698853 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33881008 | 12075112092659233 | 12075122889806104 | 12075122890049462 | 12075112093427480 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7fa3f9496500 | 0x7fa2ed835840 | 1686504 | 1512204 | 22465316 | 65536 | 154643807 | 210812 | 210812 | 23098847 | 21944089 | 32626 | 14584273 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7624 | 2053573 | 0 | 2097152 | 1298781323 | 2461502687 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15932024 | 12075112093652749 | 12075122890161301 | 12075122890286100 | 12075112094182043 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7fa3f9496400 | 0x7fa2ed835880 | 1798120 | 1631619 | 24144894 | 65536 | 136844558 | 224764 | 224764 | 24889997 | 23354470 | 31235 | 7464046 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7637 | 2052706 | 0 | 2097152 | 1157479879 | 2086565643 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15196901 | 12075112094500675 | 12075122890325460 | 12075122890451379 | 12075112095093226 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7fa3f9496300 | 0x7fa2ed8358c0 | 1705904 | 1541998 | 22862505 | 65536 | 138953018 | 213237 | 213237 | 23545577 | 22336297 | 31781 | 12640329 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7627 | 2052440 | 0 | 2097152 | 1219353052 | 2257775611 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16462982 | 12075112095427737 | 12075122890501458 | 12075122890626097 | 12075112095978331 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7fa3f9496a00 | 0x7fa2ed835900 | 3191856 | 3034981 | 45263974 | 65536 | 351370853 | 398981 | 398981 | 45940337 | 44661747 | 0 | 22878290 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14547 | 8232786 | 0 | 8388608 | 1090758362 | 4001391533 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33520660 | 12075112096303094 | 12075122890665777 | 12075122890955055 | 12075112097052427 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7fa3f9496900 | 0x7fa2ed835940 | 3166040 | 3007850 | 44894872 | 65536 | 354089674 | 395754 | 395754 | 45533357 | 44420705 | 0 | 28567500 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13238 | 8248448 | 0 | 8388608 | 1122343478 | 4166980561 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35708865 | 12075112097313582 | 12075122891057454 | 12075122891302092 | 12075112097989488 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7fa3f9496800 | 0x7fa2ed835980 | 1780120 | 1613522 | 23845672 | 65536 | 141517427 | 222514 | 222514 | 24652877 | 23286368 | 30479 | 8848990 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7625 | 2045704 | 0 | 2097152 | 1244187542 | 2362876984 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17204107 | 12075112098256434 | 12075122891390731 | 12075122891515530 | 12075112098751324 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7fa3f9496700 | 0x7fa2ed8359c0 | 1787584 | 1621787 | 23984392 | 65536 | 141066147 | 223447 | 223447 | 24743387 | 23100198 | 30094 | 7999926 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7640 | 2055124 | 0 | 2097152 | 1038642500 | 1749389014 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17973059 | 12075112099081407 | 12075122891557129 | 12075122891682248 | 12075112099624627 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7fa3f9496600 | 0x7fa2ed835a00 | 1679272 | 1517600 | 22473605 | 65536 | 166122882 | 209908 | 209908 | 23179577 | 21964998 | 33598 | 14221458 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7635 | 2049593 | 0 | 2097152 | 1291332600 | 2386052355 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16706978 | 12075112099932429 | 12075122891733128 | 12075122891857767 | 12075112100472693 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7fa3f9496500 | 0x7fa2ed835a40 | 3201776 | 3044062 | 45391751 | 65536 | 353476012 | 400221 | 400221 | 46115741 | 44822215 | 0 | 24531073 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13934 | 8241618 | 0 | 8388608 | 1137793735 | 3958809109 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35740143 | 12075112100795162 | 12075122891898727 | 12075122892146885 | 12075112101530419 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7fa3f9496400 | 0x7fa2ed835a80 | 3218480 | 3060210 | 45700474 | 65536 | 349441426 | 402309 | 402309 | 46353317 | 45264363 | 0 | 25401115 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13830 | 8241999 | 0 | 8388608 | 1113267463 | 4086883702 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34217217 | 12075112101790111 | 12075122892234564 | 12075122892479042 | 12075112102471057 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7fa3f9496300 | 0x7fa2ed835ac0 | 1741784 | 1581223 | 23403003 | 65536 | 142753826 | 217722 | 217722 | 24133997 | 22792185 | 32262 | 10093189 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7645 | 2048862 | 0 | 2097152 | 1289333480 | 2517268769 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14345754 | 12075112102731681 | 12075122892536641 | 12075122892660800 | 12075112103221652 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7fa3f9496a00 | 0x7fa2ed835b00 | 1843184 | 1647617 | 24265692 | 65536 | 149524211 | 230397 | 230397 | 25169081 | 23419709 | 31578 | 7595780 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7644 | 2051030 | 0 | 2097152 | 1067851835 | 1889586052 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17208976 | 12075112103543269 | 12075122892702080 | 12075122892828159 | 12075112104088312 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7fa3f9496900 | 0x7fa2ed835b40 | 1737776 | 1572280 | 23267057 | 65536 | 145317802 | 217221 | 217221 | 23999927 | 22686829 | 32130 | 10144908 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7600 | 2049455 | 0 | 2097152 | 1065092995 | 1810044840 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16695973 | 12075112104406834 | 12075122892878558 | 12075122893003677 | 12075112104936468 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7fa3f9496800 | 0x7fa2ed835b80 | 3368384 | 3199297 | 47650999 | 65536 | 326286516 | 421047 | 421047 | 48407177 | 47075022 | 0 | 26922600 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 12942 | 8250514 | 0 | 8388608 | 1045745170 | 4059206138 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35901446 | 12075112105243489 | 12075122893044637 | 12075122893302555 | 12075112105986059 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7fa3f9496700 | 0x7fa2ed835bc0 | 3264512 | 3104319 | 46275371 | 65536 | 297603095 | 408063 | 408063 | 46996577 | 45845599 | 0 | 21943005 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14665 | 8233757 | 0 | 8388608 | 1132661716 | 3988093798 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33536303 | 12075112106230774 | 12075122893363514 | 12075122893607992 | 12075112106886041 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7fa3f9496600 | 0x7fa2ed835c00 | 1738480 | 1576585 | 23311292 | 65536 | 144949437 | 217309 | 217309 | 24065447 | 22769983 | 32220 | 10289361 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7625 | 2050703 | 0 | 2097152 | 1068157889 | 1850297287 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16500620 | 12075112107161433 | 12075122893662072 | 12075122893786551 | 12075112107645773 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7fa3f9496500 | 0x7fa2ed835c40 | 1849688 | 1658219 | 24455019 | 65536 | 136325474 | 231210 | 231210 | 25289147 | 23116074 | 30768 | 5350051 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7732 | 2048155 | 0 | 2097152 | 1152631608 | 2122817444 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16592008 | 12075112107954387 | 12075122893828470 | 12075122893954549 | 12075112108506523 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7fa3f9496400 | 0x7fa2ed835c80 | 1704704 | 1544590 | 22837464 | 65536 | 150843200 | 213087 | 213087 | 23584637 | 22244272 | 32366 | 12030871 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7638 | 2050755 | 0 | 2097152 | 1166905916 | 2141608051 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16208296 | 12075112108825355 | 12075122894004469 | 12075122894129588 | 12075112109357134 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7fa3f9496300 | 0x7fa2ed835cc0 | 3368432 | 3193423 | 47537514 | 65536 | 348354822 | 421053 | 421053 | 48356537 | 46953567 | 0 | 25753238 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 12626 | 8254466 | 0 | 8388608 | 1139315922 | 4289918571 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36435800 | 12075112109660587 | 12075122894170388 | 12075122894426065 | 12075112110387518 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7fa3f9496a00 | 0x7fa2ed835d00 | 3229464 | 3071097 | 45713926 | 65536 | 300829122 | 403682 | 403682 | 46482197 | 45272166 | 0 | 26553134 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13996 | 8240035 | 0 | 8388608 | 1142402393 | 4121303000 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35169839 | 12075112110649726 | 12075122894481425 | 12075122894724303 | 12075112111314220 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7fa3f9496900 | 0x7fa2ed835d40 | 1727040 | 1564930 | 23155197 | 65536 | 146015427 | 215879 | 215879 | 23889677 | 22562499 | 32254 | 10746143 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7643 | 2047567 | 0 | 2097152 | 1141326150 | 1948538648 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15063441 | 12075112111552523 | 12075122894779182 | 12075122894904141 | 12075112112065406 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7fa3f9496800 | 0x7fa2ed835d80 | 1792128 | 1615232 | 23754366 | 65536 | 154831644 | 224015 | 224015 | 24644237 | 22394194 | 31150 | 6185417 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7843 | 2045060 | 0 | 2097152 | 1081540907 | 1960662307 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15613384 | 12075112112352229 | 12075122894945421 | 12075122895073740 | 12075112112891121 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7fa3f9496700 | 0x7fa2ed835dc0 | 1734896 | 1571655 | 23262768 | 65536 | 148118523 | 216861 | 216861 | 23991587 | 22695696 | 32223 | 10341142 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7635 | 2048202 | 0 | 2097152 | 1189926587 | 2123425145 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17453014 | 12075112113193353 | 12075122895124460 | 12075122895249579 | 12075112113717336 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7fa3f9496600 | 0x7fa2ed835e00 | 3180648 | 3019824 | 44908531 | 65536 | 349937491 | 397580 | 397580 | 45713057 | 44242791 | 0 | 30610349 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14379 | 8235970 | 0 | 8388608 | 1160169758 | 4053776125 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34962594 | 12075112113999421 | 12075122895290698 | 12075122895547656 | 12075112114752059 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7fa3f9496500 | 0x7fa2ed835e40 | 3694056 | 3056664 | 45473246 | 65536 | 275920618 | 461756 | 461756 | 46524107 | 45012857 | 0 | 24042157 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14299 | 8236545 | 0 | 8388608 | 1091209695 | 3842395825 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34185717 | 12075112114990041 | 12075122895598536 | 12075122895843014 | 12075112115729446 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7fa3f9496400 | 0x7fa2ed835e80 | 1750784 | 1575939 | 23305171 | 65536 | 148353625 | 218847 | 218847 | 24054857 | 22684650 | 32736 | 10563801 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7642 | 2049035 | 0 | 2097152 | 1159183877 | 2117717445 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16786792 | 12075112115960405 | 12075122895899333 | 12075122896024612 | 12075112116487474 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7fa3f9496300 | 0x7fa2ed835ec0 | 1801472 | 1631223 | 23868183 | 65536 | 163901753 | 225183 | 225183 | 24883967 | 21807245 | 31518 | 5086387 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7889 | 2034643 | 0 | 2097152 | 819695078 | 1384563644 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14372549 | 12075112116767785 | 12075122896065252 | 12075122896193891 | 12075112117316635 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7fa3f9496a00 | 0x7fa2ed835f00 | 1731096 | 1575785 | 23280032 | 65536 | 143551285 | 216386 | 216386 | 24052517 | 22613586 | 32136 | 9968773 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7661 | 2043352 | 0 | 2097152 | 1091871377 | 1899765435 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15219356 | 12075112117606474 | 12075122896244770 | 12075122896369249 | 12075112118133072 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7fa3f9496900 | 0x7fa2ed835f40 | 3365080 | 3170230 | 47161764 | 65536 | 297944052 | 420634 | 420634 | 48003557 | 46492043 | 0 | 20452238 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14183 | 8237529 | 0 | 8388608 | 1195854177 | 4587024238 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 36356756 | 12075112118408063 | 12075122896409409 | 12075122896661887 | 12075112119108225 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7fa3f9496800 | 0x7fa2ed835f80 | 3198488 | 3041392 | 45397010 | 65536 | 289566223 | 399810 | 399810 | 46036577 | 44945546 | 0 | 23043254 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13543 | 8244404 | 0 | 8388608 | 1174341361 | 4256954871 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34424393 | 12075112119366174 | 12075122896716286 | 12075122896960284 | 12075112120049614 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7fa3f9496700 | 0x7fa2ed835fc0 | 1714200 | 1553696 | 22980975 | 65536 | 151096323 | 214274 | 214274 | 23732237 | 22426628 | 32867 | 11558372 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7608 | 2047692 | 0 | 2097152 | 1215296397 | 2222405370 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15446406 | 12075112120312703 | 12075122897018364 | 12075122897143163 | 12075112120848469 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7fa3f9496600 | 0x7fa2ed836000 | 1779632 | 1619944 | 23999193 | 65536 | 188498678 | 222453 | 222453 | 24714887 | 20333831 | 31191 | 4846844 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7818 | 2029421 | 0 | 2097152 | 865590226 | 1907485195 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15176779 | 12075112121130162 | 12075122897184602 | 12075122897321721 | 12075112121683130 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7fa3f9496500 | 0x7fa2ed836040 | 1750888 | 1577496 | 23365799 | 65536 | 148695024 | 218860 | 218860 | 24078257 | 22776875 | 32196 | 10475436 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7641 | 2054930 | 0 | 2097152 | 1082313610 | 1908733462 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15611000 | 12075112121971365 | 12075122897371961 | 12075122897497240 | 12075112122513954 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7fa3f9496400 | 0x7fa2ed836080 | 3373760 | 3205079 | 47645040 | 65536 | 324641723 | 421719 | 421719 | 48492017 | 47014201 | 0 | 26072854 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14200 | 8237832 | 0 | 8388608 | 1102664268 | 4024641047 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35631886 | 12075112122794896 | 12075122897539159 | 12075122897797077 | 12075112123507390 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7fa3f9496300 | 0x7fa2ed8360c0 | 3244264 | 3085434 | 46044511 | 65536 | 324138879 | 405532 | 405532 | 46697237 | 45575014 | 0 | 24080816 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14434 | 8236674 | 0 | 8388608 | 1116859707 | 4170244295 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33331034 | 12075112123765650 | 12075122897852117 | 12075122898097875 | 12075112124466041 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7fa3f9496a00 | 0x7fa2ed836100 | 1729448 | 1562207 | 23151449 | 65536 | 156694503 | 216180 | 216180 | 23848877 | 22550286 | 31637 | 11438302 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7652 | 2054746 | 0 | 2097152 | 1208532807 | 2250463333 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15919341 | 12075112124721386 | 12075122898156914 | 12075122898281713 | 12075112125269494 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7fa3f9496900 | 0x7fa2ed836140 | 1874024 | 1706268 | 25242102 | 65536 | 192159615 | 234252 | 234252 | 26009597 | 19503466 | 31021 | 5183190 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7793 | 2026652 | 0 | 2097152 | 807451458 | 1859228619 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16404243 | 12075112125548844 | 12075122898324113 | 12075122898466672 | 12075112126103514 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7fa3f9496800 | 0x7fa2ed836180 | 1773680 | 1569608 | 23262490 | 65536 | 141820904 | 221709 | 221709 | 23959967 | 22637440 | 30838 | 10325247 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7639 | 2045285 | 0 | 2097152 | 1077748649 | 1853272827 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17274416 | 12075112126391980 | 12075122898517231 | 12075122898642190 | 12075112126924660 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7fa3f9496700 | 0x7fa2ed8361c0 | 3356928 | 3168472 | 47076370 | 65536 | 340773200 | 419615 | 419615 | 47942837 | 46351595 | 0 | 26092121 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13767 | 8241081 | 0 | 8388608 | 997346509 | 3540769600 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34249907 | 12075112127211864 | 12075122898682350 | 12075122898938988 | 12075112127906345 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7fa3f9496600 | 0x7fa2ed836200 | 3226736 | 3052634 | 45453680 | 65536 | 288232246 | 403341 | 403341 | 46205327 | 44937300 | 0 | 24059265 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14805 | 8231086 | 0 | 8388608 | 1185700083 | 4247838887 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35383344 | 12075112128166678 | 12075122898993227 | 12075122899241065 | 12075112128853916 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7fa3f9496500 | 0x7fa2ed836240 | 1745456 | 1574933 | 23291506 | 65536 | 155766130 | 218181 | 218181 | 24039677 | 22677854 | 32558 | 10305642 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7643 | 2047801 | 0 | 2097152 | 1086328906 | 1876273046 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16566421 | 12075112129121663 | 12075122899298665 | 12075122899424264 | 12075112129655906 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7fa3f9496400 | 0x7fa2ed836280 | 1919320 | 1764342 | 26167155 | 65536 | 209431904 | 239914 | 239914 | 26880827 | 18630888 | 29649 | 3589763 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7810 | 2023840 | 0 | 2097152 | 657314751 | 1491283988 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14995709 | 12075112129938130 | 12075122899464423 | 12075122899613862 | 12075112130494735 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7fa3f9496300 | 0x7fa2ed8362c0 | 1748784 | 1571295 | 23197128 | 65536 | 152040574 | 218597 | 218597 | 23985107 | 22524569 | 32235 | 10012396 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7652 | 2044981 | 0 | 2097152 | 1129648123 | 2078105652 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15479035 | 12075112130800954 | 12075122899682181 | 12075122899807940 | 12075112131317935 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7fa3f9496a00 | 0x7fa2ed836300 | 3339584 | 3158863 | 46895400 | 65536 | 325660057 | 417447 | 417447 | 47798567 | 46187813 | 0 | 24018469 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13026 | 8250016 | 0 | 8388608 | 1033769962 | 3927185730 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35401388 | 12075112131597594 | 12075122899847140 | 12075122900100418 | 12075112132301523 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7fa3f9496900 | 0x7fa2ed836340 | 3189416 | 3033499 | 45274585 | 65536 | 346512832 | 398676 | 398676 | 45924257 | 44775776 | 0 | 25594018 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13298 | 8248045 | 0 | 8388608 | 1175606906 | 4408916785 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34144021 | 12075112132560864 | 12075122900160257 | 12075122900408255 | 12075112133252299 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7fa3f9496800 | 0x7fa2ed836380 | 1766448 | 1606021 | 23698090 | 65536 | 148758832 | 220805 | 220805 | 24506057 | 22996224 | 31421 | 8248158 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7648 | 2046770 | 0 | 2097152 | 1181734319 | 2119860418 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14820489 | 12075112133509247 | 12075122900461215 | 12075122900587134 | 12075112134047627 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7fa3f9496700 | 0x7fa2ed8363c0 | 2018880 | 1858874 | 27607774 | 65536 | 223393924 | 252359 | 252359 | 28298807 | 17189951 | 29487 | 2762113 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7735 | 2023340 | 0 | 2097152 | 517240696 | 1227861098 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13373000 | 12075112134321416 | 12075122900628094 | 12075122900785532 | 12075112134885354 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7fa3f9496600 | 0x7fa2ed836400 | 1796392 | 1629905 | 24131044 | 65536 | 139718110 | 224548 | 224548 | 24865277 | 23513680 | 31214 | 6408388 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7639 | 2043963 | 0 | 2097152 | 969140065 | 1583648887 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15941862 | 12075112135187215 | 12075122900835132 | 12075122900960731 | 12075112135718813 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7fa3f9496500 | 0x7fa2ed836440 | 3378624 | 3174717 | 47094600 | 65536 | 324530477 | 422327 | 422327 | 48036587 | 46251039 | 0 | 19751031 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13529 | 8244271 | 0 | 8388608 | 918372761 | 3507737250 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 32039075 | 12075112136006478 | 12075122901000730 | 12075122901263928 | 12075112136703323 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7fa3f9496400 | 0x7fa2ed836480 | 3173160 | 3006335 | 44705042 | 65536 | 302352440 | 396644 | 396644 | 45510737 | 44199473 | 0 | 29322265 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13925 | 8240357 | 0 | 8388608 | 1117962424 | 4007495587 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 32836856 | 12075112136964628 | 12075122901322328 | 12075122901568246 | 12075112137656795 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7fa3f9496300 | 0x7fa2ed8364c0 | 1834560 | 1655840 | 24530132 | 65536 | 151927914 | 229319 | 229319 | 25253387 | 23845394 | 31057 | 5501431 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7652 | 2046881 | 0 | 2097152 | 1021182394 | 1745710427 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14653964 | 12075112137915806 | 12075122901620725 | 12075122901746004 | 12075112138475516 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7fa3f9496a00 | 0x7fa2ed836500 | 2105392 | 1946940 | 28950212 | 65536 | 234424425 | 263173 | 263173 | 29619947 | 16762086 | 29298 | 2727928 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7717 | 2021123 | 0 | 2097152 | 433630060 | 1078085581 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13504522 | 12075112138755997 | 12075122901786164 | 12075122901951922 | 12075112139326568 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7fa3f9496900 | 0x7fa2ed836540 | 1843240 | 1666459 | 24626522 | 65536 | 149804837 | 230404 | 230404 | 25412597 | 23889393 | 31120 | 5037866 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7619 | 2047430 | 0 | 2097152 | 1150306324 | 2063548048 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16599486 | 12075112139611297 | 12075122902002002 | 12075122902127761 | 12075112140144177 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7fa3f9496800 | 0x7fa2ed836580 | 3259880 | 3089376 | 45827346 | 65536 | 325413817 | 407484 | 407484 | 46756367 | 44701402 | 0 | 16957836 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14187 | 8235378 | 0 | 8388608 | 1057703122 | 4183180115 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34269478 | 12075112140418948 | 12075122902168881 | 12075122902431918 | 12075112141105774 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7fa3f9496700 | 0x7fa2ed8365c0 | 3241704 | 3065350 | 45626465 | 65536 | 272216303 | 405212 | 405212 | 46396037 | 45103270 | 0 | 25479978 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14044 | 8239597 | 0 | 8388608 | 1131517529 | 4063283621 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35475591 | 12075112141362130 | 12075122902487918 | 12075122902732076 | 12075112142046733 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7fa3f9496600 | 0x7fa2ed836600 | 1777752 | 1607394 | 23749735 | 65536 | 154230421 | 222218 | 222218 | 24526667 | 23041826 | 31688 | 8597090 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7617 | 2048110 | 0 | 2097152 | 1089109736 | 1912630794 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15083959 | 12075112142303410 | 12075122902786955 | 12075122902913834 | 12075112142853011 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7fa3f9496500 | 0x7fa2ed836640 | 2195048 | 2038135 | 30288907 | 65536 | 246711301 | 274380 | 274380 | 30987797 | 15294322 | 31120 | 1915406 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7694 | 2020109 | 0 | 2097152 | 418136500 | 1042513291 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13033454 | 12075112143140125 | 12075122902953514 | 12075122903126313 | 12075112143711567 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7fa3f9496400 | 0x7fa2ed836680 | 1726632 | 1558640 | 23014181 | 65536 | 150374213 | 215828 | 215828 | 23798357 | 22306044 | 32124 | 11458810 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7615 | 2046569 | 0 | 2097152 | 1149433035 | 2025013186 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14200869 | 12075112144009580 | 12075122903178632 | 12075122903304391 | 12075112144532512 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7fa3f9496300 | 0x7fa2ed8366c0 | 3225600 | 3037438 | 45083222 | 65536 | 341439066 | 403199 | 403199 | 45977207 | 43882763 | 0 | 12082298 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14433 | 8231251 | 0 | 8388608 | 812093919 | 3529732083 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35230535 | 12075112144811100 | 12075122903344871 | 12075122903596069 | 12075112145508396 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7fa3f9496a00 | 0x7fa2ed836700 | 3242776 | 3064401 | 45535241 | 65536 | 283610930 | 405346 | 405346 | 46381757 | 44993323 | 0 | 27547026 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13907 | 8241330 | 0 | 8388608 | 1178788387 | 4400798988 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33481000 | 12075112145765273 | 12075122903645988 | 12075122903893346 | 12075112146460575 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7fa3f9496900 | 0x7fa2ed836740 | 1721128 | 1559457 | 23020161 | 65536 | 150015063 | 215140 | 215140 | 23807687 | 22199339 | 31938 | 11466378 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7617 | 2047466 | 0 | 2097152 | 1025708502 | 1735094699 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14921311 | 12075112146719717 | 12075122903947746 | 12075122904073345 | 12075112147258728 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7fa3f9496800 | 0x7fa2ed836780 | 2302120 | 2149886 | 31966872 | 65536 | 260698337 | 287764 | 287764 | 32664077 | 16160719 | 29515 | 2373036 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7685 | 2019491 | 0 | 2097152 | 376659316 | 959044309 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12974490 | 12075112147536825 | 12075122904113984 | 12075122904296063 | 12075112148153702 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7fa3f9496700 | 0x7fa2ed8367c0 | 1772392 | 1590217 | 23497323 | 65536 | 151828019 | 221548 | 221548 | 24268997 | 22747589 | 31859 | 9385652 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7631 | 2044775 | 0 | 2097152 | 982792674 | 1627075513 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14960590 | 12075112148420357 | 12075122904347262 | 12075122904473181 | 12075112148950432 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7fa3f9496600 | 0x7fa2ed836800 | 3660880 | 3101505 | 46109756 | 65536 | 375665578 | 457609 | 457609 | 47185307 | 45635856 | 0 | 1336726 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15321 | 8220090 | 0 | 8388608 | 453115565 | 2428214639 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 37021856 | 12075112149233288 | 12075122904512381 | 12075122904774459 | 12075112149958616 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7fa3f9496500 | 0x7fa2ed836840 | 3184560 | 2999266 | 44619032 | 65536 | 274689494 | 398069 | 398069 | 45404807 | 44080529 | 0 | 28070152 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14386 | 8235646 | 0 | 8388608 | 1181033515 | 4322833089 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35038488 | 12075112150222657 | 12075122904834458 | 12075122905083736 | 12075112150904814 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7fa3f9496400 | 0x7fa2ed836880 | 1776000 | 1612174 | 23805606 | 65536 | 150417433 | 221999 | 221999 | 24598397 | 23093177 | 31385 | 7843109 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7597 | 2050201 | 0 | 2097152 | 928261730 | 1502495313 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15443311 | 12075112151162924 | 12075122905149496 | 12075122905275415 | 12075112151711003 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7fa3f9496300 | 0x7fa2ed8368c0 | 2422848 | 2256930 | 33529465 | 65536 | 272127647 | 302855 | 302855 | 34269767 | 16436813 | 28922 | 2434286 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7685 | 2020204 | 0 | 2097152 | 371652713 | 947977632 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12053937 | 12075112151996273 | 12075122905316694 | 12075122905508053 | 12075112152615844 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7fa3f9496a00 | 0x7fa2ed836900 | 1804144 | 1627990 | 24058575 | 65536 | 147785424 | 225517 | 225517 | 24869897 | 22981706 | 31197 | 7184362 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7611 | 2050613 | 0 | 2097152 | 1067971590 | 1844338600 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14536946 | 12075112152883181 | 12075122905557492 | 12075122905682451 | 12075112153428645 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7fa3f9496900 | 0x7fa2ed836940 | 3487960 | 3320198 | 49431321 | 65536 | 404106215 | 435994 | 435994 | 50218607 | 48871272 | 0 | 878512 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15358 | 8219961 | 0 | 8388608 | 278103841 | 1732199709 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 40042184 | 12075112153707282 | 12075122905722291 | 12075122906004528 | 12075112154419766 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7fa3f9496800 | 0x7fa2ed836980 | 3229808 | 3059437 | 45561308 | 65536 | 336644645 | 403725 | 403725 | 46307327 | 44979531 | 0 | 27563657 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14650 | 8234107 | 0 | 8388608 | 1078522759 | 3733734594 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 34740334 | 12075112154676534 | 12075122906063888 | 12075122906310126 | 12075112155373138 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7fa3f9496700 | 0x7fa2ed8369c0 | 1783808 | 1600662 | 23593652 | 65536 | 152069126 | 222975 | 222975 | 24425717 | 22450181 | 32275 | 9333888 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7649 | 2050758 | 0 | 2097152 | 914115807 | 1482178401 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16028769 | 12075112155628172 | 12075122906366285 | 12075122906492044 | 12075112156179036 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7fa3f9496600 | 0x7fa2ed836a00 | 2643792 | 2479927 | 36847360 | 65536 | 300576745 | 330473 | 330473 | 37614647 | 16773626 | 28154 | 2495502 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7701 | 2020800 | 0 | 2097152 | 369989881 | 947698516 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12500910 | 12075112156457453 | 12075122906532204 | 12075122906740842 | 12075112157097332 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7fa3f9496500 | 0x7fa2ed836a40 | 1832240 | 1649838 | 24500672 | 65536 | 144908400 | 229029 | 229029 | 25163267 | 23217754 | 31252 | 5087051 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7717 | 2045307 | 0 | 2097152 | 1053800998 | 1864511716 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15210019 | 12075112157363757 | 12075122906803722 | 12075122906930921 | 12075112157895986 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7fa3f9496400 | 0x7fa2ed836a80 | 3760768 | 3598734 | 53637446 | 65536 | 439537560 | 470095 | 470095 | 54396767 | 52824652 | 0 | 370213 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15359 | 8219731 | 0 | 8388608 | 244841623 | 1602725695 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 43598442 | 12075112158179453 | 12075122906969960 | 12075122907275878 | 12075112158918396 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7fa3f9496300 | 0x7fa2ed836ac0 | 3231744 | 3039977 | 45221053 | 65536 | 299318136 | 403967 | 403967 | 46015367 | 44627748 | 0 | 28860575 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14523 | 8233367 | 0 | 8388608 | 1046793813 | 3661112836 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 33022654 | 12075112159184661 | 12075122907338437 | 12075122907622755 | 12075112159868943 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7fa3f9496a00 | 0x7fa2ed836b00 | 1731760 | 1555533 | 22913503 | 65536 | 160632853 | 216469 | 216469 | 23748647 | 22021917 | 32303 | 11112488 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7704 | 2047219 | 0 | 2097152 | 1023782441 | 1817238755 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15033840 | 12075112160132172 | 12075122907678914 | 12075122907807393 | 12075112160665643 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7fa3f9496900 | 0x7fa2ed836b40 | 2862704 | 2699100 | 40095658 | 65536 | 327289094 | 357837 | 357837 | 40938481 | 16785699 | 28452 | 2574238 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7683 | 2020684 | 0 | 2097152 | 364765523 | 949997143 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12490239 | 12075112160939943 | 12075122907847713 | 12075122908074591 | 12075112161604188 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7fa3f9496800 | 0x7fa2ed836b80 | 1803056 | 1627528 | 23961431 | 65536 | 152299576 | 225381 | 225381 | 24828647 | 22897587 | 31969 | 7188018 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7708 | 2049314 | 0 | 2097152 | 963758138 | 1632842663 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15407784 | 12075112161871144 | 12075122908141311 | 12075122908268830 | 12075112162408552 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7fa3f9496700 | 0x7fa2ed836bc0 | 4029568 | 3865122 | 57600256 | 65536 | 470699007 | 503695 | 503695 | 58392497 | 56765302 | 0 | 532469 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219696 | 0 | 8388608 | 246841124 | 1605935498 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 47372486 | 12075112162688102 | 12075122908310749 | 12075122908639706 | 12075112163445670 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7fa3f9496600 | 0x7fa2ed836c00 | 3233904 | 3045270 | 45258591 | 65536 | 270296825 | 404237 | 404237 | 46094777 | 44501803 | 0 | 26053490 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13284 | 8247196 | 0 | 8388608 | 1090033222 | 3895585181 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35451772 | 12075112163704100 | 12075122908694906 | 12075122908946424 | 12075112164395665 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7fa3f9496500 | 0x7fa2ed836c40 | 1746776 | 1569223 | 23085560 | 65536 | 155955209 | 218346 | 218346 | 23954087 | 22081336 | 32512 | 10648051 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7751 | 2045991 | 0 | 2097152 | 1051643562 | 1881467726 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14353940 | 12075112164648124 | 12075122909006583 | 12075122909136182 | 12075112165201493 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7fa3f9496400 | 0x7fa2ed836c80 | 3069248 | 2891322 | 43024438 | 65536 | 327387706 | 383655 | 383655 | 43785587 | 16041204 | 29344 | 1850937 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7683 | 2020106 | 0 | 2097152 | 358773139 | 912560980 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 11697396 | 12075112165482495 | 12075122909179062 | 12075122909423380 | 12075112166151809 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7fa3f9496300 | 0x7fa2ed836cc0 | 1781288 | 1593676 | 23367579 | 65536 | 164081968 | 222660 | 222660 | 24321887 | 22020669 | 32684 | 9465441 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7888 | 2044781 | 0 | 2097152 | 1012033738 | 1838660627 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15160910 | 12075112166425247 | 12075122909492659 | 12075122909621938 | 12075112166954360 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7fa3f9496a00 | 0x7fa2ed836d00 | 4902848 | 4441231 | 66174134 | 65536 | 540564480 | 612855 | 612855 | 67275467 | 64939279 | 0 | 369676 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219767 | 0 | 8388608 | 229193486 | 1523826930 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 54452204 | 12075112167237667 | 12075122909661298 | 12075122910035695 | 12075112168064593 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7fa3f9496900 | 0x7fa2ed836d40 | 3243568 | 3006013 | 44633835 | 65536 | 341954512 | 405445 | 405445 | 45505967 | 43892089 | 0 | 27627252 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14581 | 8233270 | 0 | 8388608 | 1073427679 | 3962444424 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35903935 | 12075112168323775 | 12075122910094894 | 12075122910349932 | 12075112169021201 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7fa3f9496800 | 0x7fa2ed836d80 | 1776088 | 1603921 | 23567240 | 65536 | 168365818 | 222010 | 222010 | 24474497 | 22095743 | 31989 | 7975214 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7923 | 2045960 | 0 | 2097152 | 901786566 | 1588257869 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14527873 | 12075112169280403 | 12075122910407692 | 12075122910537131 | 12075112169824985 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7fa3f9496700 | 0x7fa2ed836dc0 | 3481368 | 3312170 | 49302809 | 65536 | 402010250 | 435170 | 435170 | 50098277 | 17126544 | 27638 | 2242767 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7684 | 2019967 | 0 | 2097152 | 359138157 | 919157152 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12557463 | 12075112170109283 | 12075122910578410 | 12075122910856328 | 12075112170813512 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7fa3f9496600 | 0x7fa2ed836e00 | 1800304 | 1622635 | 24039567 | 65536 | 185234824 | 225037 | 225037 | 24755357 | 21924833 | 31366 | 4925345 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7945 | 2043614 | 0 | 2097152 | 946091292 | 1948741368 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 18084457 | 12075112171088593 | 12075122910924807 | 12075122911060006 | 12075112171614330 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7fa3f9496500 | 0x7fa2ed836e40 | 5162368 | 4996464 | 74506503 | 65536 | 611300704 | 645295 | 645295 | 75362747 | 71290363 | 0 | 310614 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219704 | 0 | 8388608 | 228372631 | 1520872282 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 59422972 | 12075112171892507 | 12075122911099046 | 12075122911522082 | 12075112172753938 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7fa3f9496400 | 0x7fa2ed836e80 | 3305752 | 3124814 | 46142407 | 65536 | 326416733 | 413218 | 413218 | 47287937 | 44196763 | 0 | 18244792 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 13776 | 8241014 | 0 | 8388608 | 1063107272 | 4019763852 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 35875465 | 12075112173019111 | 12075122911586722 | 12075122911849280 | 12075112173712700 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7fa3f9496300 | 0x7fa2ed836ec0 | 1795712 | 1627641 | 24003935 | 65536 | 183254473 | 224463 | 224463 | 24830327 | 21541142 | 31178 | 4122125 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7890 | 2041293 | 0 | 2097152 | 869346490 | 1824236503 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17447367 | 12075112173966962 | 12075122911906559 | 12075122912040638 | 12075112174522104 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7fa3f9496a00 | 0x7fa2ed836f00 | 4241368 | 3730872 | 55567348 | 65536 | 454170751 | 530170 | 530170 | 56651027 | 16490066 | 28695 | 1795884 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7684 | 2020018 | 0 | 2097152 | 356371364 | 902717727 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12121338 | 12075112174799369 | 12075122912080318 | 12075122912393435 | 12075112175580711 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7fa3f9496900 | 0x7fa2ed836f40 | 1967128 | 1794794 | 26651005 | 65536 | 215547961 | 245890 | 245890 | 27371957 | 22704295 | 24732 | 2547171 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7726 | 2049607 | 0 | 2097152 | 333066084 | 898258908 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17515129 | 12075112175850042 | 12075122912462554 | 12075122912613913 | 12075112176410653 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7fa3f9496800 | 0x7fa2ed836f80 | 6281768 | 6117054 | 91255023 | 65536 | 750458922 | 785220 | 785220 | 92171567 | 85020053 | 0 | 241412 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8220021 | 0 | 8388608 | 221956290 | 1497337577 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 70989552 | 12075112176688199 | 12075122912654233 | 12075122913169908 | 12075112177637123 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7fa3f9496700 | 0x7fa2ed836fc0 | 3696680 | 3507862 | 51725588 | 65536 | 387021134 | 462084 | 462084 | 53033687 | 40841949 | 0 | 11783545 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14445 | 8231470 | 0 | 8388608 | 678504680 | 3021068746 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 30807579 | 12075112177892557 | 12075122913231348 | 12075122913529745 | 12075112178629046 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7fa3f9496600 | 0x7fa2ed837000 | 1970160 | 1813062 | 26938118 | 65536 | 218607954 | 246269 | 246269 | 27611537 | 22759972 | 23932 | 2150181 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7697 | 2047427 | 0 | 2097152 | 341436125 | 905778789 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17377492 | 12075112178884781 | 12075122913590225 | 12075122913743984 | 12075112179457826 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7fa3f9496500 | 0x7fa2ed837040 | 4747496 | 4581085 | 68297667 | 65536 | 560412371 | 593436 | 593436 | 69131987 | 17356626 | 27993 | 2310665 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7685 | 2021065 | 0 | 2097152 | 371274016 | 938084473 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12500990 | 12075112179737997 | 12075122913786223 | 12075122914171020 | 12075112180563852 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7fa3f9496400 | 0x7fa2ed837080 | 2214872 | 2060640 | 30660984 | 65536 | 249968652 | 276858 | 276858 | 31326467 | 22601437 | 25614 | 2347638 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7704 | 2046615 | 0 | 2097152 | 353647611 | 948392100 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 19196644 | 12075112180830387 | 12075122914239020 | 12075122914414698 | 12075112181404193 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7fa3f9496300 | 0x7fa2ed8370c0 | 7397992 | 7233728 | 107943121 | 65536 | 889723580 | 924748 | 924748 | 108921617 | 94134377 | 0 | 380560 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219663 | 0 | 8388608 | 219666040 | 1485874588 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 77527393 | 12075112181683102 | 12075122914454378 | 12075122915063973 | 12075112182722964 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7fa3f9496a00 | 0x7fa2ed837100 | 3995184 | 3834321 | 57094617 | 65536 | 462812272 | 499397 | 499397 | 57930437 | 37375226 | 0 | 4001265 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14457 | 8229745 | 0 | 8388608 | 428470816 | 2286823997 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 25710792 | 12075112182980783 | 12075122915123652 | 12075122915451009 | 12075112183744753 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7fa3f9496900 | 0x7fa2ed837140 | 2259432 | 2092619 | 31091405 | 65536 | 253481367 | 282428 | 282428 | 31805147 | 23313427 | 23896 | 2143408 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7697 | 2045145 | 0 | 2097152 | 321970340 | 869470911 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16113458 | 12075112184006910 | 12075122915511329 | 12075122915689087 | 12075112184601535 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7fa3f9496800 | 0x7fa2ed837180 | 5592896 | 5422463 | 80879668 | 65536 | 658455259 | 699111 | 699111 | 81752627 | 16442901 | 28707 | 2317381 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7684 | 2019628 | 0 | 2097152 | 406120245 | 933653334 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12161745 | 12075112184878450 | 12075122915729407 | 12075122916185563 | 12075112185772271 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7fa3f9496700 | 0x7fa2ed8371c0 | 2509464 | 2347925 | 34923872 | 65536 | 285296913 | 313682 | 313682 | 35634587 | 22528686 | 24463 | 2258386 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7699 | 2045065 | 0 | 2097152 | 339883299 | 904920335 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 17482084 | 12075112186047803 | 12075122916255483 | 12075122916454201 | 12075112186661173 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7fa3f9496600 | 0x7fa2ed837200 | 8515504 | 8354657 | 124732629 | 65536 | 1028792220 | 1064437 | 1064437 | 125735597 | 99043743 | 0 | 309620 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219685 | 0 | 8388608 | 222935109 | 1501847684 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 80116572 | 12075112186919753 | 12075122916511000 | 12075122917213555 | 12075112188050164 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7fa3f9496500 | 0x7fa2ed837240 | 4847464 | 4373794 | 65170358 | 65536 | 533071474 | 605932 | 605932 | 66286487 | 36418732 | 0 | 1857076 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14446 | 8229994 | 0 | 8388608 | 318403972 | 1738947848 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 21738321 | 12075112188310057 | 12075122917276754 | 12075122917648751 | 12075112189140190 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7fa3f9496400 | 0x7fa2ed837280 | 2542064 | 2383627 | 35491818 | 65536 | 290031326 | 317757 | 317757 | 36170117 | 20655188 | 28017 | 2382526 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7688 | 2043560 | 0 | 2097152 | 328924231 | 909463591 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16719068 | 12075112189401415 | 12075122917707790 | 12075122917909709 | 12075112190025184 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7fa3f9496300 | 0x7fa2ed8372c0 | 6441176 | 6262418 | 93463920 | 65536 | 755316340 | 805146 | 805146 | 94351967 | 16054305 | 29402 | 1960657 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7689 | 2018450 | 0 | 2097152 | 479798427 | 1022239069 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15215273 | 12075112190295917 | 12075122917967468 | 12075122918493224 | 12075112191250882 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7fa3f9496a00 | 0x7fa2ed837300 | 2813168 | 2627372 | 39161574 | 65536 | 320138439 | 351645 | 351645 | 39826337 | 22317126 | 27452 | 2270148 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7683 | 2042294 | 0 | 2097152 | 336229274 | 900921293 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16935578 | 12075112191527336 | 12075122918570183 | 12075122918794341 | 12075112192162306 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7fa3f9496900 | 0x7fa2ed837340 | 9652456 | 9489811 | 141724614 | 65536 | 1169639270 | 1206556 | 1206556 | 142762937 | 112115304 | 0 | 414423 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219730 | 0 | 8388608 | 219647932 | 1495401459 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 90522207 | 12075112192423892 | 12075122918853381 | 12075122919651614 | 12075112193655250 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7fa3f9496800 | 0x7fa2ed837380 | 5114840 | 4954148 | 73812560 | 65536 | 604356110 | 639354 | 639354 | 74728037 | 37628369 | 0 | 2577056 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14833 | 8225822 | 0 | 8388608 | 324306777 | 1774399555 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 24490315 | 12075112193910384 | 12075122919718334 | 12075122920136730 | 12075112194771154 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7fa3f9496700 | 0x7fa2ed8373c0 | 2835688 | 2662237 | 39627759 | 65536 | 323429825 | 354460 | 354460 | 40349297 | 21719079 | 27096 | 2258586 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7686 | 2042536 | 0 | 2097152 | 328868600 | 890650200 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 16952008 | 12075112195029634 | 12075122920197210 | 12075122920421208 | 12075112195681876 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7fa3f9496600 | 0x7fa2ed837400 | 7273192 | 7110443 | 106116997 | 65536 | 873607281 | 909148 | 909148 | 107072417 | 18243888 | 29485 | 2366390 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7717 | 2020790 | 0 | 2097152 | 510265299 | 1077673226 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 15001286 | 12075112195942942 | 12075122920481847 | 12075122921078642 | 12075112196962727 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7fa3f9496500 | 0x7fa2ed837440 | 3345728 | 3169658 | 47138506 | 65536 | 384466686 | 418215 | 418215 | 47960537 | 18316670 | 25323 | 2208958 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7697 | 2024979 | 0 | 2097152 | 345389859 | 908137150 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13060352 | 12075112197247406 | 12075122921148562 | 12075122921418319 | 12075112197923031 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7fa3f9496400 | 0x7fa2ed837480 | 11925808 | 11740290 | 175300064 | 65536 | 1447993668 | 1490725 | 1490725 | 176520197 | 119821283 | 0 | 490761 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219998 | 0 | 8388608 | 221278805 | 1500421756 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 95418239 | 12075112198187924 | 12075122921476559 | 12075122922461351 | 12075112199602803 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7fa3f9496300 | 0x7fa2ed8374c0 | 6449752 | 6070744 | 90457686 | 65536 | 726252581 | 806218 | 806218 | 91724987 | 34066963 | 0 | 526528 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14340 | 8231051 | 0 | 8388608 | 320476527 | 1756345292 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 20117222 | 12075112199865331 | 12075122922520070 | 12075122923034146 | 12075112200843399 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7fa3f9496a00 | 0x7fa2ed837500 | 3371736 | 3215870 | 47803692 | 65536 | 390758335 | 421466 | 421466 | 48653777 | 18457768 | 25368 | 2187083 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7686 | 2025441 | 0 | 2097152 | 347733053 | 903040620 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13773235 | 12075112201106117 | 12075122923092865 | 12075122923364223 | 12075112201840572 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7fa3f9496900 | 0x7fa2ed837540 | 9029488 | 8846440 | 132082011 | 65536 | 1088784860 | 1128685 | 1128685 | 133112507 | 107463629 | 338 | 69376 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8113 | 2058877 | 0 | 2097152 | 222799515 | 776593449 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 90922211 | 12075112202105324 | 12075122923423583 | 12075122924160696 | 12075112203285878 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7fa3f9496800 | 0x7fa2ed837580 | 3896168 | 3745952 | 55753626 | 65536 | 456915207 | 487020 | 487020 | 56604977 | 17688363 | 26923 | 2265084 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7701 | 2025761 | 0 | 2097152 | 340677064 | 894188300 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 14524488 | 12075112203540320 | 12075122924230616 | 12075122924546133 | 12075112204275757 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7fa3f9496700 | 0x7fa2ed8375c0 | 14149568 | 13980292 | 208818231 | 65536 | 1725694367 | 1768695 | 1768695 | 210120107 | 142802282 | 0 | 44195 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219660 | 0 | 8388608 | 216999775 | 1507131743 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 114182564 | 12075112204511686 | 12075122924604213 | 12075122925776363 | 12075112206131656 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7fa3f9496600 | 0x7fa2ed837600 | 7348008 | 7181249 | 107212692 | 65536 | 802691100 | 918500 | 918500 | 108134537 | 34810580 | 0 | 776891 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 14926 | 8224828 | 0 | 8388608 | 333607160 | 1801530328 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 24600781 | 12075112206397330 | 12075122925837642 | 12075122926447237 | 12075112207420251 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7fa3f9496500 | 0x7fa2ed837640 | 3949696 | 3789261 | 56371910 | 65536 | 462437215 | 493711 | 493711 | 57254627 | 16859019 | 28255 | 2188287 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7690 | 2025591 | 0 | 2097152 | 359567799 | 911812364 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 12808420 | 12075112207686445 | 12075122926514277 | 12075122926832674 | 12075112208433975 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7fa3f9496400 | 0x7fa2ed837680 | 10679472 | 10528366 | 157233654 | 65536 | 1297520356 | 1334933 | 1334933 | 158341157 | 114074551 | 393 | 159335 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8120 | 2060464 | 0 | 2097152 | 221164026 | 774054022 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 96651736 | 12075112208700259 | 12075122926892194 | 12075122927769626 | 12075112210004122 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7fa3f9496300 | 0x7fa2ed8376c0 | 5297840 | 4866749 | 72528042 | 65536 | 595121875 | 662229 | 662229 | 73666067 | 16980322 | 27748 | 2237847 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7702 | 2025353 | 0 | 2097152 | 344434075 | 882886587 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 13135937 | 12075112210276208 | 12075122927840186 | 12075122928250582 | 12075112211122190 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7fa3f9496a00 | 0x7fa2ed837700 | 18653232 | 18491608 | 276273657 | 65536 | 2284577896 | 2331653 | 2331653 | 277789937 | 137239533 | 0 | 203609 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219881 | 0 | 8388608 | 220897171 | 1514800879 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 111678703 | 12075112211387373 | 12075122928310582 | 12075122929860809 | 12075112213349239 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7fa3f9496900 | 0x7fa2ed837740 | 9602920 | 9444978 | 141004402 | 65536 | 1161392460 | 1200364 | 1200364 | 142091387 | 117749050 | 0 | 31659 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219831 | 0 | 8388608 | 219933885 | 1498732014 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 97668258 | 12075112213615464 | 12075122929964277 | 12075122930760431 | 12075112214828207 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7fa3f9496800 | 0x7fa2ed837780 | 5127512 | 4970221 | 74032159 | 65536 | 609008366 | 640938 | 640938 | 74968997 | 70838514 | 578 | 77389 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8341 | 2060700 | 0 | 2097152 | 216317105 | 748350856 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 63470743 | 12075112215097598 | 12075122930870669 | 12075122931284586 | 12075112215929043 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7fa3f9496700 | 0x7fa2ed8377c0 | 14053160 | 13900317 | 207605438 | 65536 | 1713950330 | 1756644 | 1756644 | 208921367 | 119506637 | 422 | 128725 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7932 | 2059906 | 0 | 2097152 | 224927443 | 784442973 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 106816310 | 12075112216198925 | 12075122931392105 | 12075122932550817 | 12075112217772549 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7fa3f9496600 | 0x7fa2ed837800 | 9578216 | 9425617 | 140773916 | 65536 | 1162955071 | 1197276 | 1197276 | 141800957 | 97863926 | 816 | 65836 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8329 | 2060519 | 0 | 2097152 | 230450753 | 784241627 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 90022684 | 12075112218058010 | 12075122932663136 | 12075122933448410 | 12075112219261326 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7fa3f9496500 | 0x7fa2ed837840 | 36664768 | 36506985 | 545566322 | 65536 | 4515500938 | 4583095 | 4583095 | 548020547 | 188233604 | 0 | 412755 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219684 | 0 | 8388608 | 251206408 | 1678114107 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 151756167 | 12075112219525577 | 12075122933550170 | 12075122936603747 | 12075112222990155 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7fa3f9496400 | 0x7fa2ed837880 | 18579480 | 18429779 | 275299910 | 65536 | 2276716682 | 2322434 | 2322434 | 276900189 | 150373774 | 0 | 429389 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219866 | 0 | 8388608 | 222687857 | 1529763869 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 121084126 | 12075112223251621 | 12075122936707107 | 12075122938251255 | 12075112225206193 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7fa3f9496300 | 0x7fa2ed8378c0 | 9632064 | 9461898 | 141246229 | 65536 | 1164793957 | 1204007 | 1204007 | 142344257 | 101633061 | 503 | 57934 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8341 | 2060479 | 0 | 2097152 | 222554033 | 769464485 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 88915338 | 12075112225472288 | 12075122938353975 | 12075122939143729 | 12075112226679010 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7fa3f9496a00 | 0x7fa2ed837900 | 27537664 | 27378045 | 409140666 | 65536 | 3385328402 | 3442207 | 3442207 | 411086447 | 148590729 | 383 | 78530 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7902 | 2058440 | 0 | 2097152 | 263139133 | 879457667 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 119116769 | 12075112226940567 | 12075122939247248 | 12075122941528671 | 12075112229645393 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7fa3f9496900 | 0x7fa2ed837940 | 18588720 | 18419099 | 275197788 | 65536 | 2275185993 | 2323589 | 2323589 | 276702257 | 140439268 | 1051 | 64059 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8355 | 2060031 | 0 | 2097152 | 238475169 | 820218999 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 119709633 | 12075112229922458 | 12075122941641631 | 12075122943176819 | 12075112231884454 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7fa3f9496800 | 0x7fa2ed837980 | 72694416 | 72530359 | 1084078982 | 65536 | 8976933927 | 9086801 | 9086801 | 1088371007 | 190458003 | 0 | 248295 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8220053 | 0 | 8388608 | 266945343 | 1743960648 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 158797782 | 12075112232143906 | 12075122943280179 | 12075122949335334 | 12075112238610483 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7fa3f9496700 | 0x7fa2ed8379c0 | 36591656 | 36417081 | 544098163 | 65536 | 4500956607 | 4573956 | 4573956 | 546671837 | 199796097 | 0 | 5715 | 0 | 0 | 0 | 524288 | 33554432 | 33554432 | 33554432 | 0 | 0 | 0 | 360 | 8388608 | 15360 | 8219942 | 0 | 8388608 | 300166864 | 1832902104 | 0 | 4194304 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194304 | 0 | 0 | 157865822 | 12075112238875426 | 12075122949440934 | 12075122952483471 | 12075112242345424 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7fa3f9496600 | 0x7fa2ed837a00 | 18633136 | 18468018 | 275867385 | 65536 | 2281144670 | 2329141 | 2329141 | 277436057 | 136604562 | 666 | 62329 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 8457 | 2059743 | 0 | 2097152 | 236319571 | 815206985 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 119912739 | 12075112242605087 | 12075122952586191 | 12075122954125060 | 12075112244558497 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 861653 | 861658 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7fa3f9496500 | 0x7fa2ed837a40 | 54529904 | 54338487 | 812042289 | 65536 | 6723174465 | 6816237 | 6816237 | 815493017 | 179511398 | 292 | 61258 | 0 | 0 | 0 | 524288 | 8388608 | 8388608 | 8388608 | 0 | 0 | 0 | 360 | 2097152 | 7871 | 2061229 | 0 | 2097152 | 259693021 | 897462295 | 0 | 2097152 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097152 | 0 | 0 | 134215973 | 12075112244818009 | 12075122954227139 | 12075122958753826 | 12075112249767537 |