71 KiB
71 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_BUSY_CU_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | GRBM_COUNT | GRBM_GUI_ACTIVE | TA_TA_BUSY_sum | TA_SH_FIFO_BUSY_sum | TA_BUFFER_COALESCED_READ_CYCLES_sum | TA_BUFFER_COALESCED_WRITE_CYCLES_sum | TA_ADDR_STALLED_BY_TC_CYCLES_sum | TA_TOTAL_WAVEFRONTS_sum | TA_ADDR_STALLED_BY_TD_CYCLES_sum | TA_DATA_STALLED_BY_TC_CYCLES_sum | TA_FLAT_WAVEFRONTS_sum | TA_FLAT_READ_WAVEFRONTS_sum | TA_FLAT_WRITE_WAVEFRONTS_sum | TA_FLAT_ATOMIC_WAVEFRONTS_sum | TA_FLAT_COALESCEABLE_WAVEFRONTS_sum | TA_SH_FIFO_CMD_BUSY_sum | TA_SH_FIFO_ADDR_BUSY_sum | TA_SH_FIFO_DATA_BUSY_sum | TA_SH_FIFO_DATA_SFIFO_BUSY_sum | TA_SH_FIFO_DATA_TFIFO_BUSY_sum | TA_SQ_TA_CMD_CYCLES_sum | TA_SP_TA_ADDR_CYCLES_sum | TA_SP_TA_DATA_CYCLES_sum | TA_SH_FIFO_ADDR_STARVED_WHILE_BUSY_CYCLES_sum | TA_SH_FIFO_CMD_STARVED_WHILE_BUSY_CYCLES_sum | TA_SH_FIFO_DATA_STARVED_WHILE_BUSY_CYCLES_sum | TA_TA_SH_FIFO_STARVED_sum | TA_BUFFER_WAVEFRONTS_sum | TA_BUFFER_READ_WAVEFRONTS_sum | TA_BUFFER_WRITE_WAVEFRONTS_sum | TA_BUFFER_ATOMIC_WAVEFRONTS_sum | TA_BUFFER_TOTAL_CYCLES_sum | TA_BUFFER_COALESCABLE_WAVEFRONTS_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 857583 | 857588 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7fbf7a204180 | 4016848 | 3808248 | 56922495 | 524288 | 372711809 | 502105 | 502105 | 54315858 | 54220114 | 0 | 0 | 17584769 | 1048576 | 0 | 37475666 | 1048576 | 0 | 1048576 | 0 | 1048576 | 52267001 | 52021897 | 53959648 | 53632699 | 54157650 | 4194304 | 4194304 | 4194304 | 0 | 0 | 0 | 365635 | 0 | 0 | 0 | 0 | 0 | 1048576 | 12075005624787169 | 12075022762977568 | 12075022763304925 | 12075005872218276 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 857583 | 857588 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7fbf7a235100 | 224120 | 77149 | 1079368 | 512 | 1150204 | 28014 | 28014 | 131904 | 113142 | 0 | 0 | 60588 | 4096 | 0 | 0 | 4096 | 4096 | 0 | 0 | 4096 | 68890 | 65845 | 0 | 0 | 0 | 16384 | 16384 | 0 | 0 | 0 | 0 | 2296 | 0 | 0 | 0 | 0 | 0 | 4096 | 12075005887090134 | 12075022779046022 | 12075022779052582 | 12075005887527727 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7fc085d83900 | 0x7fbf7a235140 | 1746856 | 1582962 | 23486854 | 65536 | 136653633 | 218356 | 218356 | 22023321 | 21895572 | 0 | 0 | 21003847 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22455427 | 22452384 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 16385 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005887702171 | 12075022779130981 | 12075022779261700 | 12075005888204024 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7fc085d83800 | 0x7fbf7a235180 | 3158128 | 2994388 | 44594679 | 65536 | 234428790 | 394765 | 394765 | 43483285 | 43356199 | 0 | 0 | 35077190 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43981458 | 43980032 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 5013 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005888446564 | 12075022779337060 | 12075022779580097 | 12075005889056388 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7fc085d83700 | 0x7fbf7a2351c0 | 3218928 | 3038277 | 45294147 | 65536 | 295165771 | 402365 | 402365 | 43995417 | 43829919 | 0 | 0 | 35880669 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 40989097 | 40915528 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 60942 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005889210845 | 12075022779723936 | 12075022779971454 | 12075005889827551 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7fc085d83600 | 0x7fbf7a235200 | 1766360 | 1603597 | 23728642 | 65536 | 126960455 | 220794 | 220794 | 21732040 | 21555880 | 0 | 0 | 21006653 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22047123 | 22041949 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 10277 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005889992628 | 12075022780119933 | 12075022780245691 | 12075005890419561 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7fc085d83500 | 0x7fbf7a235240 | 1787968 | 1624420 | 24036973 | 65536 | 122660029 | 223495 | 223495 | 21983333 | 21758539 | 0 | 0 | 20156793 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 23462409 | 23457584 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 8418 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005890651241 | 12075022780295771 | 12075022780420410 | 12075005891077644 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7fc085d83400 | 0x7fbf7a235280 | 1722800 | 1551923 | 23088228 | 65536 | 150365539 | 215349 | 215349 | 21808205 | 21714816 | 0 | 0 | 20062753 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21743724 | 21732385 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 21526 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005891324903 | 12075022780494649 | 12075022780618808 | 12075005891738793 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7fc085d83300 | 0x7fbf7a2352c0 | 3685104 | 3096599 | 46047940 | 65536 | 254539243 | 460637 | 460637 | 43296752 | 42885721 | 0 | 0 | 34868728 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43188143 | 43183104 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 59874 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005891962188 | 12075022780668568 | 12075022780914005 | 12075005892621613 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7fc085d83a00 | 0x7fbf7a235300 | 3276288 | 3090418 | 46069354 | 65536 | 249980475 | 409535 | 409535 | 43521220 | 43115896 | 0 | 0 | 36717248 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 44020231 | 44013426 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3440 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005892777383 | 12075022781058804 | 12075022781308082 | 12075005893394289 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7fc085d83900 | 0x7fbf7a235340 | 1742360 | 1580166 | 23380542 | 65536 | 145706409 | 217794 | 217794 | 22352679 | 22249134 | 0 | 0 | 19794542 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21268265 | 21248456 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 29652 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005893549828 | 12075022781451121 | 12075022781576239 | 12075005893964188 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7fc085d83800 | 0x7fbf7a235380 | 1764136 | 1603301 | 23712760 | 65536 | 136739194 | 220516 | 220516 | 22473479 | 22371150 | 0 | 0 | 20451841 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22216881 | 22210935 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 14629 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005894191971 | 12075022781626799 | 12075022781752878 | 12075005894606732 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7fc085d83700 | 0x7fbf7a2353c0 | 1696944 | 1532706 | 22721932 | 65536 | 161338903 | 212117 | 212117 | 21897636 | 21842613 | 0 | 0 | 20103639 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22203001 | 22199142 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 17780 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005894840577 | 12075022781812877 | 12075022781937356 | 12075005895257592 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7fc085d83600 | 0x7fbf7a235400 | 3228440 | 3072790 | 45867779 | 65536 | 339593910 | 403554 | 403554 | 44740877 | 44660450 | 0 | 0 | 35390737 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43450492 | 43445719 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 24612 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005895480957 | 12075022781992236 | 12075022782242473 | 12075005896083096 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7fc085d83500 | 0x7fbf7a235440 | 3201960 | 3018183 | 45006819 | 65536 | 326788294 | 400244 | 400244 | 44389128 | 44358457 | 0 | 0 | 36337166 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43404478 | 43399303 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4293 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005896250818 | 12075022782385192 | 12075022782631590 | 12075005896852806 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7fc085d83400 | 0x7fbf7a235480 | 1715632 | 1545141 | 22942894 | 65536 | 158024264 | 214453 | 214453 | 22165043 | 22083595 | 0 | 0 | 20905541 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22131593 | 22118648 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 36672 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005897024686 | 12075022782743749 | 12075022782868068 | 12075005897433405 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7fc085d83300 | 0x7fbf7a2354c0 | 1701104 | 1537060 | 22755396 | 65536 | 160928564 | 212637 | 212637 | 21955094 | 21909434 | 0 | 0 | 21150242 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21480779 | 21466613 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 5562 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005897659295 | 12075022782919267 | 12075022783044066 | 12075005898067103 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7fc085d83a00 | 0x7fbf7a235500 | 1681088 | 1514545 | 22450138 | 65536 | 156443171 | 210135 | 210135 | 21685853 | 21637967 | 0 | 0 | 19748910 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22080120 | 22067550 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 32347 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005898302931 | 12075022783106466 | 12075022783230785 | 12075005898712122 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7fc085d83900 | 0x7fbf7a235540 | 3249008 | 3080288 | 45820785 | 65536 | 271139833 | 406125 | 406125 | 43979519 | 43739394 | 0 | 0 | 35705123 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42375452 | 42345758 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4858 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005898928964 | 12075022783284224 | 12075022783534782 | 12075005899549778 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7fc085d83800 | 0x7fbf7a235580 | 3226240 | 3068101 | 45663435 | 65536 | 306355749 | 403279 | 403279 | 44665932 | 44591097 | 0 | 0 | 35111495 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 40164387 | 40071820 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 8096 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005899699496 | 12075022783646621 | 12075022783892059 | 12075005900302497 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7fc085d83700 | 0x7fbf7a2355c0 | 1670848 | 1508599 | 22354347 | 65536 | 163334014 | 208855 | 208855 | 21575255 | 21512241 | 0 | 0 | 19391779 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22099168 | 22083609 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 33495 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005900458767 | 12075022784038297 | 12075022784162456 | 12075005900866475 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7fc085d83600 | 0x7fbf7a235600 | 1807808 | 1643342 | 24329487 | 65536 | 134950683 | 225975 | 225975 | 22252356 | 22031684 | 0 | 0 | 20513041 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21959185 | 21950564 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 16778 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005901084620 | 12075022784217016 | 12075022784342135 | 12075005901514089 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7fc085d83500 | 0x7fbf7a235640 | 1678360 | 1512247 | 22412343 | 65536 | 152830669 | 209794 | 209794 | 21519434 | 21429103 | 0 | 0 | 19503030 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22802227 | 22791163 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 23428 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005901752121 | 12075022784406454 | 12075022784530773 | 12075005902173474 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7fc085d83400 | 0x7fbf7a235680 | 3248616 | 3083487 | 45892483 | 65536 | 301483634 | 406076 | 406076 | 45001912 | 44940921 | 0 | 0 | 36039359 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42695940 | 42648051 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 33010 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005902397190 | 12075022784591732 | 12075022784839570 | 12075005903008536 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7fc085d83300 | 0x7fbf7a2356c0 | 3289512 | 3103558 | 46199909 | 65536 | 288286975 | 411188 | 411188 | 45146712 | 45043856 | 0 | 0 | 36591586 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42088082 | 42032130 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 30818 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005903171098 | 12075022784953009 | 12075022785196527 | 12075005903787503 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7fc085d83a00 | 0x7fbf7a235700 | 1720792 | 1559670 | 23112545 | 65536 | 148278712 | 215098 | 215098 | 21821651 | 21666397 | 0 | 0 | 19543972 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22075220 | 22066608 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 24994 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005903950627 | 12075022785336686 | 12075022785461325 | 12075005904365978 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7fc085d83900 | 0x7fbf7a235740 | 1785128 | 1622472 | 24004213 | 65536 | 141160869 | 223140 | 223140 | 22576981 | 22394112 | 0 | 0 | 19822995 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22116356 | 22093170 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 44804 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005904595886 | 12075022785509484 | 12075022785634603 | 12075005905009274 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7fc085d83800 | 0x7fbf7a235780 | 1695128 | 1536196 | 22766250 | 65536 | 157840477 | 211890 | 211890 | 21817608 | 21740949 | 0 | 0 | 19401427 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21976369 | 21962900 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 18862 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005905243359 | 12075022785692042 | 12075022785816361 | 12075005905648492 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7fc085d83700 | 0x7fbf7a2357c0 | 3242136 | 3074869 | 45703877 | 65536 | 276394765 | 405266 | 405266 | 44297891 | 44140182 | 0 | 0 | 34338927 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43373347 | 43368539 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 7455 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005905851379 | 12075022785865161 | 12075022786112999 | 12075005906470069 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7fc085d83600 | 0x7fbf7a235800 | 3243248 | 3077592 | 45920860 | 65536 | 341196045 | 405405 | 405405 | 45063030 | 45020588 | 0 | 0 | 36061411 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 41758451 | 41712188 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3525 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005906625808 | 12075022786252837 | 12075022786499075 | 12075005907229570 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7fc085d83500 | 0x7fbf7a235840 | 1721576 | 1561580 | 23110202 | 65536 | 157862651 | 215196 | 215196 | 22306909 | 22222602 | 0 | 0 | 19723457 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21298697 | 21285666 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 29908 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005907386802 | 12075022786636994 | 12075022786761633 | 12075005907805551 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7fc085d83400 | 0x7fbf7a235880 | 1731176 | 1566027 | 23130674 | 65536 | 143289879 | 216396 | 216396 | 21241534 | 20959029 | 0 | 0 | 19147589 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22383598 | 22369879 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 29469 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005908036299 | 12075022786817312 | 12075022786943551 | 12075005908454446 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7fc085d83300 | 0x7fbf7a2358c0 | 1652392 | 1490186 | 22110317 | 65536 | 157923617 | 206548 | 206548 | 21028980 | 20925648 | 0 | 0 | 19795605 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22398390 | 22378820 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 18716 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005908686127 | 12075022787004831 | 12075022787129630 | 12075005909086621 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7fc085d83a00 | 0x7fbf7a235900 | 3191896 | 3032526 | 45164766 | 65536 | 305563317 | 398986 | 398986 | 43988572 | 43917813 | 0 | 0 | 36294939 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43266571 | 43260174 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 71807 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005909311710 | 12075022787181629 | 12075022787428027 | 12075005909910382 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7fc085d83900 | 0x7fbf7a235940 | 3219632 | 3060598 | 45582875 | 65536 | 327095917 | 402453 | 402453 | 44973536 | 44955787 | 0 | 0 | 36297803 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42295681 | 42239500 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3865 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005910078224 | 12075022787531706 | 12075022787777464 | 12075005910684300 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7fc085d83800 | 0x7fbf7a235980 | 1755688 | 1594648 | 23648477 | 65536 | 139012013 | 219460 | 219460 | 22507175 | 22350901 | 0 | 0 | 19884327 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22263902 | 22243256 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 41565 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005910850660 | 12075022787874263 | 12075022787999222 | 12075005911269688 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7fc085d83700 | 0x7fbf7a2359c0 | 1753072 | 1583715 | 23424269 | 65536 | 152752592 | 219133 | 219133 | 21731791 | 21450726 | 0 | 0 | 20041843 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21365892 | 21345256 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 47934 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005911500307 | 12075022788050421 | 12075022788176980 | 12075005911911791 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7fc085d83600 | 0x7fbf7a235a00 | 1708848 | 1547596 | 22906443 | 65536 | 147671569 | 213605 | 213605 | 21638798 | 21484961 | 0 | 0 | 20240967 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21958634 | 21942702 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 36220 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005912152379 | 12075022788237780 | 12075022788362578 | 12075005912560477 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7fc085d83500 | 0x7fbf7a235a40 | 3227520 | 3063969 | 45678885 | 65536 | 322042300 | 403439 | 403439 | 44403280 | 44321860 | 0 | 0 | 33526840 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 44143339 | 44137991 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 15609 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005912777590 | 12075022788413778 | 12075022788662256 | 12075005913391121 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7fc085d83400 | 0x7fbf7a235a80 | 3135808 | 2975105 | 44391210 | 65536 | 355718332 | 391975 | 391975 | 43694472 | 43677219 | 0 | 0 | 35672507 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42320857 | 42264361 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4113 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005913541771 | 12075022788731695 | 12075022788975213 | 12075005914135263 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7fc085d83300 | 0x7fbf7a235ac0 | 1725504 | 1562969 | 23120434 | 65536 | 155980514 | 215687 | 215687 | 22405972 | 22326692 | 0 | 0 | 19574384 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21841528 | 21830717 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 15189 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005914290312 | 12075022789053612 | 12075022789179851 | 12075005914702287 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7fc085d83a00 | 0x7fbf7a235b00 | 1838312 | 1665952 | 24551483 | 65536 | 149445007 | 229788 | 229788 | 22176771 | 21713297 | 0 | 0 | 19339611 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 20693077 | 20655673 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 105451 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005914927065 | 12075022789228491 | 12075022789355210 | 12075005915350662 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7fc085d83900 | 0x7fbf7a235b40 | 1720936 | 1535452 | 22730844 | 65536 | 164961615 | 215116 | 215116 | 21929577 | 21865090 | 0 | 0 | 19654518 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22197244 | 22179072 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 32967 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005915586180 | 12075022789414889 | 12075022789539688 | 12075005915990782 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7fc085d83800 | 0x7fbf7a235b80 | 3287592 | 3116388 | 46453920 | 65536 | 332490405 | 410948 | 410948 | 45382436 | 45257198 | 0 | 0 | 36677075 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 44869722 | 44850632 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 105224 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005916216201 | 12075022789588647 | 12075022789842725 | 12075005916819582 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7fc085d83700 | 0x7fbf7a235bc0 | 3232320 | 3061786 | 45597135 | 65536 | 307576783 | 404039 | 404039 | 44602512 | 44495022 | 0 | 0 | 34494917 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 44121367 | 44115367 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 13358 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005916953060 | 12075022789910245 | 12075022790153602 | 12075005917566350 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7fc085d83600 | 0x7fbf7a235c00 | 1728112 | 1557549 | 23098003 | 65536 | 143908765 | 216013 | 216013 | 21721540 | 21524599 | 0 | 0 | 19677846 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21942946 | 21926328 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 14577 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005917715697 | 12075022790222882 | 12075022790347361 | 12075005918132802 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7fc085d83500 | 0x7fbf7a235c40 | 1831360 | 1662175 | 24444186 | 65536 | 158631300 | 228919 | 228919 | 22326310 | 21704583 | 0 | 0 | 18102326 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21351019 | 21300718 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 44307 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005918351980 | 12075022790398560 | 12075022790524959 | 12075005918779985 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7fc085d83400 | 0x7fbf7a235c80 | 1669464 | 1505924 | 22320163 | 65536 | 155197066 | 208682 | 208682 | 21031893 | 20896387 | 0 | 0 | 19471494 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21320069 | 21305926 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 16943 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005919027305 | 12075022790585119 | 12075022790710397 | 12075005919435503 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7fc085d83300 | 0x7fbf7a235cc0 | 3305192 | 3141111 | 46724031 | 65536 | 350053896 | 413148 | 413148 | 45169555 | 44962422 | 0 | 0 | 35272593 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43684189 | 43628416 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 77235 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005919664178 | 12075022790760317 | 12075022791016635 | 12075005920281906 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7fc085d83a00 | 0x7fbf7a235d00 | 3204528 | 3044484 | 45385560 | 65536 | 272565155 | 400565 | 400565 | 42809141 | 42433504 | 0 | 0 | 35898415 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 44254528 | 44250127 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 23884 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005920437776 | 12075022791085914 | 12075022791333112 | 12075005921041458 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7fc085d83900 | 0x7fbf7a235d40 | 1767400 | 1606150 | 23733963 | 65536 | 139443643 | 220924 | 220924 | 21843814 | 21561803 | 0 | 0 | 19067482 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21524098 | 21509674 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 13334 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005921201215 | 12075022791400151 | 12075022791524950 | 12075005921612689 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7fc085d83800 | 0x7fbf7a235d80 | 1847208 | 1668905 | 24638808 | 65536 | 152810046 | 230900 | 230900 | 20508263 | 19475820 | 0 | 0 | 17657094 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 18637054 | 18474963 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 189879 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005921842096 | 12075022791574550 | 12075022791703508 | 12075005922279368 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7fc085d83700 | 0x7fbf7a235dc0 | 1724784 | 1565858 | 23148737 | 65536 | 155591690 | 215597 | 215597 | 22302233 | 22195159 | 0 | 0 | 19681896 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22411357 | 22391271 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 20565 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005922490761 | 12075022791763348 | 12075022791888947 | 12075005922905482 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7fc085d83600 | 0x7fbf7a235e00 | 3317608 | 3154352 | 46905740 | 65536 | 357447014 | 414700 | 414700 | 45493810 | 45321224 | 0 | 0 | 36626323 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 40583605 | 40384770 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 108967 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005923137112 | 12075022791939346 | 12075022792196144 | 12075005923752536 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7fc085d83500 | 0x7fbf7a235e40 | 3169920 | 3002021 | 44684045 | 65536 | 284973065 | 396239 | 396239 | 43695799 | 43592124 | 0 | 0 | 36236002 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 41608840 | 41539927 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3788 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005923913285 | 12075022792266383 | 12075022792513581 | 12075005924513320 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7fc085d83400 | 0x7fbf7a235e80 | 1714840 | 1541548 | 22878330 | 65536 | 154172416 | 214354 | 214354 | 21839068 | 21686763 | 0 | 0 | 20288303 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21736861 | 21724586 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 20509 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005924678797 | 12075022792587661 | 12075022792713259 | 12075005925084311 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7fc085d83300 | 0x7fbf7a235ec0 | 1796056 | 1609465 | 23801010 | 65536 | 166180836 | 224506 | 224506 | 17170033 | 15549702 | 0 | 0 | 15774460 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 17060128 | 16818484 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 303778 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005925313236 | 12075022792762219 | 12075022792891498 | 12075005925727997 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7fc085d83a00 | 0x7fbf7a235f00 | 1692184 | 1518994 | 22484277 | 65536 | 159628097 | 211522 | 211522 | 21096257 | 20953627 | 0 | 0 | 19187726 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21591301 | 21577898 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 34297 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005925965488 | 12075022792951817 | 12075022793076776 | 12075005926386511 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7fc085d83900 | 0x7fbf7a235f40 | 3197232 | 3035665 | 45109115 | 65536 | 352114199 | 399653 | 399653 | 44002636 | 43900189 | 0 | 0 | 31776543 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39724689 | 39559787 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 45972 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005926609114 | 12075022793125736 | 12075022793380613 | 12075005927210051 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7fc085d83800 | 0x7fbf7a235f80 | 3601856 | 3019182 | 45079167 | 65536 | 326893953 | 450231 | 450231 | 44266535 | 44220460 | 0 | 0 | 36710002 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43452678 | 43447917 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 35908 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005927370860 | 12075022793449573 | 12075022793731650 | 12075005928023793 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7fc085d83700 | 0x7fbf7a235fc0 | 1718184 | 1537506 | 22746906 | 65536 | 160048865 | 214772 | 214772 | 21701534 | 21569595 | 0 | 0 | 20594288 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21764735 | 21733418 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 33174 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005928177659 | 12075022793795490 | 12075022793921729 | 12075005928583854 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7fc085d83600 | 0x7fbf7a236000 | 1767792 | 1606889 | 23787614 | 65536 | 171790622 | 220973 | 220973 | 11628430 | 9694434 | 0 | 0 | 8544749 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 9569383 | 8981481 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 448448 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005928790578 | 12075022793971008 | 12075022794107967 | 12075005929221489 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7fc085d83500 | 0x7fbf7a236040 | 1709056 | 1542838 | 22811538 | 65536 | 144146143 | 213631 | 213631 | 21328166 | 21096286 | 0 | 0 | 18960691 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22066451 | 22042419 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 19902 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005929456816 | 12075022794171486 | 12075022794295965 | 12075005929863272 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7fc085d83400 | 0x7fbf7a236080 | 3355072 | 3179750 | 47275882 | 65536 | 338436837 | 419383 | 419383 | 44827693 | 44449315 | 0 | 0 | 36129223 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39410574 | 39206823 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 64136 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005930089873 | 12075022794347005 | 12075022794605562 | 12075005930710506 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7fc085d83300 | 0x7fbf7a2360c0 | 3177072 | 3010177 | 44964841 | 65536 | 351318563 | 397133 | 397133 | 44296341 | 44271167 | 0 | 0 | 35223188 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43281823 | 43259004 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 38786 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005930871275 | 12075022794677402 | 12075022794928919 | 12075005931475318 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7fc085d83a00 | 0x7fbf7a236100 | 1726320 | 1552675 | 23103046 | 65536 | 156769919 | 215789 | 215789 | 21615059 | 21403208 | 0 | 0 | 19638528 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21264384 | 21238350 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 25215 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005931635545 | 12075022795000119 | 12075022795124758 | 12075005932041710 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7fc085d83900 | 0x7fbf7a236140 | 1842712 | 1674673 | 24814186 | 65536 | 195411335 | 230338 | 230338 | 10482358 | 8663247 | 0 | 0 | 8025903 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 9463308 | 8856443 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 647321 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005932270966 | 12075022795174997 | 12075022795317236 | 12075005932688242 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7fc085d83800 | 0x7fbf7a236180 | 1697024 | 1536673 | 22717993 | 65536 | 149215468 | 212127 | 212127 | 21021328 | 20723555 | 0 | 0 | 20334953 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21562215 | 21536372 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 30378 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005932926014 | 12075022795376755 | 12075022795502834 | 12075005933337268 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7fc085d83700 | 0x7fbf7a2361c0 | 3316736 | 3138432 | 46638235 | 65536 | 348806679 | 414591 | 414591 | 44012782 | 43635234 | 0 | 0 | 34394433 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 38630388 | 38432939 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 200807 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005933566604 | 12075022795553234 | 12075022795811472 | 12075005934175045 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7fc085d83600 | 0x7fbf7a236200 | 3187560 | 3006125 | 44802929 | 65536 | 329550303 | 398444 | 398444 | 44095252 | 44073265 | 0 | 0 | 35845605 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 41525893 | 41466176 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 66586 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005934335092 | 12075022795879471 | 12075022796126989 | 12075005934907366 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7fc085d83500 | 0x7fbf7a236240 | 1831080 | 1645116 | 24306896 | 65536 | 143495842 | 228884 | 228884 | 21668965 | 21228297 | 0 | 0 | 20739506 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21909072 | 21877277 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 26192 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005935075027 | 12075022796199148 | 12075022796324907 | 12075005935486732 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7fc085d83400 | 0x7fbf7a236280 | 1909784 | 1751262 | 25986968 | 65536 | 208442249 | 238722 | 238722 | 8486152 | 6812868 | 0 | 0 | 4949511 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7331175 | 6627581 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 722834 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005935714506 | 12075022796373386 | 12075022796522825 | 12075005936146408 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7fc085d83300 | 0x7fbf7a2362c0 | 1687216 | 1513646 | 22344148 | 65536 | 163316259 | 210901 | 210901 | 21517746 | 21418175 | 0 | 0 | 19600944 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21698576 | 21672447 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 38831 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005936405149 | 12075022796600584 | 12075022796725863 | 12075005936821883 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7fc085d83a00 | 0x7fbf7a236300 | 3334144 | 3151701 | 46900156 | 65536 | 312452811 | 416767 | 416767 | 39646305 | 38429265 | 0 | 0 | 34930124 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 40281918 | 40108966 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 154657 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005937053604 | 12075022796775143 | 12075022797039461 | 12075005937672825 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7fc085d83900 | 0x7fbf7a236340 | 3217000 | 3051698 | 45420850 | 65536 | 242813830 | 402124 | 402124 | 42915672 | 42495483 | 0 | 0 | 35575740 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 41787365 | 41736460 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 9946 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005937832962 | 12075022797112740 | 12075022797394337 | 12075005938442445 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7fc085d83800 | 0x7fbf7a236380 | 1791656 | 1626056 | 24000402 | 65536 | 151469364 | 223956 | 223956 | 22617680 | 22335267 | 0 | 0 | 19506440 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22368985 | 22338690 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 42583 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005938603484 | 12075022797460417 | 12075022797586176 | 12075005939025919 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7fc085d83700 | 0x7fbf7a2363c0 | 2011952 | 1851372 | 27503031 | 65536 | 219872977 | 251493 | 251493 | 7949292 | 6325999 | 0 | 0 | 3200061 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5627471 | 4876667 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 778940 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005939248623 | 12075022797636735 | 12075022797793854 | 12075005939684543 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7fc085d83600 | 0x7fbf7a236400 | 1743832 | 1585911 | 23379680 | 65536 | 154666230 | 217978 | 217978 | 21716512 | 21440991 | 0 | 0 | 18933557 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21988458 | 21965996 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 41950 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005939917606 | 12075022797855133 | 12075022797980252 | 12075005940343859 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7fc085d83500 | 0x7fbf7a236440 | 3314304 | 3119634 | 46345970 | 65536 | 308150215 | 414287 | 414287 | 38846502 | 37457263 | 0 | 0 | 25642106 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 30900306 | 30397411 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 394034 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005940546205 | 12075022798035612 | 12075022798302009 | 12075005941164373 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7fc085d83400 | 0x7fbf7a236480 | 3207512 | 3044213 | 45342501 | 65536 | 340505625 | 400938 | 400938 | 44620667 | 44589433 | 0 | 0 | 35043595 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43300614 | 43279118 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 11055 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005941318269 | 12075022798380888 | 12075022798626806 | 12075005941919096 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7fc085d83300 | 0x7fbf7a2364c0 | 1766680 | 1600858 | 23657160 | 65536 | 148001292 | 220834 | 220834 | 21623490 | 21217199 | 0 | 0 | 19117636 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21103844 | 21084015 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 48136 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005942085225 | 12075022798699446 | 12075022798825204 | 12075005942500376 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7fc085d83a00 | 0x7fbf7a236500 | 2095728 | 1943555 | 28891095 | 65536 | 233063384 | 261965 | 261965 | 7132011 | 5572829 | 0 | 0 | 2829009 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5901798 | 5149389 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 778334 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005942729272 | 12075022798873044 | 12075022799038003 | 12075005943167265 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7fc085d83900 | 0x7fbf7a236540 | 1779368 | 1588621 | 23518706 | 65536 | 155216706 | 222420 | 222420 | 21736172 | 21461621 | 0 | 0 | 19245016 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21935879 | 21922257 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 46319 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005943406290 | 12075022799098802 | 12075022799224401 | 12075005943827763 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7fc085d83800 | 0x7fbf7a236580 | 3238400 | 3046863 | 45314409 | 65536 | 318859622 | 404799 | 404799 | 33328889 | 31267997 | 0 | 0 | 24540254 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 30447875 | 29953043 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 558522 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005944054013 | 12075022799273360 | 12075022799537358 | 12075005944667934 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7fc085d83700 | 0x7fbf7a2365c0 | 3220120 | 3040559 | 45309764 | 65536 | 274449308 | 402514 | 402514 | 42929933 | 42567508 | 0 | 0 | 35744959 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 43458872 | 43454145 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 18988 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005944830276 | 12075022799602637 | 12075022799848875 | 12075005945440450 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7fc085d83600 | 0x7fbf7a236600 | 1799232 | 1642970 | 24193193 | 65536 | 153379680 | 224903 | 224903 | 22663124 | 22291835 | 0 | 0 | 19013943 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21942156 | 21913501 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 79707 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005945595388 | 12075022799913675 | 12075022800039594 | 12075005946022061 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7fc085d83500 | 0x7fbf7a236640 | 2208984 | 2041112 | 30328157 | 65536 | 242341776 | 276122 | 276122 | 6823931 | 5315060 | 0 | 0 | 2472477 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5327539 | 4547993 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 802870 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005946241098 | 12075022800092553 | 12075022800265512 | 12075005946700291 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7fc085d83400 | 0x7fbf7a236680 | 1668224 | 1509077 | 22301440 | 65536 | 155897023 | 208527 | 208527 | 21074212 | 20880168 | 0 | 0 | 19041948 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21734065 | 21700187 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 49001 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005946910482 | 12075022800325831 | 12075022800451430 | 12075005947321796 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7fc085d83300 | 0x7fbf7a2366c0 | 3345216 | 3137583 | 46602701 | 65536 | 337043121 | 418151 | 418151 | 31386007 | 29080169 | 0 | 0 | 15259297 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 22866011 | 22104929 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 854507 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005947547265 | 12075022800501829 | 12075022800753347 | 12075005948161186 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7fc085d83a00 | 0x7fbf7a236700 | 3260272 | 3096189 | 46045327 | 65536 | 294706248 | 407533 | 407533 | 45181075 | 45120431 | 0 | 0 | 35826654 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39857644 | 39763160 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4059 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005948314952 | 12075022800819907 | 12075022801065824 | 12075005948928031 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7fc085d83900 | 0x7fbf7a236740 | 1850344 | 1655325 | 24438423 | 65536 | 152187859 | 231292 | 231292 | 22330148 | 21912975 | 0 | 0 | 19478049 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22453503 | 22418668 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 65657 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005949092877 | 12075022801138144 | 12075022801264703 | 12075005949513830 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7fc085d83800 | 0x7fbf7a236780 | 2299176 | 2144561 | 31835543 | 65536 | 258177450 | 287396 | 287396 | 6678975 | 5219096 | 0 | 0 | 2277182 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4911455 | 4143793 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 753328 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005949743627 | 12075022801313182 | 12075022801496060 | 12075005950214372 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7fc085d83700 | 0x7fbf7a2367c0 | 1815784 | 1650611 | 24334542 | 65536 | 153980289 | 226972 | 226972 | 22152792 | 21716372 | 0 | 0 | 20411598 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 22232986 | 22191224 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 36349 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005950450350 | 12075022801557180 | 12075022801683739 | 12075005950872895 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7fc085d83600 | 0x7fbf7a236800 | 3272816 | 3117654 | 46377523 | 65536 | 378476046 | 409101 | 409101 | 22266984 | 14960947 | 0 | 0 | 2686366 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9689001 | 6239194 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3593009 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005951097913 | 12075022801735418 | 12075022801997976 | 12075005951703840 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7fc085d83500 | 0x7fbf7a236840 | 3256240 | 3063148 | 45484578 | 65536 | 257610925 | 407029 | 407029 | 43197739 | 42907387 | 0 | 0 | 34689473 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 42327800 | 42287911 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3952 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005951863436 | 12075022802068375 | 12075022802320213 | 12075005952481465 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7fc085d83400 | 0x7fbf7a236880 | 1783168 | 1619720 | 23938233 | 65536 | 161563097 | 222895 | 222895 | 22452748 | 22083305 | 0 | 0 | 18434947 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21984132 | 21944399 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 48029 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005952641422 | 12075022802384372 | 12075022802509971 | 12075005953053788 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7fc085d83300 | 0x7fbf7a2368c0 | 2412208 | 2251293 | 33452417 | 65536 | 270721802 | 301525 | 301525 | 6572445 | 5138466 | 0 | 0 | 2238968 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5113362 | 4340108 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 826304 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005953285559 | 12075022802558131 | 12075022802748849 | 12075005953819511 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7fc085d83a00 | 0x7fbf7a236900 | 1755432 | 1599594 | 23581065 | 65536 | 156147148 | 219428 | 219428 | 21740624 | 21284493 | 0 | 0 | 19792576 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21649309 | 21593269 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 67954 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005953989137 | 12075022802808209 | 12075022802934768 | 12075005954418815 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7fc085d83900 | 0x7fbf7a236940 | 3489880 | 3322479 | 49492097 | 65536 | 402143400 | 436234 | 436234 | 22751916 | 14790963 | 0 | 0 | 1831595 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9381016 | 5792331 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3720249 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005954641479 | 12075022802984047 | 12075022803266125 | 12075005955275407 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7fc085d83800 | 0x7fbf7a236980 | 3220656 | 3039678 | 45224728 | 65536 | 321270484 | 402581 | 402581 | 43957438 | 43894299 | 0 | 0 | 36078152 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 41577660 | 41515305 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 16830 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005955430916 | 12075022803338604 | 12075022803587242 | 12075005956035289 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7fc085d83700 | 0x7fbf7a2369c0 | 1818944 | 1645647 | 24295799 | 65536 | 156756647 | 227367 | 227367 | 22573148 | 22073949 | 0 | 0 | 19401731 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21406947 | 21359720 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 41391 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005956198953 | 12075022803660521 | 12075022803786600 | 12075005956622540 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7fc085d83600 | 0x7fbf7a236a00 | 2649792 | 2488170 | 36956001 | 65536 | 299773052 | 331223 | 331223 | 6931014 | 5393446 | 0 | 0 | 2405306 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5183667 | 4365974 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 819389 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005956849683 | 12075022803836519 | 12075022804046118 | 12075005957411176 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7fc085d83500 | 0x7fbf7a236a40 | 1816752 | 1641991 | 24166324 | 65536 | 153611561 | 227093 | 227093 | 21870611 | 21176182 | 0 | 0 | 18688741 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21791921 | 21735430 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 110355 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005957582424 | 12075022804124197 | 12075022804252996 | 12075005958011201 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7fc085d83400 | 0x7fbf7a236a80 | 3762088 | 3606954 | 53739774 | 65536 | 440257439 | 470260 | 470260 | 23402589 | 14975774 | 0 | 0 | 1732655 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8885465 | 5201016 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3793323 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005958227964 | 12075022804303235 | 12075022804609472 | 12075005958879204 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7fc085d83300 | 0x7fbf7a236ac0 | 3250152 | 3069490 | 45633402 | 65536 | 312423124 | 406268 | 406268 | 44651615 | 44596214 | 0 | 0 | 35943550 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39709759 | 39609964 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 14087 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005959025917 | 12075022804677152 | 12075022804925310 | 12075005959640599 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7fc085d83a00 | 0x7fbf7a236b00 | 1758832 | 1592699 | 23437726 | 65536 | 152967862 | 219853 | 219853 | 20498400 | 19771975 | 0 | 0 | 19086859 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21344337 | 21309480 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 108331 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005959794335 | 12075022804991709 | 12075022805118748 | 12075005960213243 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7fc085d83900 | 0x7fbf7a236b40 | 2849776 | 2686138 | 39917471 | 65536 | 325494963 | 356221 | 356221 | 6849151 | 5280703 | 0 | 0 | 2394629 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5103952 | 4312730 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 810376 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005960437029 | 12075022805170107 | 12075022805397145 | 12075005961023419 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7fc085d83800 | 0x7fbf7a236b80 | 1790272 | 1626863 | 23907971 | 65536 | 158379339 | 223783 | 223783 | 20574267 | 19709607 | 0 | 0 | 17843752 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 21317868 | 21248817 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 137596 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005961188225 | 12075022805480985 | 12075022805607064 | 12075005961609258 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7fc085d83700 | 0x7fbf7a236bc0 | 4050536 | 3867092 | 57625512 | 65536 | 471219196 | 506316 | 506316 | 23306076 | 14787840 | 0 | 0 | 1628221 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9140858 | 5453242 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3804514 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005961837612 | 12075022805657463 | 12075022806015060 | 12075005962517896 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7fc085d83600 | 0x7fbf7a236c00 | 3301352 | 3090383 | 45974789 | 65536 | 275123306 | 412668 | 412668 | 42810791 | 42349666 | 0 | 0 | 36176328 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39643260 | 39525851 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 35954 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005962680969 | 12075022806086899 | 12075022806337137 | 12075005963297816 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7fc085d83500 | 0x7fbf7a236c40 | 1814488 | 1638677 | 24241407 | 65536 | 157828416 | 226810 | 226810 | 20443727 | 19497941 | 0 | 0 | 17788717 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 20152930 | 20052592 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 142002 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005963453625 | 12075022806405936 | 12075022806533775 | 12075005963868616 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7fc085d83400 | 0x7fbf7a236c80 | 3052672 | 2896483 | 43033157 | 65536 | 351129440 | 381583 | 381583 | 6839157 | 5231346 | 0 | 0 | 2434642 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5104873 | 4311114 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 799412 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005964098884 | 12075022806582895 | 12075022806825773 | 12075005964689903 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7fc085d83300 | 0x7fbf7a236cc0 | 1785496 | 1604820 | 23711531 | 65536 | 160764841 | 223186 | 223186 | 18853431 | 17490528 | 0 | 0 | 16391219 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 19911546 | 19768951 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 191235 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005964855831 | 12075022806901612 | 12075022807030091 | 12075005965278857 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7fc085d83a00 | 0x7fbf7a236d00 | 4879664 | 4424893 | 65930357 | 65536 | 540893208 | 609957 | 609957 | 23940392 | 14990267 | 0 | 0 | 1288547 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8991015 | 5223564 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3877045 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005965496632 | 12075022807084330 | 12075022807459527 | 12075005966238290 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7fc085d83900 | 0x7fbf7a236d40 | 3282968 | 3083952 | 45792943 | 65536 | 321161914 | 410370 | 410370 | 43697461 | 43438119 | 0 | 0 | 30916798 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 39225015 | 39086599 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 134062 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005966392647 | 12075022807541126 | 12075022807803044 | 12075005967019872 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7fc085d83800 | 0x7fbf7a236d80 | 1761728 | 1595225 | 23466139 | 65536 | 164587023 | 220215 | 220215 | 17903796 | 16387570 | 0 | 0 | 15284959 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 19270666 | 19097982 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 208063 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005967174780 | 12075022807875203 | 12075022808004642 | 12075005967587858 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7fc085d83700 | 0x7fbf7a236dc0 | 3493864 | 3305397 | 49174730 | 65536 | 400489590 | 436732 | 436732 | 6691691 | 5182732 | 0 | 0 | 2287278 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5026031 | 4247404 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 824307 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005967817274 | 12075022808063521 | 12075022808341599 | 12075005968448187 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7fc085d83600 | 0x7fbf7a236e00 | 1775216 | 1596792 | 23642619 | 65536 | 174892646 | 221901 | 221901 | 14294808 | 12348852 | 0 | 0 | 11904536 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 13908517 | 13490642 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 533512 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005968615197 | 12075022808421118 | 12075022808556477 | 12075005969030709 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7fc085d83500 | 0x7fbf7a236e40 | 5144088 | 4981360 | 74322266 | 65536 | 611618431 | 643010 | 643010 | 24001286 | 14793359 | 0 | 0 | 1520547 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9086755 | 5301826 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3881236 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005969255076 | 12075022808605277 | 12075022809028153 | 12075005970031379 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7fc085d83400 | 0x7fbf7a236e80 | 3252352 | 3068163 | 45438484 | 65536 | 322822556 | 406543 | 406543 | 34722498 | 33643189 | 0 | 0 | 23111939 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 32770428 | 32533866 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 362666 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005970191366 | 12075022809117112 | 12075022809377430 | 12075005970800949 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7fc085d83300 | 0x7fbf7a236ec0 | 1818688 | 1633396 | 24173356 | 65536 | 160070692 | 227335 | 227335 | 12417682 | 10101197 | 0 | 0 | 10201189 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 13635056 | 13206060 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 453729 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005970961568 | 12075022809449109 | 12075022809584308 | 12075005971393630 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7fc085d83a00 | 0x7fbf7a236f00 | 3883440 | 3727694 | 55554837 | 65536 | 452200366 | 485429 | 485429 | 6603350 | 5131552 | 0 | 0 | 2384435 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5034130 | 4239269 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 838956 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005971622526 | 12075022809631027 | 12075022809944785 | 12075005972282352 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7fc085d83900 | 0x7fbf7a236f40 | 1940440 | 1778471 | 26439915 | 65536 | 214164578 | 242554 | 242554 | 7588036 | 5244646 | 0 | 0 | 1562133 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5183774 | 4342926 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 832948 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005972449042 | 12075022810028624 | 12075022810181582 | 12075005972885212 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7fc085d83800 | 0x7fbf7a236f80 | 6265768 | 6109244 | 91132642 | 65536 | 750596006 | 783220 | 783220 | 24656150 | 14854643 | 0 | 0 | 1476946 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8935775 | 5076021 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3939068 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005973110822 | 12075022810233902 | 12075022810749257 | 12075005973975929 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7fc085d83700 | 0x7fbf7a236fc0 | 3654976 | 3468026 | 51259454 | 65536 | 380338940 | 456871 | 456871 | 25974298 | 24510900 | 0 | 0 | 16927078 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 23564073 | 23117922 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 457760 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005974139614 | 12075022810831337 | 12075022811129254 | 12075005974790393 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7fc085d83600 | 0x7fbf7a237000 | 1967920 | 1814693 | 26962853 | 65536 | 218963289 | 245989 | 245989 | 7592508 | 5277522 | 0 | 0 | 1798008 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4752566 | 3918726 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 875217 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005974939500 | 12075022811213733 | 12075022811368772 | 12075005975382263 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7fc085d83500 | 0x7fbf7a237040 | 4743040 | 4578474 | 68268748 | 65536 | 560426029 | 592879 | 592879 | 6841679 | 5364058 | 0 | 0 | 2340373 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5041521 | 4288144 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 942721 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005975612280 | 12075022811415811 | 12075022811799968 | 12075005976342076 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7fc085d83400 | 0x7fbf7a237080 | 2214952 | 2057602 | 30616934 | 65536 | 249380136 | 276868 | 276868 | 7152683 | 4911785 | 0 | 0 | 1883049 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4957003 | 4115251 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 859820 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005976513846 | 12075022811885087 | 12075022812061726 | 12075005976965675 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7fc085d83300 | 0x7fbf7a2370c0 | 7396736 | 7232040 | 107923760 | 65536 | 889659965 | 924591 | 924591 | 24946932 | 15146669 | 0 | 0 | 949375 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8935866 | 5094082 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3904229 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005977195593 | 12075022812113885 | 12075022812721400 | 12075005978149175 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7fc085d83a00 | 0x7fbf7a237100 | 4000128 | 3848075 | 57218356 | 65536 | 460770582 | 500015 | 500015 | 18085120 | 16223097 | 0 | 0 | 5813215 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 12855533 | 12187239 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 663848 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005978309763 | 12075022812793719 | 12075022813119956 | 12075005978986922 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7fc085d83900 | 0x7fbf7a237140 | 2269528 | 2105373 | 31305839 | 65536 | 254684458 | 283690 | 283690 | 7619963 | 5340395 | 0 | 0 | 1556236 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5082545 | 4237113 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 811249 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005979152810 | 12075022813196435 | 12075022813374674 | 12075005979610530 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7fc085d83800 | 0x7fbf7a237180 | 5831168 | 5421207 | 80868904 | 65536 | 665189854 | 728895 | 728895 | 6924600 | 5456905 | 0 | 0 | 2313598 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5193943 | 4416374 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 952891 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005979837863 | 12075022813423633 | 12075022813878989 | 12075005980661944 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7fc085d83700 | 0x7fbf7a2371c0 | 2519128 | 2343189 | 34849336 | 65536 | 284804950 | 314890 | 314890 | 7329695 | 5021160 | 0 | 0 | 1615507 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4897994 | 4057005 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 852488 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005980833263 | 12075022813959788 | 12075022814158827 | 12075005981383726 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7fc085d83600 | 0x7fbf7a237200 | 8505264 | 8353851 | 124675059 | 65536 | 1028316673 | 1063157 | 1063157 | 25009267 | 14925509 | 0 | 0 | 1559193 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9009982 | 5119375 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3967547 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005981545376 | 12075022814230506 | 12075022814940420 | 12075005982597972 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7fc085d83500 | 0x7fbf7a237240 | 4545840 | 4373485 | 65146484 | 65536 | 516693374 | 568229 | 568229 | 15519707 | 13683597 | 0 | 0 | 4283885 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 12091628 | 11382802 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 684845 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005982759262 | 12075022815019299 | 12075022815391776 | 12075005983491813 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7fc085d83400 | 0x7fbf7a237280 | 2529008 | 2372839 | 35287997 | 65536 | 286350536 | 316125 | 316125 | 7227890 | 4975822 | 0 | 0 | 1608710 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5035610 | 4245089 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 847328 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005983647463 | 12075022815468895 | 12075022815670813 | 12075005984197084 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7fc085d83300 | 0x7fbf7a2372c0 | 6632552 | 6264251 | 93518938 | 65536 | 768003430 | 829068 | 829068 | 6986668 | 5513001 | 0 | 0 | 2352080 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5225670 | 4451369 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 967387 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005984353475 | 12075022815740572 | 12075022816265688 | 12075005985243268 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7fc085d83a00 | 0x7fbf7a237300 | 2808856 | 2632275 | 39070548 | 65536 | 315494889 | 351106 | 351106 | 7156634 | 4912052 | 0 | 0 | 1524674 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4726055 | 3934995 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 853551 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005985418854 | 12075022816351287 | 12075022816574485 | 12075005985985407 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7fc085d83900 | 0x7fbf7a237340 | 9657904 | 9484900 | 141633547 | 65536 | 1169492729 | 1207237 | 1207237 | 25764684 | 15520081 | 0 | 0 | 1383355 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8497133 | 4585577 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3966019 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005986147589 | 12075022816645844 | 12075022817442797 | 12075005987295582 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7fc085d83800 | 0x7fbf7a237380 | 5122672 | 4951541 | 73816224 | 65536 | 593278714 | 640333 | 640333 | 15659350 | 13862661 | 0 | 0 | 3939638 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 12157249 | 11440752 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 664698 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005987459867 | 12075022817516557 | 12075022817933513 | 12075005988229918 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7fc085d83700 | 0x7fbf7a2373c0 | 2817688 | 2660972 | 39609324 | 65536 | 323561989 | 352210 | 352210 | 7261642 | 4949745 | 0 | 0 | 1503769 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 4829872 | 3975136 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 844993 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005988397470 | 12075022818007272 | 12075022818232070 | 12075005988971677 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7fc085d83600 | 0x7fbf7a237400 | 7268416 | 7111130 | 106180797 | 65536 | 874994100 | 908551 | 908551 | 7453991 | 5863597 | 0 | 0 | 2297026 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5549453 | 4770473 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 895699 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005989137154 | 12075022818307749 | 12075022818904064 | 12075005990075999 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7fc085d83500 | 0x7fbf7a237440 | 3341032 | 3181358 | 47310216 | 65536 | 387267264 | 417628 | 417628 | 7550584 | 5421111 | 0 | 0 | 2096104 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5181200 | 4280807 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 1007447 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005990250864 | 12075022818983583 | 12075022819252221 | 12075005990859615 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7fc085d83400 | 0x7fbf7a237480 | 11916136 | 11734261 | 175208279 | 65536 | 1445613726 | 1489516 | 1489516 | 25248089 | 14896466 | 0 | 0 | 1159310 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8482200 | 4567285 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3976479 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005991031725 | 12075022819330140 | 12075022820313811 | 12075005992372928 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7fc085d83300 | 0x7fbf7a2374c0 | 6206232 | 6048559 | 90234685 | 65536 | 738006319 | 775778 | 775778 | 15471027 | 13429137 | 0 | 0 | 3344601 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 11932067 | 11214699 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 654662 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005992534668 | 12075022820391091 | 12075022820921486 | 12075005993397482 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7fc085d83a00 | 0x7fbf7a237500 | 3372464 | 3210648 | 47749667 | 65536 | 390589074 | 421557 | 421557 | 7702724 | 5484587 | 0 | 0 | 2068803 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5248098 | 4381279 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 1044390 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005993544224 | 12075022820997325 | 12075022821269483 | 12075005994159167 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7fc085d83900 | 0x7fbf7a237540 | 9006064 | 8842732 | 132051232 | 65536 | 1089959540 | 1125757 | 1125757 | 18925058 | 8820726 | 0 | 0 | 770423 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 8146760 | 4377622 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3766516 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005994323973 | 12075022821342122 | 12075022822079236 | 12075005995404331 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7fc085d83800 | 0x7fbf7a237580 | 3907584 | 3745395 | 55757514 | 65536 | 456120811 | 488447 | 488447 | 7582673 | 5418575 | 0 | 0 | 1986596 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5132812 | 4283105 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 982637 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005995578795 | 12075022822163555 | 12075022822478112 | 12075005996244923 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7fc085d83700 | 0x7fbf7a2375c0 | 14146392 | 13982792 | 208768670 | 65536 | 1724103451 | 1768298 | 1768298 | 26136461 | 15141568 | 0 | 0 | 1403518 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9033867 | 5076821 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4024974 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005996400031 | 12075022822549151 | 12075022823719061 | 12075005997925677 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7fc085d83600 | 0x7fbf7a237600 | 7336808 | 7175372 | 107136728 | 65536 | 813806095 | 917100 | 917100 | 14617496 | 12899251 | 0 | 0 | 3934182 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 11866603 | 11109994 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 659043 | 0 | 0 | 0 | 0 | 0 | 0 | 12075005998085003 | 12075022823790900 | 12075022824403215 | 12075005999032964 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7fc085d83500 | 0x7fbf7a237640 | 3951656 | 3768290 | 56125326 | 65536 | 459755031 | 493956 | 493956 | 7406586 | 5354323 | 0 | 0 | 2100541 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5216109 | 4345305 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 1019421 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075005999194635 | 12075022824477614 | 12075022824795211 | 12075005999863077 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7fc085d83400 | 0x7fbf7a237680 | 10691648 | 10529032 | 157228147 | 65536 | 1297295641 | 1336455 | 1336455 | 18360498 | 8531485 | 0 | 0 | 551840 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7806368 | 4162250 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3707164 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006000037712 | 12075022824867050 | 12075022825743523 | 12075006001264822 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7fc085d83300 | 0x7fbf7a2376c0 | 5059688 | 4888064 | 72776046 | 65536 | 597636159 | 632460 | 632460 | 7267830 | 5283113 | 0 | 0 | 2012613 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 5053711 | 4216855 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 1006279 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006001437844 | 12075022825832800 | 12075022826240637 | 12075006002199469 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7fc085d83a00 | 0x7fbf7a237700 | 18641256 | 18491185 | 276245180 | 65536 | 2284489216 | 2330156 | 2330156 | 25482668 | 14987134 | 0 | 0 | 1327731 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9004510 | 5045691 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4012533 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006002363313 | 12075022826324955 | 12075022827871823 | 12075006004252124 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7fc085d83900 | 0x7fbf7a237740 | 9647016 | 9437199 | 140878941 | 65536 | 1162278861 | 1205876 | 1205876 | 25290652 | 14940161 | 0 | 0 | 1545370 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8776312 | 4870023 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3960702 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006004418072 | 12075022827987662 | 12075022828778376 | 12075006005559543 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7fc085d83800 | 0x7fbf7a237780 | 5116776 | 4955206 | 73919005 | 65536 | 607999134 | 639596 | 639596 | 18615186 | 8827252 | 0 | 0 | 698775 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7995100 | 4258357 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3853036 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006005721164 | 12075022828894855 | 12075022829307972 | 12075006006476357 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7fc085d83700 | 0x7fbf7a2377c0 | 14052208 | 13893517 | 207519166 | 65536 | 1713605869 | 1756525 | 1756525 | 19101397 | 8720359 | 0 | 0 | 719577 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7836927 | 4182617 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3696928 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006006638849 | 12075022829424611 | 12075022830582202 | 12075006008121564 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7fc085d83600 | 0x7fbf7a237800 | 9576872 | 9419004 | 140682739 | 65536 | 1161828410 | 1197108 | 1197108 | 18276229 | 8563562 | 0 | 0 | 686686 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7879704 | 4229835 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3724312 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006008294897 | 12075022830711321 | 12075022831495635 | 12075006009426980 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7fc085d83500 | 0x7fbf7a237840 | 36666600 | 36505627 | 545516366 | 65536 | 4515584965 | 4583324 | 4583324 | 26732671 | 15680623 | 0 | 0 | 1189222 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8921344 | 4955847 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4016478 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006009592919 | 12075022831612914 | 12075022834660890 | 12075006012991695 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7fc085d83400 | 0x7fbf7a237880 | 18577904 | 18420076 | 275191475 | 65536 | 2274978871 | 2322237 | 2322237 | 25996098 | 15352813 | 0 | 0 | 878592 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8451926 | 4542556 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3983240 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006013153536 | 12075022834779449 | 12075022836320397 | 12075006015036325 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7fc085d83300 | 0x7fbf7a2378c0 | 9606464 | 9453392 | 141193708 | 65536 | 1165874168 | 1200807 | 1200807 | 18658872 | 8614354 | 0 | 0 | 558769 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 8068651 | 4278302 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3828524 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006015203525 | 12075022836438956 | 12075022837227750 | 12075006016344155 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7fc085d83a00 | 0x7fbf7a237900 | 27535552 | 27380203 | 409119298 | 65536 | 3385564351 | 3441943 | 3441943 | 17586376 | 8069772 | 0 | 0 | 724129 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7503100 | 4043728 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3418540 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006016506988 | 12075022837345349 | 12075022839625971 | 12075006019136574 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7fc085d83900 | 0x7fbf7a237940 | 18573096 | 18415648 | 275105620 | 65536 | 2274095117 | 2321636 | 2321636 | 19214848 | 8729640 | 0 | 0 | 907589 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7918932 | 4206284 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3782724 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006019308915 | 12075022839751890 | 12075022841287078 | 12075006021187385 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7fc085d83800 | 0x7fbf7a237980 | 72680984 | 72526022 | 1083989120 | 65536 | 8976801495 | 9085122 | 9085122 | 26615228 | 15629447 | 0 | 0 | 871946 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 9020088 | 5069353 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 4008601 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006021356800 | 12075022841405797 | 12075022847457750 | 12075006027749420 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7fc085d83700 | 0x7fbf7a2379c0 | 36566888 | 36410988 | 544129327 | 65536 | 4502382453 | 4570860 | 4570860 | 23987671 | 14344527 | 0 | 0 | 798106 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 0 | 8741346 | 4986700 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3883155 | 0 | 0 | 0 | 0 | 0 | 0 | 12075006027962326 | 12075022847573589 | 12075022850614685 | 12075006031373635 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7fc085d83600 | 0x7fbf7a237a00 | 18623552 | 18463206 | 275765883 | 65536 | 2280343217 | 2327943 | 2327943 | 19302720 | 8833215 | 0 | 0 | 814984 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 8036122 | 4234543 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3833288 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006031529034 | 12075022850731324 | 12075022852270352 | 12075006033407555 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 857583 | 857588 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7fc085d83500 | 0x7fbf7a237a40 | 54488344 | 54333224 | 812017734 | 65536 | 6724078728 | 6811042 | 6811042 | 17511835 | 8053962 | 0 | 0 | 750942 | 524288 | 0 | 0 | 524288 | 524288 | 0 | 0 | 524288 | 7618790 | 4177761 | 0 | 0 | 0 | 2097152 | 2097152 | 0 | 0 | 0 | 0 | 3588652 | 0 | 0 | 0 | 0 | 0 | 524288 | 12075006033565048 | 12075022852384751 | 12075022856909836 | 12075006038439386 |