73 KiB
73 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | TCC_CYCLE_sum | TCC_BUSY_sum | TCC_PROBE_sum | TCC_PROBE_ALL_sum | TCC_EA_ATOMIC_LEVEL_sum | TCC_NC_REQ_sum | TCC_UC_REQ_sum | TCC_CC_REQ_sum | TCC_RW_REQ_sum | TCC_REQ_sum | TCC_STREAMING_REQ_sum | TCC_HIT_sum | TCC_MISS_sum | TCC_READ_sum | TCC_WRITE_sum | TCC_ATOMIC_sum | TCC_WRITEBACK_sum | TCC_EA_WRREQ_sum | TCC_EA_WRREQ_64B_sum | TCC_EA_WR_UNCACHED_32B_sum | TCC_EA_WRREQ_STALL_sum | TCC_EA_WRREQ_IO_CREDIT_STALL_sum | TCC_EA_WRREQ_GMI_CREDIT_STALL_sum | TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum | TCC_TOO_MANY_EA_WRREQS_STALL_sum | TCC_EA_ATOMIC_sum | TCC_EA_RDREQ_sum | TCC_EA_RDREQ_32B_sum | TCC_EA_RD_UNCACHED_32B_sum | TCC_EA_RDREQ_IO_CREDIT_STALL_sum | TCC_EA_RDREQ_GMI_CREDIT_STALL_sum | TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum | TCC_TAG_STALL_sum | TCC_NORMAL_WRITEBACK_sum | TCC_ALL_TC_OP_WB_WRITEBACK_sum | TCC_NORMAL_EVICT_sum | TCC_ALL_TC_OP_INV_EVICT_sum | TCC_EA_RDREQ_DRAM_sum | TCC_EA_WRREQ_DRAM_sum | TCC_EA_RDREQ_LEVEL_sum | TCC_EA_WRREQ_LEVEL_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 863873 | 863878 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f6c94804180 | 4043984 | 3841536 | 524288 | 505497 | 505497 | 16175904 | 15326395 | 0 | 0 | 0 | 96 | 10687 | 0 | 8388944 | 8399871 | 0 | 4194727 | 4205144 | 11097 | 8388608 | 0 | 4194394 | 4194364 | 4194364 | 0 | 223527 | 0 | 0 | 215899 | 0 | 0 | 10563 | 0 | 21108 | 0 | 0 | 0 | 151892 | 4128825 | 65535 | 4063244 | 0 | 10517 | 4194368 | 13171725 | 611665146 | 12075166098232254 | 12075177938979153 | 12075177939304590 | 12075166346728601 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 863873 | 863878 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f6c94835100 | 218536 | 76907 | 512 | 27316 | 27316 | 874112 | 194008 | 0 | 0 | 0 | 48 | 220 | 0 | 8624 | 8893 | 0 | 470 | 8423 | 8891 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8422 | 0 | 440 | 0 | 0 | 0 | 49837 | 0 | 0 | 0 | 0 | 8372 | 0 | 3391711 | 0 | 12075166361193422 | 12075177954367351 | 12075177954373911 | 12075166361704922 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f6da043e900 | 0x7f6c94835140 | 1818352 | 1657128 | 65536 | 227293 | 227293 | 7273376 | 6556470 | 0 | 0 | 0 | 48 | 3128 | 0 | 2097536 | 2101971 | 0 | 423 | 2101548 | 2102687 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101790 | 0 | 9258 | 0 | 0 | 351401 | 404242 | 0 | 0 | 1966096 | 0 | 2099593 | 0 | 1102585342 | 0 | 12075166361821559 | 12075177954432950 | 12075177954570549 | 12075166362465015 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f6da043e800 | 0x7f6c94835180 | 3087688 | 2934161 | 65536 | 385960 | 385960 | 12350720 | 11657365 | 0 | 0 | 0 | 48 | 2372 | 0 | 4194784 | 4197468 | 0 | 517 | 4196951 | 4197250 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196341 | 0 | 4052 | 0 | 0 | 1062214 | 1115860 | 0 | 0 | 4063245 | 0 | 4196224 | 0 | 2270154340 | 0 | 12075166362659186 | 12075177954632149 | 12075177954877427 | 12075166363362974 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f6da043e700 | 0x7f6c948351c0 | 3218688 | 3038462 | 65536 | 402335 | 402335 | 12874720 | 12190105 | 0 | 0 | 0 | 48 | 1439 | 0 | 4194688 | 4196955 | 0 | 423 | 4196532 | 4197097 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195866 | 0 | 3106 | 0 | 0 | 558234 | 612318 | 0 | 0 | 4063243 | 0 | 4196022 | 0 | 2372733597 | 0 | 12075166363479641 | 12075177955022866 | 12075177955266704 | 12075166364175614 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f6da043e600 | 0x7f6c94835200 | 1750896 | 1596866 | 65536 | 218861 | 218861 | 7003552 | 6290232 | 0 | 0 | 0 | 48 | 4998 | 0 | 2097584 | 2100957 | 0 | 470 | 2100487 | 2103043 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101814 | 0 | 9304 | 0 | 0 | 238584 | 291575 | 0 | 0 | 1966102 | 0 | 2099745 | 0 | 1169734548 | 0 | 12075166364265030 | 12075177955405103 | 12075177955530542 | 12075166364776330 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f6da043e500 | 0x7f6c94835240 | 1731056 | 1567800 | 65536 | 216381 | 216381 | 6924192 | 6206694 | 0 | 0 | 0 | 48 | 2669 | 0 | 2097584 | 2102368 | 0 | 470 | 2101898 | 2102104 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100471 | 0 | 6618 | 0 | 0 | 487110 | 540203 | 0 | 0 | 1966109 | 0 | 2100181 | 0 | 1120919721 | 0 | 12075166364945104 | 12075177955572141 | 12075177955697260 | 12075166365446395 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f6da043e400 | 0x7f6c94835280 | 1671816 | 1511915 | 65536 | 208976 | 208976 | 6687232 | 5993587 | 0 | 0 | 0 | 48 | 2690 | 0 | 2097584 | 2100824 | 0 | 470 | 2100354 | 2100024 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101133 | 0 | 7942 | 0 | 0 | 522982 | 575061 | 0 | 0 | 1966098 | 0 | 2101712 | 0 | 920434884 | 0 | 12075166365643462 | 12075177955758700 | 12075177955883019 | 12075166366130667 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f6da043e300 | 0x7f6c948352c0 | 3154696 | 2995182 | 65536 | 394336 | 394336 | 12618752 | 11883928 | 0 | 0 | 0 | 48 | 3497 | 0 | 4194928 | 4197056 | 0 | 658 | 4196398 | 4196861 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195335 | 0 | 2034 | 0 | 0 | 1629015 | 1682420 | 0 | 0 | 4063252 | 0 | 4196673 | 0 | 2237227763 | 0 | 12075166366302326 | 12075177955923659 | 12075177956173417 | 12075166366978934 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f6da043ea00 | 0x7f6c94835300 | 3192520 | 3011692 | 65536 | 399064 | 399064 | 12770048 | 12083870 | 0 | 0 | 0 | 48 | 2350 | 0 | 4194784 | 4196640 | 0 | 517 | 4196123 | 4196146 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196726 | 0 | 4822 | 0 | 0 | 1386192 | 1443559 | 0 | 0 | 4063245 | 0 | 4195311 | 0 | 1911266775 | 0 | 12075166367079711 | 12075177956311016 | 12075177956558054 | 12075166367760706 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f6da043e900 | 0x7f6c94835340 | 1706432 | 1540716 | 65536 | 213303 | 213303 | 6825696 | 6103082 | 0 | 0 | 0 | 48 | 2162 | 0 | 2097632 | 2100695 | 0 | 517 | 2100178 | 2100151 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099678 | 0 | 5030 | 0 | 0 | 539432 | 590422 | 0 | 0 | 1966106 | 0 | 2098712 | 0 | 1275215497 | 0 | 12075166367868026 | 12075177956690373 | 12075177956815172 | 12075166368366312 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f6da043e800 | 0x7f6c94835380 | 1710408 | 1559562 | 65536 | 213800 | 213800 | 6841600 | 6141141 | 0 | 0 | 0 | 48 | 4422 | 0 | 2097632 | 2102501 | 0 | 517 | 2101984 | 2100271 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100252 | 0 | 6178 | 0 | 0 | 558812 | 612498 | 0 | 0 | 1966099 | 0 | 2098899 | 0 | 1201832770 | 0 | 12075166368526259 | 12075177956855171 | 12075177956981090 | 12075166369014697 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f6da043e700 | 0x7f6c948353c0 | 1705288 | 1547945 | 65536 | 213160 | 213160 | 6821120 | 6112185 | 0 | 0 | 0 | 48 | 2022 | 0 | 2097632 | 2100354 | 0 | 517 | 2099837 | 2100993 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099842 | 0 | 5358 | 0 | 0 | 955463 | 1006741 | 0 | 0 | 1966101 | 0 | 2098527 | 0 | 1405033861 | 0 | 12075166369198849 | 12075177957044770 | 12075177957169409 | 12075166369688018 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f6da043e600 | 0x7f6c94835400 | 3160688 | 2997500 | 65536 | 395085 | 395085 | 12642720 | 11926529 | 0 | 0 | 0 | 48 | 824 | 0 | 4195024 | 4197597 | 0 | 752 | 4196845 | 4196196 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195148 | 0 | 1656 | 0 | 0 | 1224722 | 1278164 | 0 | 0 | 4063250 | 0 | 4196570 | 0 | 1969280769 | 0 | 12075166369861400 | 12075177957210848 | 12075177957462686 | 12075166370548537 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f6da043e500 | 0x7f6c94835440 | 3196080 | 3031434 | 65536 | 399509 | 399509 | 12784288 | 12083248 | 0 | 0 | 0 | 48 | 1038 | 0 | 4194832 | 4197220 | 0 | 564 | 4196656 | 4197021 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196159 | 0 | 3686 | 0 | 0 | 1402553 | 1456562 | 0 | 0 | 4063246 | 0 | 4195455 | 0 | 2112146863 | 0 | 12075166370629087 | 12075177957574046 | 12075177957824124 | 12075166371311515 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f6da043e400 | 0x7f6c94835480 | 1687216 | 1531280 | 65536 | 210901 | 210901 | 6748832 | 6042797 | 0 | 0 | 0 | 48 | 2409 | 0 | 2097680 | 2099529 | 0 | 564 | 2098965 | 2100995 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099664 | 0 | 5000 | 0 | 0 | 577135 | 627645 | 0 | 0 | 1966099 | 0 | 2099312 | 0 | 1213848423 | 0 | 12075166371423393 | 12075177957931163 | 12075177958056282 | 12075166371907743 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f6da043e300 | 0x7f6c948354c0 | 1704368 | 1543782 | 65536 | 213045 | 213045 | 6817440 | 6077396 | 0 | 0 | 0 | 48 | 4495 | 0 | 2097680 | 2099907 | 0 | 564 | 2099343 | 2100590 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099027 | 0 | 3726 | 0 | 0 | 398518 | 451661 | 0 | 0 | 1966096 | 0 | 2098863 | 0 | 1224660781 | 0 | 12075166372086856 | 12075177958095801 | 12075177958221080 | 12075166372571106 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f6da043ea00 | 0x7f6c94835500 | 1702960 | 1547684 | 65536 | 212869 | 212869 | 6811808 | 6115064 | 0 | 0 | 0 | 48 | 2272 | 0 | 2097680 | 2101014 | 0 | 564 | 2100450 | 2100346 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098406 | 0 | 2484 | 0 | 0 | 735143 | 786839 | 0 | 0 | 1966102 | 0 | 2099164 | 0 | 1337648844 | 0 | 12075166372752353 | 12075177958271000 | 12075177958395959 | 12075166373240179 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f6da043e900 | 0x7f6c94835540 | 3258632 | 3088540 | 65536 | 407328 | 407328 | 13034496 | 12295905 | 0 | 0 | 0 | 48 | 1394 | 0 | 4195120 | 4196940 | 0 | 846 | 4196094 | 4195823 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195576 | 0 | 2508 | 0 | 0 | 1437597 | 1491877 | 0 | 0 | 4063252 | 0 | 4195388 | 0 | 2081167191 | 0 | 12075166373413652 | 12075177958436439 | 12075177958685397 | 12075166374094707 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f6da043e800 | 0x7f6c94835580 | 3208472 | 3046475 | 65536 | 401058 | 401058 | 12833856 | 12123505 | 0 | 0 | 0 | 48 | 2219 | 0 | 4194880 | 4195982 | 0 | 611 | 4195371 | 4195786 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195597 | 0 | 2560 | 0 | 0 | 757669 | 810492 | 0 | 0 | 4063247 | 0 | 4195324 | 0 | 2085660515 | 0 | 12075166374204592 | 12075177958762996 | 12075177959011794 | 12075166374878794 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f6da043e700 | 0x7f6c948355c0 | 1676360 | 1517922 | 65536 | 209544 | 209544 | 6705408 | 6006877 | 0 | 0 | 0 | 48 | 2727 | 0 | 2097728 | 2100520 | 0 | 611 | 2099909 | 2100037 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098309 | 0 | 2288 | 0 | 0 | 414375 | 468726 | 0 | 0 | 1966098 | 0 | 2098994 | 0 | 1199400305 | 0 | 12075166374985803 | 12075177959119153 | 12075177959243632 | 12075166375473099 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f6da043e600 | 0x7f6c94835600 | 1651568 | 1496886 | 65536 | 206445 | 206445 | 6606240 | 5867419 | 0 | 0 | 0 | 48 | 1809 | 0 | 2097776 | 2100276 | 0 | 658 | 2099618 | 2101592 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098028 | 0 | 1724 | 0 | 0 | 599996 | 654052 | 0 | 0 | 1966096 | 0 | 2099121 | 0 | 1293175124 | 0 | 12075166375649717 | 12075177959281712 | 12075177959407471 | 12075166376126944 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f6da043e500 | 0x7f6c94835640 | 1721624 | 1561840 | 65536 | 215202 | 215202 | 6886464 | 6158043 | 0 | 0 | 0 | 48 | 2530 | 0 | 2097728 | 2100114 | 0 | 611 | 2099503 | 2100873 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099689 | 0 | 5048 | 0 | 0 | 778948 | 830824 | 0 | 0 | 1966102 | 0 | 2099701 | 0 | 1136296501 | 0 | 12075166376248901 | 12075177959457711 | 12075177959582190 | 12075166376772975 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f6da043e400 | 0x7f6c94835680 | 3217608 | 3048791 | 65536 | 402200 | 402200 | 12870400 | 12145488 | 0 | 0 | 0 | 48 | 2879 | 0 | 4195216 | 4198226 | 0 | 940 | 4197286 | 4198156 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195034 | 0 | 1420 | 0 | 0 | 1539126 | 1582387 | 0 | 0 | 4063254 | 0 | 4196718 | 0 | 2313701182 | 0 | 12075166376949643 | 12075177959623789 | 12075177959872747 | 12075166377630568 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f6da043e300 | 0x7f6c948356c0 | 3222848 | 3033812 | 65536 | 402855 | 402855 | 12891360 | 12188004 | 0 | 0 | 0 | 48 | 2842 | 0 | 4194928 | 4196864 | 0 | 658 | 4196206 | 4196407 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196422 | 0 | 4208 | 0 | 0 | 960562 | 1014059 | 0 | 0 | 4063256 | 0 | 4195743 | 0 | 2478128429 | 0 | 12075166377716468 | 12075177959950987 | 12075177960196585 | 12075166378402833 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f6da043ea00 | 0x7f6c94835700 | 1683824 | 1527628 | 65536 | 210477 | 210477 | 6735264 | 6038868 | 0 | 0 | 0 | 48 | 2846 | 0 | 2097776 | 2099802 | 0 | 658 | 2099144 | 2100792 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099813 | 0 | 5294 | 0 | 0 | 483747 | 531020 | 0 | 0 | 1966103 | 0 | 2099568 | 0 | 1210480957 | 0 | 12075166378509532 | 12075177960302504 | 12075177960426823 | 12075166378990155 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f6da043e900 | 0x7f6c94835740 | 2280752 | 1594275 | 65536 | 285093 | 285093 | 9122976 | 6280038 | 0 | 0 | 0 | 48 | 2505 | 0 | 2097824 | 2099939 | 0 | 705 | 2099234 | 2101782 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098043 | 0 | 1752 | 0 | 0 | 574466 | 627990 | 0 | 0 | 1966101 | 0 | 2100476 | 0 | 1025616436 | 0 | 12075166379141236 | 12075177960466663 | 12075177960592102 | 12075166379667313 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f6da043e800 | 0x7f6c94835780 | 1703472 | 1532234 | 65536 | 212933 | 212933 | 6813856 | 6072623 | 0 | 0 | 0 | 48 | 1731 | 0 | 2097728 | 2099982 | 0 | 611 | 2099371 | 2100972 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100331 | 0 | 6332 | 0 | 0 | 605392 | 657807 | 0 | 0 | 1966104 | 0 | 2099221 | 0 | 1244823051 | 0 | 12075166379847769 | 12075177960640741 | 12075177960765540 | 12075166380337338 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f6da043e700 | 0x7f6c948357c0 | 3217072 | 3041335 | 65536 | 402133 | 402133 | 12868256 | 12136622 | 0 | 0 | 0 | 48 | 1975 | 0 | 4195312 | 4197777 | 0 | 1034 | 4196743 | 4198381 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195696 | 0 | 2740 | 0 | 0 | 668361 | 722360 | 0 | 0 | 4063256 | 0 | 4196652 | 0 | 2285168935 | 0 | 12075166380510120 | 12075177960804100 | 12075177961053378 | 12075166381182980 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f6da043e600 | 0x7f6c94835800 | 3197168 | 3022702 | 65536 | 399645 | 399645 | 12788640 | 12066930 | 0 | 0 | 0 | 48 | 990 | 0 | 4194976 | 4196170 | 0 | 705 | 4195465 | 4196713 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195569 | 0 | 2500 | 0 | 0 | 743270 | 796769 | 0 | 0 | 4063249 | 0 | 4195469 | 0 | 2361079236 | 0 | 12075166381295739 | 12075177961160257 | 12075177961406655 | 12075166381966927 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f6da043e500 | 0x7f6c94835840 | 1679944 | 1520864 | 65536 | 209992 | 209992 | 6719744 | 5987743 | 0 | 0 | 0 | 48 | 3009 | 0 | 2097824 | 2100787 | 0 | 705 | 2100082 | 2100719 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098770 | 0 | 3206 | 0 | 0 | 1054962 | 1107409 | 0 | 0 | 1966106 | 0 | 2098356 | 0 | 1345194221 | 0 | 12075166382081169 | 12075177961517535 | 12075177961642654 | 12075166382562514 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f6da043e400 | 0x7f6c94835880 | 1694832 | 1529829 | 65536 | 211853 | 211853 | 6779296 | 6008977 | 0 | 0 | 0 | 48 | 3347 | 0 | 2097920 | 2102248 | 0 | 799 | 2101449 | 2102405 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099359 | 0 | 4380 | 0 | 0 | 494087 | 546498 | 0 | 0 | 1966102 | 0 | 2098861 | 0 | 1257641387 | 0 | 12075166382731067 | 12075177961682173 | 12075177961808572 | 12075166383219034 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f6da043e300 | 0x7f6c948358c0 | 1720560 | 1562981 | 65536 | 215069 | 215069 | 6882208 | 6161502 | 0 | 0 | 0 | 48 | 2785 | 0 | 2097824 | 2100871 | 0 | 705 | 2100166 | 2099422 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099041 | 0 | 3748 | 0 | 0 | 472698 | 524973 | 0 | 0 | 1966097 | 0 | 2098099 | 0 | 1390545351 | 0 | 12075166383404228 | 12075177961859772 | 12075177961984891 | 12075166383890361 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f6da043ea00 | 0x7f6c94835900 | 3224456 | 3050783 | 65536 | 403056 | 403056 | 12897792 | 12178291 | 0 | 0 | 0 | 48 | 1220 | 0 | 4195408 | 4198526 | 0 | 1128 | 4197398 | 4198042 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196679 | 0 | 4702 | 0 | 0 | 1033511 | 1086814 | 0 | 0 | 4063258 | 0 | 4195622 | 0 | 2584468493 | 0 | 12075166384064475 | 12075177962036090 | 12075177962288888 | 12075166384743807 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f6da043e900 | 0x7f6c94835940 | 3160496 | 2995965 | 65536 | 395061 | 395061 | 12641952 | 11947640 | 0 | 0 | 0 | 48 | 1631 | 0 | 4195024 | 4198064 | 0 | 752 | 4197312 | 4196124 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195649 | 0 | 2658 | 0 | 0 | 547517 | 601333 | 0 | 0 | 4063246 | 0 | 4196428 | 0 | 2154389429 | 0 | 12075166384851868 | 12075177962410168 | 12075177962653526 | 12075166385529648 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f6da043e800 | 0x7f6c94835980 | 1751152 | 1595046 | 65536 | 218893 | 218893 | 7004576 | 6298145 | 0 | 0 | 0 | 48 | 1968 | 0 | 2097872 | 2102338 | 0 | 752 | 2101586 | 2101817 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101128 | 0 | 7920 | 0 | 0 | 606408 | 658892 | 0 | 0 | 1966114 | 0 | 2099742 | 0 | 1185027903 | 0 | 12075166385632950 | 12075177962716565 | 12075177962841364 | 12075166386117971 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f6da043e700 | 0x7f6c948359c0 | 1735112 | 1568417 | 65536 | 216888 | 216888 | 6940416 | 6124281 | 0 | 0 | 0 | 48 | 3759 | 0 | 2097968 | 2100915 | 0 | 846 | 2100069 | 2100696 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100607 | 0 | 6874 | 0 | 0 | 435243 | 488616 | 0 | 0 | 1966105 | 0 | 2100141 | 0 | 1061690722 | 0 | 12075166386296032 | 12075177962883444 | 12075177963009203 | 12075166386777206 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f6da043e600 | 0x7f6c94835a00 | 1696896 | 1542896 | 65536 | 212111 | 212111 | 6787552 | 6088464 | 0 | 0 | 0 | 48 | 3867 | 0 | 2097872 | 2101457 | 0 | 752 | 2100705 | 2100777 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098414 | 0 | 2492 | 0 | 0 | 562797 | 612936 | 0 | 0 | 1966110 | 0 | 2099861 | 0 | 1100382410 | 0 | 12075166386935761 | 12075177963063762 | 12075177963188401 | 12075166387420351 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f6da043e500 | 0x7f6c94835a40 | 3200048 | 3035291 | 65536 | 400005 | 400005 | 12800160 | 12053235 | 0 | 0 | 0 | 48 | 3380 | 0 | 4195504 | 4198429 | 0 | 1222 | 4197207 | 4198264 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196048 | 0 | 3436 | 0 | 0 | 1331527 | 1384668 | 0 | 0 | 4063260 | 0 | 4195648 | 0 | 2032647754 | 0 | 12075166387592842 | 12075177963229681 | 12075177963474639 | 12075166388265101 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f6da043e400 | 0x7f6c94835a80 | 3085808 | 2911449 | 65536 | 385725 | 385725 | 12343200 | 11644047 | 0 | 0 | 0 | 48 | 3191 | 0 | 4195072 | 4197168 | 0 | 799 | 4196369 | 4197833 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195474 | 0 | 2306 | 0 | 0 | 785572 | 836260 | 0 | 0 | 4063251 | 0 | 4195417 | 0 | 2523912986 | 0 | 12075166388366810 | 12075177963562958 | 12075177963808557 | 12075166389030774 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f6da043e300 | 0x7f6c94835ac0 | 1688240 | 1525781 | 65536 | 211029 | 211029 | 6752928 | 6008904 | 0 | 0 | 0 | 48 | 960 | 0 | 2097920 | 2099114 | 0 | 799 | 2098315 | 2100676 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099926 | 0 | 5514 | 0 | 0 | 718693 | 770261 | 0 | 0 | 1966102 | 0 | 2099015 | 0 | 1285082894 | 0 | 12075166389143243 | 12075177963891756 | 12075177964016715 | 12075166389622013 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f6da043ea00 | 0x7f6c94835b00 | 1783472 | 1621120 | 65536 | 222933 | 222933 | 7133856 | 6346914 | 0 | 0 | 0 | 48 | 3440 | 0 | 2098064 | 2103089 | 0 | 940 | 2102149 | 2103136 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100933 | 0 | 7522 | 0 | 0 | 383460 | 436745 | 0 | 0 | 1966108 | 0 | 2099800 | 0 | 1134722018 | 0 | 12075166389796046 | 12075177964056715 | 12075177964182634 | 12075166390291707 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f6da043e900 | 0x7f6c94835b40 | 1731656 | 1570441 | 65536 | 216456 | 216456 | 6926592 | 6185266 | 0 | 0 | 0 | 48 | 3021 | 0 | 2097920 | 2100572 | 0 | 799 | 2099773 | 2100091 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100613 | 0 | 6888 | 0 | 0 | 648992 | 701501 | 0 | 0 | 1966100 | 0 | 2099425 | 0 | 1167685898 | 0 | 12075166390477332 | 12075177964231913 | 12075177964356552 | 12075166390957565 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f6da043e800 | 0x7f6c94835b80 | 3282632 | 3127587 | 65536 | 410328 | 410328 | 13130496 | 12419554 | 0 | 0 | 0 | 48 | 1908 | 0 | 4195600 | 4197838 | 0 | 1316 | 4196522 | 4198043 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196728 | 0 | 4792 | 0 | 0 | 802918 | 857147 | 0 | 0 | 4063262 | 0 | 4198068 | 0 | 2197067910 | 0 | 12075166391137299 | 12075177964396232 | 12075177964654470 | 12075166391817523 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f6da043e700 | 0x7f6c94835bc0 | 3198344 | 3020419 | 65536 | 399792 | 399792 | 12793344 | 12077952 | 0 | 0 | 0 | 48 | 2336 | 0 | 4195120 | 4197964 | 0 | 846 | 4197118 | 4197992 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195876 | 0 | 3108 | 0 | 0 | 431906 | 485834 | 0 | 0 | 4063252 | 0 | 4195510 | 0 | 2408854526 | 0 | 12075166391922798 | 12075177964745669 | 12075177964990947 | 12075166392595739 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f6da043e600 | 0x7f6c94835c00 | 1710704 | 1555068 | 65536 | 213837 | 213837 | 6842784 | 6097756 | 0 | 0 | 0 | 48 | 1288 | 0 | 2097968 | 2100720 | 0 | 846 | 2099874 | 2099278 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099240 | 0 | 4140 | 0 | 0 | 686439 | 738367 | 0 | 0 | 1966111 | 0 | 2098340 | 0 | 1331214544 | 0 | 12075166392701926 | 12075177965080546 | 12075177965205026 | 12075166393188571 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f6da043e500 | 0x7f6c94835c40 | 1730376 | 1562928 | 65536 | 216296 | 216296 | 6921472 | 6095228 | 0 | 0 | 0 | 48 | 2913 | 0 | 2098112 | 2102958 | 0 | 987 | 2101971 | 2102894 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100088 | 0 | 5830 | 0 | 0 | 410063 | 463979 | 0 | 0 | 1966109 | 0 | 2099116 | 0 | 1183104654 | 0 | 12075166393328531 | 12075177965244545 | 12075177965371264 | 12075166393823410 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f6da043e400 | 0x7f6c94835c80 | 1687920 | 1533811 | 65536 | 210989 | 210989 | 6751648 | 6043914 | 0 | 0 | 0 | 48 | 3218 | 0 | 2097968 | 2100831 | 0 | 846 | 2099985 | 2100916 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098722 | 0 | 3104 | 0 | 0 | 597073 | 648715 | 0 | 0 | 1966103 | 0 | 2098727 | 0 | 1263445222 | 0 | 12075166393977286 | 12075177965421344 | 12075177965546463 | 12075166394463550 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f6da043e300 | 0x7f6c94835cc0 | 3184712 | 3014108 | 65536 | 398088 | 398088 | 12738816 | 11970435 | 0 | 0 | 0 | 48 | 2053 | 0 | 4195696 | 4197885 | 0 | 1410 | 4196475 | 4197961 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4197515 | 0 | 6362 | 0 | 0 | 600017 | 654287 | 0 | 0 | 4063264 | 0 | 4197387 | 0 | 2169093228 | 0 | 12075166394633716 | 12075177965586143 | 12075177965836381 | 12075166395308080 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f6da043ea00 | 0x7f6c94835d00 | 3136472 | 2974756 | 65536 | 392058 | 392058 | 12545856 | 11859044 | 0 | 0 | 0 | 48 | 2066 | 0 | 4195168 | 4197309 | 0 | 893 | 4196416 | 4196397 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4197454 | 0 | 6262 | 0 | 0 | 1311115 | 1364021 | 0 | 0 | 4063253 | 0 | 4195773 | 0 | 2393449906 | 0 | 12075166395416441 | 12075177965891900 | 12075177966135738 | 12075166396084172 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f6da043e900 | 0x7f6c94835d40 | 1670768 | 1510662 | 65536 | 208845 | 208845 | 6683040 | 5943349 | 0 | 0 | 0 | 48 | 3256 | 0 | 2098016 | 2101192 | 0 | 893 | 2100299 | 2101370 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098254 | 0 | 2166 | 0 | 0 | 445692 | 496492 | 0 | 0 | 1966112 | 0 | 2099479 | 0 | 1143016270 | 0 | 12075166396192102 | 12075177966201018 | 12075177966325817 | 12075166396676002 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f6da043e800 | 0x7f6c94835d80 | 1719280 | 1558458 | 65536 | 214909 | 214909 | 6877088 | 6073128 | 0 | 0 | 0 | 48 | 3983 | 0 | 2098208 | 2103451 | 0 | 1081 | 2102370 | 2102310 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101684 | 0 | 9018 | 0 | 0 | 705417 | 758024 | 0 | 0 | 1966109 | 0 | 2100560 | 0 | 1017930555 | 0 | 12075166396786076 | 12075177966368216 | 12075177966498295 | 12075166397325899 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f6da043e700 | 0x7f6c94835dc0 | 1682160 | 1520840 | 65536 | 210269 | 210269 | 6728608 | 5990884 | 0 | 0 | 0 | 48 | 2822 | 0 | 2098016 | 2101429 | 0 | 893 | 2100536 | 2100621 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098919 | 0 | 3496 | 0 | 0 | 681453 | 732768 | 0 | 0 | 1966105 | 0 | 2099812 | 0 | 1209756848 | 0 | 12075166397512686 | 12075177966548855 | 12075177966674454 | 12075166397994191 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f6da043e600 | 0x7f6c94835e00 | 3256496 | 3097198 | 65536 | 407061 | 407061 | 13025952 | 12270035 | 0 | 0 | 0 | 48 | 3869 | 0 | 4195792 | 4197563 | 0 | 1504 | 4196059 | 4199164 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196605 | 0 | 4538 | 0 | 0 | 1111909 | 1166321 | 0 | 0 | 4063266 | 0 | 4195170 | 0 | 1965743945 | 0 | 12075166398170489 | 12075177966714934 | 12075177966975252 | 12075166398847537 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f6da043e500 | 0x7f6c94835e40 | 3181552 | 3007180 | 65536 | 397693 | 397693 | 12726176 | 12019796 | 0 | 0 | 0 | 48 | 2005 | 0 | 4195216 | 4196441 | 0 | 940 | 4195501 | 4197393 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195376 | 0 | 2104 | 0 | 0 | 1050893 | 1104376 | 0 | 0 | 4063251 | 0 | 4196220 | 0 | 2525999369 | 0 | 12075166398953935 | 12075177967042451 | 12075177967288849 | 12075166399629360 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f6da043e400 | 0x7f6c94835e80 | 1689544 | 1533726 | 65536 | 211192 | 211192 | 6758144 | 6033296 | 0 | 0 | 0 | 48 | 2148 | 0 | 2098064 | 2101215 | 0 | 940 | 2100275 | 2101326 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100523 | 0 | 6702 | 0 | 0 | 508803 | 560271 | 0 | 0 | 1966106 | 0 | 2099187 | 0 | 1260966959 | 0 | 12075166399735187 | 12075177967352369 | 12075177967477168 | 12075166400218815 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f6da043e300 | 0x7f6c94835ec0 | 1729608 | 1565688 | 65536 | 216200 | 216200 | 6918400 | 6091472 | 0 | 0 | 0 | 48 | 2487 | 0 | 2098256 | 2102082 | 0 | 1128 | 2100954 | 2100970 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100907 | 0 | 7462 | 0 | 0 | 468041 | 524671 | 0 | 0 | 1966109 | 0 | 2101386 | 0 | 913961846 | 0 | 12075166400361350 | 12075177967518447 | 12075177967648846 | 12075166400852764 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f6da043ea00 | 0x7f6c94835f00 | 1699952 | 1538097 | 65536 | 212493 | 212493 | 6799776 | 6060115 | 0 | 0 | 0 | 48 | 1929 | 0 | 2098064 | 2100160 | 0 | 940 | 2099220 | 2101446 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098713 | 0 | 3082 | 0 | 0 | 576589 | 628748 | 0 | 0 | 1966102 | 0 | 2100103 | 0 | 1106569696 | 0 | 12075166401014334 | 12075177967699886 | 12075177967825325 | 12075166401494216 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f6da043e900 | 0x7f6c94835f40 | 3198768 | 3031927 | 65536 | 399845 | 399845 | 12795040 | 12020777 | 0 | 0 | 0 | 48 | 1447 | 0 | 4195888 | 4199304 | 0 | 1598 | 4197706 | 4197779 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196496 | 0 | 4316 | 0 | 0 | 584995 | 636616 | 0 | 0 | 4063268 | 0 | 4195429 | 0 | 2205382636 | 0 | 12075166401668259 | 12075177967865485 | 12075177968117323 | 12075166402341741 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f6da043e800 | 0x7f6c94835f80 | 3233496 | 3069193 | 65536 | 404186 | 404186 | 12933952 | 12237116 | 0 | 0 | 0 | 48 | 1909 | 0 | 4195264 | 4197370 | 0 | 987 | 4196383 | 4197732 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4197138 | 0 | 5626 | 0 | 0 | 1192185 | 1245566 | 0 | 0 | 4063255 | 0 | 4196961 | 0 | 2280004173 | 0 | 12075166402448359 | 12075177968180202 | 12075177968427880 | 12075166403126339 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f6da043e700 | 0x7f6c94835fc0 | 1668488 | 1508320 | 65536 | 208560 | 208560 | 6673920 | 5940030 | 0 | 0 | 0 | 48 | 2443 | 0 | 2098112 | 2100700 | 0 | 987 | 2099713 | 2102053 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098707 | 0 | 3068 | 0 | 0 | 346358 | 398628 | 0 | 0 | 1966103 | 0 | 2100588 | 0 | 1007124541 | 0 | 12075166403233959 | 12075177968485160 | 12075177968610919 | 12075166403710424 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f6da043e600 | 0x7f6c94836000 | 1796568 | 1639766 | 65536 | 224570 | 224570 | 7186240 | 6368847 | 0 | 0 | 0 | 48 | 1826 | 0 | 2098352 | 2100140 | 0 | 1222 | 2098918 | 2099355 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100982 | 0 | 7608 | 0 | 0 | 394978 | 448313 | 0 | 0 | 1966108 | 0 | 2098462 | 0 | 1040479242 | 0 | 12075166403880701 | 12075177968651718 | 12075177968788677 | 12075166404378335 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f6da043e500 | 0x7f6c94836040 | 1727944 | 1560084 | 65536 | 215992 | 215992 | 6911744 | 6141281 | 0 | 0 | 0 | 48 | 3185 | 0 | 2098112 | 2102151 | 0 | 987 | 2101164 | 2100212 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099238 | 0 | 4130 | 0 | 0 | 561498 | 613436 | 0 | 0 | 1966106 | 0 | 2099133 | 0 | 1113953356 | 0 | 12075166404560965 | 12075177968839237 | 12075177968964196 | 12075166405040516 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f6da043e400 | 0x7f6c94836080 | 3256624 | 3097863 | 65536 | 407077 | 407077 | 13026464 | 12294143 | 0 | 0 | 0 | 48 | 3478 | 0 | 4195984 | 4198267 | 0 | 1692 | 4196575 | 4199477 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196620 | 0 | 4560 | 0 | 0 | 1037256 | 1090562 | 0 | 0 | 4063270 | 0 | 4196133 | 0 | 1946674739 | 0 | 12075166405212846 | 12075177969004036 | 12075177969256354 | 12075166405893731 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f6da043e300 | 0x7f6c948360c0 | 3085232 | 2899783 | 65536 | 385653 | 385653 | 12340896 | 11623796 | 0 | 0 | 0 | 48 | 1764 | 0 | 4195312 | 4197383 | 0 | 1034 | 4196349 | 4197578 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195428 | 0 | 2204 | 0 | 0 | 899961 | 951853 | 0 | 0 | 4063256 | 0 | 4195602 | 0 | 2243867105 | 0 | 12075166405998847 | 12075177969315873 | 12075177969564511 | 12075166406672268 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f6da043ea00 | 0x7f6c94836100 | 1739952 | 1582177 | 65536 | 217493 | 217493 | 6959776 | 6223341 | 0 | 0 | 0 | 48 | 3326 | 0 | 2098160 | 2101410 | 0 | 1034 | 2100376 | 2102364 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098675 | 0 | 3002 | 0 | 0 | 697980 | 750076 | 0 | 0 | 1966112 | 0 | 2099792 | 0 | 1121045338 | 0 | 12075166406779157 | 12075177969617791 | 12075177969743230 | 12075166407270209 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f6da043e900 | 0x7f6c94836140 | 1842224 | 1675132 | 65536 | 230277 | 230277 | 7368864 | 6549337 | 0 | 0 | 0 | 48 | 1206 | 0 | 2098400 | 2099750 | 0 | 1269 | 2098481 | 2099602 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098347 | 0 | 2336 | 0 | 0 | 695023 | 747214 | 0 | 0 | 1966109 | 0 | 2098445 | 0 | 1123639073 | 0 | 12075166407416531 | 12075177969784509 | 12075177969926428 | 12075166407918594 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f6da043e800 | 0x7f6c94836180 | 1741000 | 1580501 | 65536 | 217624 | 217624 | 6963968 | 6223204 | 0 | 0 | 0 | 48 | 3135 | 0 | 2098160 | 2100818 | 0 | 1034 | 2099784 | 2101946 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100709 | 0 | 7070 | 0 | 0 | 650222 | 702609 | 0 | 0 | 1966109 | 0 | 2100391 | 0 | 1070657561 | 0 | 12075166408080595 | 12075177969974748 | 12075177970099707 | 12075166408566488 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f6da043e700 | 0x7f6c948361c0 | 3282176 | 3119481 | 65536 | 410271 | 410271 | 13128672 | 12353087 | 0 | 0 | 0 | 48 | 3688 | 0 | 4196080 | 4201066 | 0 | 1786 | 4199280 | 4198478 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196220 | 0 | 3756 | 0 | 0 | 754196 | 807928 | 0 | 0 | 4063272 | 0 | 4195435 | 0 | 2093039670 | 0 | 12075166408738127 | 12075177970140507 | 12075177970396345 | 12075166409423862 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f6da043e600 | 0x7f6c94836200 | 3191896 | 3024803 | 65536 | 398986 | 398986 | 12767552 | 12012177 | 0 | 0 | 0 | 48 | 3128 | 0 | 4195360 | 4198075 | 0 | 1081 | 4196994 | 4197366 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196193 | 0 | 3732 | 0 | 0 | 781980 | 835353 | 0 | 0 | 4063257 | 0 | 4195494 | 0 | 2457279004 | 0 | 12075166409525350 | 12075177970456504 | 12075177970700342 | 12075166410205414 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f6da043e500 | 0x7f6c94836240 | 1704688 | 1542070 | 65536 | 213085 | 213085 | 6818720 | 6071138 | 0 | 0 | 0 | 48 | 4227 | 0 | 2098208 | 2102495 | 0 | 1081 | 2101414 | 2101968 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100043 | 0 | 5736 | 0 | 0 | 496109 | 547673 | 0 | 0 | 1966105 | 0 | 2098329 | 0 | 1317932491 | 0 | 12075166410282698 | 12075177970757942 | 12075177970883061 | 12075166410774963 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f6da043e400 | 0x7f6c94836280 | 1915824 | 1752112 | 65536 | 239477 | 239477 | 7663264 | 6845352 | 0 | 0 | 0 | 48 | 880 | 0 | 2098496 | 2099202 | 0 | 1363 | 2097839 | 2099271 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098090 | 0 | 1818 | 0 | 0 | 331000 | 383526 | 0 | 0 | 1966111 | 0 | 2098220 | 0 | 1011637135 | 0 | 12075166410946622 | 12075177970922261 | 12075177971072019 | 12075166411505380 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f6da043e300 | 0x7f6c948362c0 | 1703600 | 1541501 | 65536 | 212949 | 212949 | 6814368 | 6036376 | 0 | 0 | 0 | 48 | 4774 | 0 | 2098208 | 2101447 | 0 | 1081 | 2100366 | 2101011 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098348 | 0 | 2346 | 0 | 0 | 452112 | 503686 | 0 | 0 | 1966107 | 0 | 2098613 | 0 | 1273030201 | 0 | 12075166411708017 | 12075177971140979 | 12075177971266898 | 12075166412200602 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f6da043ea00 | 0x7f6c94836300 | 3234312 | 3025669 | 65536 | 404288 | 404288 | 12937216 | 12116691 | 0 | 0 | 0 | 48 | 2402 | 0 | 4196176 | 4200292 | 0 | 1880 | 4198412 | 4199456 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196329 | 0 | 3970 | 0 | 0 | 484733 | 538774 | 0 | 0 | 4063274 | 0 | 4196797 | 0 | 2236855822 | 0 | 12075166412376228 | 12075177971306577 | 12075177971559696 | 12075166413059749 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f6da043e900 | 0x7f6c94836340 | 3128752 | 2968739 | 65536 | 391093 | 391093 | 12514976 | 11776736 | 0 | 0 | 0 | 48 | 1927 | 0 | 4195408 | 4197300 | 0 | 1128 | 4196172 | 4196449 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195376 | 0 | 2096 | 0 | 0 | 417600 | 471697 | 0 | 0 | 4063258 | 0 | 4197134 | 0 | 2150191709 | 0 | 12075166413174903 | 12075177971624175 | 12075177971873933 | 12075166413842573 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f6da043e800 | 0x7f6c94836380 | 1675184 | 1518543 | 65536 | 209397 | 209397 | 6700704 | 5961776 | 0 | 0 | 0 | 48 | 4186 | 0 | 2098256 | 2102682 | 0 | 1128 | 2101554 | 2102096 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100634 | 0 | 6916 | 0 | 0 | 759296 | 810998 | 0 | 0 | 1966106 | 0 | 2098489 | 0 | 1299867837 | 0 | 12075166413943321 | 12075177971934413 | 12075177972060172 | 12075166414427530 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f6da043e700 | 0x7f6c948363c0 | 1988168 | 1833672 | 65536 | 248520 | 248520 | 7952640 | 7185221 | 0 | 0 | 0 | 48 | 617 | 0 | 2098544 | 2099271 | 0 | 1410 | 2097861 | 2099300 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097795 | 0 | 1226 | 0 | 0 | 172399 | 225769 | 0 | 0 | 1966112 | 0 | 2097744 | 0 | 884392165 | 0 | 12075166414599690 | 12075177972100811 | 12075177972258090 | 12075166415105651 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f6da043e600 | 0x7f6c94836400 | 1716464 | 1553473 | 65536 | 214557 | 214557 | 6865824 | 6102528 | 0 | 0 | 0 | 48 | 4801 | 0 | 2098256 | 2102618 | 0 | 1128 | 2101490 | 2100484 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098461 | 0 | 2570 | 0 | 0 | 326006 | 377121 | 0 | 0 | 1966106 | 0 | 2099471 | 0 | 1178467239 | 0 | 12075166415286958 | 12075177972308010 | 12075177972433609 | 12075166415769043 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f6da043e500 | 0x7f6c94836440 | 3252720 | 3042397 | 65536 | 406589 | 406589 | 13010848 | 12142868 | 0 | 0 | 0 | 48 | 2423 | 0 | 4196272 | 4199166 | 0 | 1974 | 4197192 | 4199725 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196479 | 0 | 4266 | 0 | 0 | 692397 | 746045 | 0 | 0 | 4063276 | 0 | 4196780 | 0 | 2372126181 | 0 | 12075166415941494 | 12075177972474408 | 12075177972732326 | 12075166416622980 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f6da043e400 | 0x7f6c94836480 | 3097584 | 2930821 | 65536 | 387197 | 387197 | 12390304 | 11671217 | 0 | 0 | 0 | 48 | 1709 | 0 | 4195456 | 4197860 | 0 | 1175 | 4196685 | 4197639 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195661 | 0 | 2664 | 0 | 0 | 643913 | 697956 | 0 | 0 | 4063259 | 0 | 4195288 | 0 | 2397690958 | 0 | 12075166416724088 | 12075177972794246 | 12075177973039844 | 12075166417394254 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f6da043e300 | 0x7f6c948364c0 | 1702152 | 1537911 | 65536 | 212768 | 212768 | 6808576 | 6047920 | 0 | 0 | 0 | 48 | 1631 | 0 | 2098304 | 2102971 | 0 | 1175 | 2101796 | 2102491 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100097 | 0 | 5840 | 0 | 0 | 979635 | 1029485 | 0 | 0 | 1966107 | 0 | 2100974 | 0 | 971778172 | 0 | 12075166417503577 | 12075177973103043 | 12075177973228482 | 12075166417983819 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f6da043ea00 | 0x7f6c94836500 | 2102464 | 1946162 | 65536 | 262807 | 262807 | 8409824 | 7531893 | 0 | 0 | 0 | 48 | 731 | 0 | 2098640 | 2099223 | 0 | 1504 | 2097719 | 2099194 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097852 | 0 | 1336 | 0 | 0 | 142449 | 196501 | 0 | 0 | 1966114 | 0 | 2097984 | 0 | 762166006 | 0 | 12075166418155989 | 12075177973269602 | 12075177973434401 | 12075166418671698 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f6da043e900 | 0x7f6c94836540 | 1724312 | 1563937 | 65536 | 215538 | 215538 | 6897216 | 6143171 | 0 | 0 | 0 | 48 | 2823 | 0 | 2098304 | 2100680 | 0 | 1175 | 2099505 | 2102383 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099023 | 0 | 3692 | 0 | 0 | 645024 | 700716 | 0 | 0 | 1966120 | 0 | 2098980 | 0 | 1181579440 | 0 | 12075166418853776 | 12075177973484480 | 12075177973609599 | 12075166419339389 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f6da043e800 | 0x7f6c94836580 | 3268296 | 3086737 | 65536 | 408536 | 408536 | 13073152 | 12264168 | 0 | 0 | 0 | 48 | 2910 | 0 | 4196368 | 4199753 | 0 | 2068 | 4197685 | 4199145 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196281 | 0 | 3866 | 0 | 0 | 970695 | 1024582 | 0 | 0 | 4063278 | 0 | 4197835 | 0 | 2232996842 | 0 | 12075166419512921 | 12075177973649759 | 12075177973903677 | 12075166420191272 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f6da043e700 | 0x7f6c948365c0 | 3154736 | 2983480 | 65536 | 394341 | 394341 | 12618912 | 11873331 | 0 | 0 | 0 | 48 | 3084 | 0 | 4195504 | 4197332 | 0 | 1222 | 4196110 | 4197374 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195577 | 0 | 2494 | 0 | 0 | 742931 | 796255 | 0 | 0 | 4063260 | 0 | 4195844 | 0 | 2149101579 | 0 | 12075166420295135 | 12075177973958397 | 12075177974205275 | 12075166420938961 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f6da043e600 | 0x7f6c94836600 | 1765448 | 1597220 | 65536 | 220680 | 220680 | 7061760 | 6271713 | 0 | 0 | 0 | 48 | 4152 | 0 | 2098352 | 2102559 | 0 | 1222 | 2101337 | 2103557 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100216 | 0 | 6076 | 0 | 0 | 626908 | 679258 | 0 | 0 | 1966108 | 0 | 2099716 | 0 | 1129483731 | 0 | 12075166421043726 | 12075177974267034 | 12075177974394553 | 12075166421555246 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f6da043e500 | 0x7f6c94836640 | 2215920 | 2058483 | 65536 | 276989 | 276989 | 8863648 | 7963188 | 0 | 0 | 0 | 48 | 502 | 0 | 2098688 | 2099314 | 0 | 1551 | 2097763 | 2099286 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097711 | 0 | 1052 | 0 | 0 | 80569 | 137176 | 0 | 0 | 1966115 | 0 | 2097639 | 0 | 671237805 | 0 | 12075166421733097 | 12075177974434873 | 12075177974607192 | 12075166422264244 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f6da043e400 | 0x7f6c94836680 | 1732528 | 1577113 | 65536 | 216565 | 216565 | 6930080 | 6181125 | 0 | 0 | 0 | 48 | 2291 | 0 | 2098352 | 2103271 | 0 | 1222 | 2102049 | 2102909 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098551 | 0 | 2746 | 0 | 0 | 543145 | 594914 | 0 | 0 | 1966109 | 0 | 2098933 | 0 | 1223908529 | 0 | 12075166422451983 | 12075177974655671 | 12075177974780950 | 12075166422933939 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f6da043e300 | 0x7f6c948366c0 | 3178672 | 2995709 | 65536 | 397333 | 397333 | 12714656 | 11885501 | 0 | 0 | 0 | 48 | 1909 | 0 | 4196464 | 4198648 | 0 | 2162 | 4196486 | 4198468 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196356 | 0 | 4012 | 0 | 0 | 937545 | 990605 | 0 | 0 | 4063280 | 0 | 4196397 | 0 | 2405364313 | 0 | 12075166423102872 | 12075177974821750 | 12075177975074388 | 12075166423772006 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f6da043ea00 | 0x7f6c94836700 | 3143344 | 2976690 | 65536 | 392917 | 392917 | 12573344 | 11840846 | 0 | 0 | 0 | 48 | 1847 | 0 | 4195552 | 4196660 | 0 | 1269 | 4195391 | 4196687 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196197 | 0 | 3732 | 0 | 0 | 798720 | 852011 | 0 | 0 | 4063261 | 0 | 4196500 | 0 | 2335949744 | 0 | 12075166423876891 | 12075177975135027 | 12075177975380465 | 12075166424553869 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f6da043e900 | 0x7f6c94836740 | 1806104 | 1645010 | 65536 | 225762 | 225762 | 7224384 | 6432605 | 0 | 0 | 0 | 48 | 3922 | 0 | 2098400 | 2102779 | 0 | 1269 | 2101510 | 2102872 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100822 | 0 | 7286 | 0 | 0 | 313099 | 366162 | 0 | 0 | 1966122 | 0 | 2100926 | 0 | 960706963 | 0 | 12075166424659245 | 12075177975438545 | 12075177975564304 | 12075166425147382 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f6da043e800 | 0x7f6c94836780 | 2299696 | 2140479 | 65536 | 287461 | 287461 | 9198752 | 8407169 | 0 | 0 | 0 | 48 | 717 | 0 | 2098784 | 2099264 | 0 | 1645 | 2097619 | 2099413 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097758 | 0 | 1142 | 0 | 0 | 58166 | 111061 | 0 | 0 | 1966117 | 0 | 2097618 | 0 | 622281823 | 0 | 12075166425322217 | 12075177975603824 | 12075177975785902 | 12075166425919417 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f6da043e700 | 0x7f6c948367c0 | 1758664 | 1587322 | 65536 | 219832 | 219832 | 7034624 | 6225433 | 0 | 0 | 0 | 48 | 4962 | 0 | 2098400 | 2102623 | 0 | 1269 | 2101354 | 2102488 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100193 | 0 | 6028 | 0 | 0 | 376921 | 430243 | 0 | 0 | 1966117 | 0 | 2100985 | 0 | 1001084180 | 0 | 12075166426041253 | 12075177975836782 | 12075177975962061 | 12075166426527497 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f6da043e600 | 0x7f6c94836800 | 3260720 | 3106515 | 65536 | 407589 | 407589 | 13042848 | 12322821 | 0 | 0 | 0 | 48 | 1265 | 0 | 4195504 | 4197072 | 0 | 1222 | 4195850 | 4197089 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195644 | 0 | 2628 | 0 | 0 | 353570 | 411115 | 0 | 0 | 4063260 | 0 | 4195473 | 0 | 1852619008 | 0 | 12075166426704916 | 12075177976001261 | 12075177976264298 | 12075166427394798 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f6da043e500 | 0x7f6c94836840 | 3145544 | 2973299 | 65536 | 393192 | 393192 | 12582144 | 11829518 | 0 | 0 | 0 | 48 | 2003 | 0 | 4195600 | 4198191 | 0 | 1316 | 4196875 | 4196779 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195417 | 0 | 2170 | 0 | 0 | 882596 | 936313 | 0 | 0 | 4063262 | 0 | 4195332 | 0 | 2056837441 | 0 | 12075166427502639 | 12075177976324618 | 12075177976573096 | 12075166428170159 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f6da043e400 | 0x7f6c94836880 | 1745992 | 1588075 | 65536 | 218248 | 218248 | 6983936 | 6212017 | 0 | 0 | 0 | 48 | 2327 | 0 | 2098448 | 2102012 | 0 | 1316 | 2100696 | 2101439 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101015 | 0 | 7670 | 0 | 0 | 402520 | 447041 | 0 | 0 | 1966119 | 0 | 2099427 | 0 | 1218345371 | 0 | 12075166428273672 | 12075177976629576 | 12075177976755015 | 12075166428754756 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f6da043e300 | 0x7f6c948368c0 | 2413696 | 2254245 | 65536 | 301711 | 301711 | 9654752 | 8815497 | 0 | 0 | 0 | 48 | 436 | 0 | 2098832 | 2099492 | 0 | 1692 | 2097800 | 2099333 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097599 | 0 | 822 | 0 | 0 | 70516 | 123965 | 0 | 0 | 1966118 | 0 | 2097657 | 0 | 629101815 | 0 | 12075166428930202 | 12075177976798534 | 12075177976990213 | 12075166429539995 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f6da043ea00 | 0x7f6c94836900 | 2230720 | 1610876 | 65536 | 278839 | 278839 | 8922848 | 6309799 | 0 | 0 | 0 | 48 | 4200 | 0 | 2098496 | 2101220 | 0 | 1363 | 2099857 | 2103073 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100891 | 0 | 7420 | 0 | 0 | 264298 | 316758 | 0 | 0 | 1966113 | 0 | 2100625 | 0 | 1024662886 | 0 | 12075166429660068 | 12075177977047652 | 12075177977174691 | 12075166430181076 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f6da043e900 | 0x7f6c94836940 | 3496904 | 3325721 | 65536 | 437112 | 437112 | 13987584 | 13197699 | 0 | 0 | 0 | 48 | 990 | 0 | 4195408 | 4196170 | 0 | 1128 | 4195042 | 4196420 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195400 | 0 | 2144 | 0 | 0 | 37528 | 94535 | 0 | 0 | 4063258 | 0 | 4195070 | 0 | 1266423007 | 0 | 12075166430356172 | 12075177977214531 | 12075177977496449 | 12075166431053508 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f6da043e800 | 0x7f6c94836980 | 3161200 | 2989149 | 65536 | 395149 | 395149 | 12644768 | 11878781 | 0 | 0 | 0 | 48 | 1570 | 0 | 4195696 | 4197608 | 0 | 1410 | 4196198 | 4197530 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196983 | 0 | 5298 | 0 | 0 | 1480268 | 1533300 | 0 | 0 | 4063264 | 0 | 4196292 | 0 | 2156992908 | 0 | 12075166431164744 | 12075177977552448 | 12075177977798526 | 12075166431834068 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f6da043e700 | 0x7f6c948369c0 | 1750256 | 1582251 | 65536 | 218781 | 218781 | 7000992 | 6205773 | 0 | 0 | 0 | 48 | 4114 | 0 | 2098544 | 2100474 | 0 | 1410 | 2099064 | 2103279 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100875 | 0 | 7386 | 0 | 0 | 397373 | 450236 | 0 | 0 | 1966112 | 0 | 2100623 | 0 | 1037527444 | 0 | 12075166431943742 | 12075177977850206 | 12075177977976765 | 12075166432433683 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f6da043e600 | 0x7f6c94836a00 | 2623744 | 2469015 | 65536 | 327967 | 327967 | 10494944 | 9553302 | 0 | 0 | 0 | 48 | 592 | 0 | 2098976 | 2099620 | 0 | 1833 | 2097787 | 2099494 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097648 | 0 | 914 | 0 | 0 | 108770 | 162254 | 0 | 0 | 1966121 | 0 | 2097618 | 0 | 588289740 | 0 | 12075166432608187 | 12075177978021085 | 12075177978230843 | 12075166433230463 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f6da043e500 | 0x7f6c94836a40 | 1798768 | 1632156 | 65536 | 224845 | 224845 | 7195040 | 6370635 | 0 | 0 | 0 | 48 | 3049 | 0 | 2098592 | 2102224 | 0 | 1457 | 2100767 | 2102546 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101594 | 0 | 8822 | 0 | 0 | 514089 | 565966 | 0 | 0 | 1966113 | 0 | 2101521 | 0 | 925572928 | 0 | 12075166433351648 | 12075177978299322 | 12075177978427961 | 12075166433840938 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f6da043e400 | 0x7f6c94836a80 | 3767384 | 3610720 | 65536 | 470922 | 470922 | 15069504 | 14296286 | 0 | 0 | 0 | 48 | 842 | 0 | 4195120 | 4196250 | 0 | 846 | 4195404 | 4195942 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195282 | 0 | 1920 | 0 | 0 | 14612 | 71797 | 0 | 0 | 4063252 | 0 | 4195062 | 0 | 1129369097 | 0 | 12075166434018959 | 12075177978468601 | 12075177978774839 | 12075166434742754 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f6da043e300 | 0x7f6c94836ac0 | 3207832 | 3026009 | 65536 | 400978 | 400978 | 12831296 | 12081712 | 0 | 0 | 0 | 48 | 1220 | 0 | 4195792 | 4198379 | 0 | 1504 | 4196875 | 4197506 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195352 | 0 | 2032 | 0 | 0 | 1862458 | 1916542 | 0 | 0 | 4063266 | 0 | 4195427 | 0 | 2244740228 | 0 | 12075166434848039 | 12075177978835318 | 12075177979126836 | 12075166435524617 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f6da043ea00 | 0x7f6c94836b00 | 1743360 | 1587318 | 65536 | 217919 | 217919 | 6973408 | 6202290 | 0 | 0 | 0 | 48 | 3932 | 0 | 2098640 | 2103310 | 0 | 1504 | 2101806 | 2103165 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2099316 | 0 | 4264 | 0 | 0 | 515957 | 571938 | 0 | 0 | 1966114 | 0 | 2100289 | 0 | 1072660921 | 0 | 12075166435630253 | 12075177979188755 | 12075177979316754 | 12075166436110365 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f6da043e900 | 0x7f6c94836b40 | 2848624 | 2685850 | 65536 | 356077 | 356077 | 11394464 | 10342147 | 0 | 0 | 0 | 48 | 468 | 0 | 2099120 | 2099592 | 0 | 1974 | 2097618 | 2099578 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097730 | 0 | 1072 | 0 | 0 | 82095 | 134694 | 0 | 0 | 1966124 | 0 | 2097574 | 0 | 586611899 | 0 | 12075166436288927 | 12075177979356594 | 12075177979584112 | 12075166436930078 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f6da043e800 | 0x7f6c94836b80 | 1794800 | 1638192 | 65536 | 224349 | 224349 | 7179168 | 6319231 | 0 | 0 | 0 | 48 | 3985 | 0 | 2098688 | 2101584 | 0 | 1551 | 2100033 | 2102522 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101244 | 0 | 8118 | 0 | 0 | 368270 | 419989 | 0 | 0 | 1966123 | 0 | 2100455 | 0 | 1094275042 | 0 | 12075166437060861 | 12075177979649392 | 12075177979776751 | 12075166437548337 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f6da043e700 | 0x7f6c94836bc0 | 4028272 | 3873183 | 65536 | 503533 | 503533 | 16113056 | 15362018 | 0 | 0 | 0 | 48 | 662 | 0 | 4195504 | 4196401 | 0 | 1222 | 4195179 | 4196536 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195149 | 0 | 1638 | 0 | 0 | 23 | 57120 | 0 | 0 | 4063260 | 0 | 4195290 | 0 | 1084110433 | 0 | 12075166437723272 | 12075177979817231 | 12075177980145228 | 12075166438471092 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f6da043e600 | 0x7f6c94836c00 | 3158832 | 2995185 | 65536 | 394853 | 394853 | 12635296 | 11852303 | 0 | 0 | 0 | 48 | 2808 | 0 | 4195888 | 4199250 | 0 | 1598 | 4197652 | 4198203 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4197368 | 0 | 6060 | 0 | 0 | 849223 | 902870 | 0 | 0 | 4063268 | 0 | 4195595 | 0 | 2383977687 | 0 | 12075166438578191 | 12075177980206507 | 12075177980454185 | 12075166439244710 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f6da043e500 | 0x7f6c94836c40 | 1821488 | 1660742 | 65536 | 227685 | 227685 | 7285920 | 6413286 | 0 | 0 | 0 | 48 | 3065 | 0 | 2098736 | 2102582 | 0 | 1598 | 2100984 | 2103381 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2101267 | 0 | 8162 | 0 | 0 | 283087 | 335121 | 0 | 0 | 1966121 | 0 | 2101480 | 0 | 925249719 | 0 | 12075166439351468 | 12075177980510825 | 12075177980639944 | 12075166439839686 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f6da043e400 | 0x7f6c94836c80 | 3058736 | 2902033 | 65536 | 382341 | 382341 | 12234912 | 11216827 | 0 | 0 | 0 | 48 | 602 | 0 | 2099264 | 2099919 | 0 | 2115 | 2097804 | 2099719 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097704 | 0 | 1014 | 0 | 0 | 61216 | 114372 | 0 | 0 | 1966127 | 0 | 2097570 | 0 | 615316453 | 0 | 12075166440020782 | 12075177980682824 | 12075177980926342 | 12075166440686209 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f6da043e300 | 0x7f6c94836cc0 | 1810120 | 1645148 | 65536 | 226264 | 226264 | 7240448 | 6362255 | 0 | 0 | 0 | 48 | 3098 | 0 | 2098880 | 2101904 | 0 | 1739 | 2100165 | 2101546 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100529 | 0 | 6680 | 0 | 0 | 436982 | 487995 | 0 | 0 | 1966121 | 0 | 2101582 | 0 | 902759479 | 0 | 12075166440804198 | 12075177980993861 | 12075177981124580 | 12075166441304147 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f6da043ea00 | 0x7f6c94836d00 | 4604808 | 4447610 | 65536 | 575600 | 575600 | 18419200 | 17651257 | 0 | 0 | 0 | 48 | 636 | 0 | 4195600 | 4196517 | 0 | 1316 | 4195201 | 4196504 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195156 | 0 | 1648 | 0 | 0 | 9075 | 61976 | 0 | 0 | 4063262 | 0 | 4195250 | 0 | 1030615596 | 0 | 12075166441481537 | 12075177981165540 | 12075177981541217 | 12075166442272206 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f6da043e900 | 0x7f6c94836d40 | 3178480 | 3004959 | 65536 | 397309 | 397309 | 12713888 | 11879456 | 0 | 0 | 0 | 48 | 769 | 0 | 4196032 | 4199798 | 0 | 1739 | 4198059 | 4198213 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4198226 | 0 | 7770 | 0 | 0 | 1042846 | 1096155 | 0 | 0 | 4063271 | 0 | 4197388 | 0 | 2205823563 | 0 | 12075166442384064 | 12075177981598816 | 12075177981851614 | 12075166443056033 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f6da043e800 | 0x7f6c94836d80 | 1837424 | 1668640 | 65536 | 229677 | 229677 | 7349664 | 6421985 | 0 | 0 | 0 | 48 | 2722 | 0 | 2098928 | 2102452 | 0 | 1786 | 2100666 | 2103768 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2100425 | 0 | 6470 | 0 | 0 | 556278 | 608229 | 0 | 0 | 1966124 | 0 | 2101244 | 0 | 945138957 | 0 | 12075166443165957 | 12075177981906974 | 12075177982037053 | 12075166443656428 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f6da043e700 | 0x7f6c94836dc0 | 3457264 | 3299379 | 65536 | 432157 | 432157 | 13829024 | 12754898 | 0 | 0 | 0 | 48 | 431 | 0 | 2099552 | 2100008 | 0 | 2397 | 2097611 | 2100172 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097640 | 0 | 874 | 0 | 0 | 44968 | 98506 | 0 | 0 | 1966133 | 0 | 2097582 | 0 | 569387697 | 0 | 12075166443829700 | 12075177982078173 | 12075177982356571 | 12075166444531895 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f6da043e600 | 0x7f6c94836e00 | 1778240 | 1602324 | 65536 | 222279 | 222279 | 7112928 | 6164527 | 0 | 0 | 0 | 48 | 2468 | 0 | 2099072 | 2100933 | 0 | 1927 | 2099006 | 2100512 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098592 | 0 | 2798 | 0 | 0 | 709811 | 761374 | 0 | 0 | 1966123 | 0 | 2098748 | 0 | 1205960902 | 0 | 12075166444650566 | 12075177982427610 | 12075177982562969 | 12075166445136299 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f6da043e500 | 0x7f6c94836e40 | 5162440 | 5004130 | 65536 | 645304 | 645304 | 20649728 | 19867301 | 0 | 0 | 0 | 48 | 980 | 0 | 4195312 | 4195948 | 0 | 1034 | 4194914 | 4196238 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195135 | 0 | 1618 | 0 | 0 | 16111 | 72696 | 0 | 0 | 4063256 | 0 | 4195144 | 0 | 1025411037 | 0 | 12075166445311244 | 12075177982603129 | 12075177983026325 | 12075166446150113 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f6da043e400 | 0x7f6c94836e80 | 3250544 | 3051750 | 65536 | 406317 | 406317 | 13002144 | 12194256 | 0 | 0 | 0 | 48 | 3001 | 0 | 4196272 | 4199247 | 0 | 1974 | 4197273 | 4198975 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4196101 | 0 | 3510 | 0 | 0 | 931525 | 984996 | 0 | 0 | 4063276 | 0 | 4196259 | 0 | 2447463686 | 0 | 12075166446257572 | 12075177983093205 | 12075177983356243 | 12075166446933959 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f6da043e300 | 0x7f6c94836ec0 | 1816904 | 1650288 | 65536 | 227112 | 227112 | 7267584 | 6330798 | 0 | 0 | 0 | 48 | 2776 | 0 | 2099120 | 2101962 | 0 | 1974 | 2099988 | 2100470 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098284 | 0 | 2180 | 0 | 0 | 497153 | 550145 | 0 | 0 | 1966124 | 0 | 2098495 | 0 | 1158890067 | 0 | 12075166447056257 | 12075177983417682 | 12075177983553841 | 12075166447545416 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f6da043ea00 | 0x7f6c94836f00 | 3880264 | 3721249 | 65536 | 485032 | 485032 | 15521024 | 14380761 | 0 | 0 | 0 | 48 | 462 | 0 | 2099840 | 2100499 | 0 | 2679 | 2097820 | 2100333 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097633 | 0 | 848 | 0 | 0 | 51645 | 103943 | 0 | 0 | 1966139 | 0 | 2097646 | 0 | 603778173 | 0 | 12075166447687820 | 12075177983594641 | 12075177983909998 | 12075166448440269 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f6da043e900 | 0x7f6c94836f40 | 2393672 | 1773871 | 65536 | 299208 | 299208 | 9574656 | 6897822 | 0 | 0 | 0 | 48 | 471 | 0 | 2099456 | 2100011 | 0 | 2303 | 2097708 | 2099919 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097618 | 0 | 834 | 0 | 0 | 127240 | 179174 | 0 | 0 | 1966131 | 0 | 2097628 | 0 | 574453628 | 0 | 12075166448559951 | 12075177983979918 | 12075177984131757 | 12075166449159886 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f6da043e800 | 0x7f6c94836f80 | 6278408 | 6108428 | 65536 | 784800 | 784800 | 25113600 | 24272470 | 0 | 0 | 0 | 48 | 794 | 0 | 4195408 | 4196368 | 0 | 1128 | 4195240 | 4196125 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195198 | 0 | 1740 | 0 | 0 | 3985 | 59721 | 0 | 0 | 4063255 | 0 | 4195188 | 0 | 1011395191 | 0 | 12075166449269880 | 12075177984172236 | 12075177984688712 | 12075166450202313 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f6da043e700 | 0x7f6c94836fc0 | 3610160 | 3429766 | 65536 | 451269 | 451269 | 14440608 | 13560345 | 0 | 0 | 0 | 48 | 1143 | 0 | 4196656 | 4197598 | 0 | 2350 | 4195248 | 4197797 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195266 | 0 | 1824 | 0 | 0 | 951338 | 1004812 | 0 | 0 | 4063284 | 0 | 4195542 | 0 | 2061313845 | 0 | 12075166450310184 | 12075177984749032 | 12075177985046629 | 12075166451020844 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f6da043e600 | 0x7f6c94837000 | 1989424 | 1830493 | 65536 | 248677 | 248677 | 7957664 | 6997672 | 0 | 0 | 0 | 48 | 420 | 0 | 2099504 | 2100079 | 0 | 2350 | 2097729 | 2100049 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097659 | 0 | 914 | 0 | 0 | 39529 | 91193 | 0 | 0 | 1966132 | 0 | 2097669 | 0 | 586103348 | 0 | 12075166451130558 | 12075177985110789 | 12075177985263908 | 12075166451631770 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f6da043e500 | 0x7f6c94837040 | 4737344 | 4579329 | 65536 | 592167 | 592167 | 18949344 | 13936755 | 0 | 0 | 0 | 48 | 483 | 0 | 2100416 | 2101058 | 0 | 3243 | 2097815 | 2100944 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097778 | 0 | 1114 | 0 | 0 | 46600 | 100195 | 0 | 0 | 1966151 | 0 | 2097548 | 0 | 604671471 | 0 | 12075166451806564 | 12075177985307267 | 12075177985692384 | 12075166452614215 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f6da043e400 | 0x7f6c94837080 | 2244208 | 2086650 | 65536 | 280525 | 280525 | 8976800 | 7927626 | 0 | 0 | 0 | 48 | 692 | 0 | 2099840 | 2100366 | 0 | 2679 | 2097687 | 2100422 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097713 | 0 | 1008 | 0 | 0 | 110473 | 161649 | 0 | 0 | 1966139 | 0 | 2097684 | 0 | 571437317 | 0 | 12075166452737635 | 12075177985762784 | 12075177985938942 | 12075166453269212 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f6da043e300 | 0x7f6c948370c0 | 7529032 | 7229220 | 65536 | 941128 | 941128 | 30116096 | 28664785 | 0 | 0 | 0 | 48 | 600 | 0 | 4195504 | 4196132 | 0 | 1222 | 4194910 | 4196504 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194910 | 0 | 1160 | 0 | 0 | 16509 | 73419 | 0 | 0 | 4063260 | 0 | 4195155 | 0 | 1005850472 | 0 | 12075166453444498 | 12075177985978782 | 12075177986588697 | 12075166454480303 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f6da043ea00 | 0x7f6c94837100 | 4002176 | 3835965 | 65536 | 500271 | 500271 | 16008672 | 15180258 | 0 | 0 | 0 | 48 | 663 | 0 | 4197040 | 4197768 | 0 | 2726 | 4195042 | 4197893 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195352 | 0 | 1980 | 0 | 0 | 335640 | 388965 | 0 | 0 | 4063289 | 0 | 4195261 | 0 | 1516648396 | 0 | 12075166454583285 | 12075177986651257 | 12075177986980214 | 12075166455324792 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f6da043e900 | 0x7f6c94837140 | 2240816 | 2085919 | 65536 | 280101 | 280101 | 8963232 | 8071107 | 0 | 0 | 0 | 48 | 575 | 0 | 2099888 | 2100382 | 0 | 2726 | 2097656 | 2100410 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097869 | 0 | 1318 | 0 | 0 | 71257 | 123086 | 0 | 0 | 1966140 | 0 | 2097816 | 0 | 572536237 | 0 | 12075166455403849 | 12075177987041974 | 12075177987220692 | 12075166455949544 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f6da043e800 | 0x7f6c94837180 | 5563632 | 5412734 | 65536 | 695453 | 695453 | 22254496 | 16679800 | 0 | 0 | 0 | 48 | 419 | 0 | 2100992 | 2101575 | 0 | 3807 | 2097768 | 2101647 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097776 | 0 | 1086 | 0 | 0 | 42218 | 95315 | 0 | 0 | 1966163 | 0 | 2097701 | 0 | 584403366 | 0 | 12075166456121493 | 12075177987259572 | 12075177987714608 | 12075166456983796 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f6da043e700 | 0x7f6c948371c0 | 2503920 | 2346109 | 65536 | 312989 | 312989 | 10015648 | 8953366 | 0 | 0 | 0 | 48 | 472 | 0 | 2100224 | 2100840 | 0 | 3055 | 2097785 | 2100794 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097692 | 0 | 950 | 0 | 0 | 52793 | 104276 | 0 | 0 | 1966147 | 0 | 2097637 | 0 | 589524986 | 0 | 12075166457105892 | 12075177987787408 | 12075177988022926 | 12075166457716297 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f6da043e600 | 0x7f6c94837200 | 8504112 | 8351621 | 65536 | 1063013 | 1063013 | 34016416 | 33035498 | 0 | 0 | 0 | 48 | 751 | 0 | 4195600 | 4196269 | 0 | 1316 | 4194953 | 4196495 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195195 | 0 | 1726 | 0 | 0 | 13883 | 70624 | 0 | 0 | 4063262 | 0 | 4195080 | 0 | 1007310940 | 0 | 12075166457822133 | 12075177988087725 | 12075177988791400 | 12075166458945281 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f6da043e500 | 0x7f6c94837240 | 4533808 | 4378569 | 65536 | 566725 | 566725 | 18135200 | 17092325 | 0 | 0 | 0 | 48 | 665 | 0 | 4197424 | 4198302 | 0 | 3102 | 4195200 | 4198437 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195132 | 0 | 1524 | 0 | 0 | 143414 | 197078 | 0 | 0 | 4063300 | 0 | 4195293 | 0 | 1293564048 | 0 | 12075166459061106 | 12075177988855559 | 12075177989229156 | 12075166459848790 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f6da043e400 | 0x7f6c94837280 | 2538304 | 2379401 | 65536 | 317287 | 317287 | 10153184 | 9078364 | 0 | 0 | 0 | 48 | 445 | 0 | 2100272 | 2100872 | 0 | 3102 | 2097770 | 2100727 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097752 | 0 | 1068 | 0 | 0 | 71607 | 121657 | 0 | 0 | 1966148 | 0 | 2097659 | 0 | 588632873 | 0 | 12075166459957792 | 12075177989297156 | 12075177989498914 | 12075166460585819 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f6da043e300 | 0x7f6c948372c0 | 6426776 | 6254924 | 65536 | 803346 | 803346 | 25707072 | 18659059 | 0 | 0 | 0 | 48 | 480 | 0 | 2101568 | 2102204 | 0 | 4371 | 2097833 | 2102139 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097679 | 0 | 868 | 0 | 0 | 52362 | 105295 | 0 | 0 | 1966175 | 0 | 2097582 | 0 | 674582448 | 0 | 12075166460695243 | 12075177989559874 | 12075177990085790 | 12075166461643325 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f6da043ea00 | 0x7f6c94837300 | 2795056 | 2641959 | 65536 | 349381 | 349381 | 11180192 | 9979540 | 0 | 0 | 0 | 48 | 481 | 0 | 2100608 | 2101132 | 0 | 3431 | 2097701 | 2101148 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097669 | 0 | 888 | 0 | 0 | 22063 | 74528 | 0 | 0 | 1966155 | 0 | 2097685 | 0 | 550451742 | 0 | 12075166461765031 | 12075177990160989 | 12075177990383867 | 12075166462411873 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f6da043e900 | 0x7f6c94837340 | 9640256 | 9487689 | 65536 | 1205031 | 1205031 | 38560992 | 37299033 | 0 | 0 | 0 | 48 | 897 | 0 | 4195312 | 4195890 | 0 | 1034 | 4194856 | 4196029 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195196 | 0 | 1740 | 0 | 0 | 7241 | 64546 | 0 | 0 | 4063256 | 0 | 4194922 | 0 | 1011709161 | 0 | 12075166462517068 | 12075177990447707 | 12075177991246261 | 12075166463728419 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f6da043e800 | 0x7f6c94837380 | 5091120 | 4929043 | 65536 | 636389 | 636389 | 20364448 | 19394145 | 0 | 0 | 0 | 48 | 749 | 0 | 4197808 | 4198482 | 0 | 3478 | 4195004 | 4198416 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195087 | 0 | 1418 | 0 | 0 | 103037 | 156921 | 0 | 0 | 4063308 | 0 | 4195378 | 0 | 1126030641 | 0 | 12075166463835458 | 12075177991309620 | 12075177991730257 | 12075166464674438 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f6da043e700 | 0x7f6c948373c0 | 2828656 | 2674121 | 65536 | 353581 | 353581 | 11314592 | 10088556 | 0 | 0 | 0 | 48 | 441 | 0 | 2100656 | 2101312 | 0 | 3478 | 2097834 | 2101186 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097662 | 0 | 872 | 0 | 0 | 76555 | 129069 | 0 | 0 | 1966156 | 0 | 2097931 | 0 | 550192322 | 0 | 12075166464743366 | 12075177991792176 | 12075177992017294 | 12075166465412950 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f6da043e600 | 0x7f6c94837400 | 7266032 | 7114115 | 65536 | 908253 | 908253 | 29064096 | 19291698 | 0 | 0 | 0 | 48 | 566 | 0 | 2102144 | 2102665 | 0 | 4935 | 2097730 | 2102693 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097741 | 0 | 968 | 0 | 0 | 82614 | 138910 | 0 | 0 | 1966187 | 0 | 2097646 | 0 | 647162954 | 0 | 12075166465522093 | 12075177992084494 | 12075177992680969 | 12075166466533442 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f6da043e500 | 0x7f6c94837440 | 3348336 | 3186170 | 65536 | 418541 | 418541 | 13393312 | 12006808 | 0 | 0 | 0 | 48 | 438 | 0 | 2101376 | 2101898 | 0 | 4183 | 2097715 | 2101906 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097712 | 0 | 942 | 0 | 0 | 56220 | 107313 | 0 | 0 | 1966171 | 0 | 2097768 | 0 | 587479173 | 0 | 12075166466648075 | 12075177992753289 | 12075177993021447 | 12075166467326757 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f6da043e400 | 0x7f6c94837480 | 11883464 | 11733759 | 65536 | 1485432 | 1485432 | 47533824 | 45053412 | 0 | 0 | 0 | 48 | 772 | 0 | 4195408 | 4196048 | 0 | 1128 | 4194920 | 4196020 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195016 | 0 | 1376 | 0 | 0 | 13512 | 68511 | 0 | 0 | 4063258 | 0 | 4195148 | 0 | 1018809100 | 0 | 12075166467431832 | 12075177993085126 | 12075177994070238 | 12075166468832274 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f6da043e300 | 0x7f6c948374c0 | 6200304 | 6044006 | 65536 | 775037 | 775037 | 24801184 | 23541808 | 0 | 0 | 0 | 48 | 776 | 0 | 4198576 | 4199336 | 0 | 4230 | 4195106 | 4199405 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195042 | 0 | 1296 | 0 | 0 | 55108 | 108467 | 0 | 0 | 4063321 | 0 | 4195305 | 0 | 1118542104 | 0 | 12075166468933963 | 12075177994130398 | 12075177994661274 | 12075166469863932 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f6da043ea00 | 0x7f6c94837500 | 3370056 | 3215348 | 65536 | 421256 | 421256 | 13480192 | 12157552 | 0 | 0 | 0 | 48 | 472 | 0 | 2101424 | 2101998 | 0 | 4230 | 2097768 | 2102058 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097694 | 0 | 904 | 0 | 0 | 64240 | 116925 | 0 | 0 | 1966172 | 0 | 2097734 | 0 | 571247649 | 0 | 12075166469973305 | 12075177994722393 | 12075177995024311 | 12075166470664610 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f6da043e900 | 0x7f6c94837540 | 8999560 | 8845040 | 65536 | 1124944 | 1124944 | 35998208 | 27908408 | 0 | 0 | 0 | 48 | 493 | 0 | 2099120 | 2099655 | 0 | 1974 | 2097681 | 2099783 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097710 | 0 | 1032 | 0 | 0 | 0 | 59848 | 0 | 0 | 1966124 | 0 | 2097639 | 0 | 524239773 | 0 | 12075166470772781 | 12075177995088950 | 12075177995826705 | 12075166471930051 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f6da043e800 | 0x7f6c94837580 | 3893424 | 3738646 | 65536 | 486677 | 486677 | 15573664 | 14090136 | 0 | 0 | 0 | 48 | 429 | 0 | 2102144 | 2102626 | 0 | 4935 | 2097691 | 2102630 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097722 | 0 | 930 | 0 | 0 | 49718 | 101274 | 0 | 0 | 1966187 | 0 | 2097739 | 0 | 565720497 | 0 | 12075166472053220 | 12075177995899504 | 12075177996215021 | 12075166472779810 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f6da043e700 | 0x7f6c948375c0 | 14140464 | 13981368 | 65536 | 1767557 | 1767557 | 56561824 | 53196955 | 0 | 0 | 0 | 48 | 590 | 0 | 4195504 | 4196288 | 0 | 1222 | 4195066 | 4196346 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195271 | 0 | 1882 | 0 | 0 | 2816 | 59440 | 0 | 0 | 4063260 | 0 | 4195214 | 0 | 1020740947 | 0 | 12075166472886328 | 12075177996285421 | 12075177997456132 | 12075166474479980 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f6da043e600 | 0x7f6c94837600 | 7342208 | 7183431 | 65536 | 917775 | 917775 | 29368800 | 27846889 | 0 | 0 | 0 | 48 | 734 | 0 | 4199344 | 4200227 | 0 | 4982 | 4195245 | 4200211 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195375 | 0 | 1930 | 0 | 0 | 90836 | 143785 | 0 | 0 | 4063340 | 0 | 4194989 | 0 | 1129199896 | 0 | 12075166474589644 | 12075177997521251 | 12075177998131006 | 12075166475614158 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f6da043e500 | 0x7f6c94837640 | 3929200 | 3772578 | 65536 | 491149 | 491149 | 15716768 | 14230481 | 0 | 0 | 0 | 48 | 442 | 0 | 2102192 | 2102697 | 0 | 4982 | 2097715 | 2102708 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097792 | 0 | 1068 | 0 | 0 | 52954 | 103742 | 0 | 0 | 1966188 | 0 | 2097683 | 0 | 577238864 | 0 | 12075166475682455 | 12075177998194526 | 12075177998512763 | 12075166476452506 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f6da043e400 | 0x7f6c94837680 | 10679088 | 10528529 | 65536 | 1334885 | 1334885 | 42716320 | 31654073 | 0 | 0 | 0 | 48 | 639 | 0 | 2099984 | 2100562 | 0 | 2820 | 2097742 | 2100618 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097738 | 0 | 1052 | 0 | 0 | 0 | 57156 | 0 | 0 | 1966142 | 0 | 2097787 | 0 | 525637785 | 0 | 12075166476563141 | 12075177998575323 | 12075177999452436 | 12075166477852648 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f6da043e300 | 0x7f6c948376c0 | 5047856 | 4887783 | 65536 | 630981 | 630981 | 20191392 | 16835615 | 0 | 0 | 0 | 48 | 434 | 0 | 2103680 | 2104174 | 0 | 6439 | 2097735 | 2104203 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097728 | 0 | 878 | 0 | 0 | 26398 | 75948 | 0 | 0 | 1966219 | 0 | 2097656 | 0 | 561131544 | 0 | 12075166477971048 | 12075177999529395 | 12075177999939152 | 12075166478802773 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f6da043ea00 | 0x7f6c94837700 | 18642096 | 18492705 | 65536 | 2330261 | 2330261 | 74568352 | 66694682 | 0 | 0 | 0 | 48 | 942 | 0 | 4195312 | 4195894 | 0 | 1034 | 4194860 | 4196023 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195248 | 0 | 1844 | 0 | 0 | 8964 | 64983 | 0 | 0 | 4063256 | 0 | 4194856 | 0 | 1035518545 | 0 | 12075166478912537 | 12075178000001872 | 12075178001549539 | 12075166480881136 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f6da043e900 | 0x7f6c94837740 | 9601840 | 9437673 | 65536 | 1200229 | 1200229 | 38407328 | 37273248 | 0 | 0 | 0 | 48 | 457 | 0 | 4196368 | 4197092 | 0 | 2068 | 4195024 | 4197022 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4194966 | 0 | 1236 | 0 | 0 | 14605 | 69975 | 0 | 0 | 4063278 | 0 | 4194908 | 0 | 1010001502 | 0 | 12075166480991000 | 12075178001661135 | 12075178002455051 | 12075166482206188 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f6da043e800 | 0x7f6c94837780 | 5113392 | 4957281 | 65536 | 639173 | 639173 | 20453536 | 18762222 | 0 | 0 | 0 | 48 | 631 | 0 | 2099264 | 2099935 | 0 | 2115 | 2097820 | 2099925 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097853 | 0 | 1312 | 0 | 0 | 11 | 57314 | 0 | 0 | 1966127 | 0 | 2097780 | 0 | 509615852 | 0 | 12075166482313327 | 12075178002564009 | 12075178002977767 | 12075166483139002 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f6da043e700 | 0x7f6c948377c0 | 14051888 | 13896198 | 65536 | 1756485 | 1756485 | 56207520 | 38459017 | 0 | 0 | 0 | 48 | 478 | 0 | 2099984 | 2100555 | 0 | 2820 | 2097735 | 2100732 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097853 | 0 | 1282 | 0 | 0 | 0 | 57025 | 0 | 0 | 1966142 | 0 | 2097676 | 0 | 540162085 | 0 | 12075166483249998 | 12075178003089606 | 12075178004247680 | 12075166484819725 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f6da043e600 | 0x7f6c94837800 | 9580208 | 9418775 | 65536 | 1197525 | 1197525 | 38320800 | 29424076 | 0 | 0 | 0 | 48 | 579 | 0 | 2100752 | 2101385 | 0 | 3572 | 2097813 | 2101438 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097893 | 0 | 1330 | 0 | 0 | 0 | 56693 | 0 | 0 | 1966158 | 0 | 2097795 | 0 | 527602360 | 0 | 12075166484939467 | 12075178004365439 | 12075178005150875 | 12075166486146200 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f6da043e500 | 0x7f6c94837840 | 36669896 | 36507670 | 65536 | 4583736 | 4583736 | 146679552 | 94862143 | 0 | 0 | 0 | 48 | 848 | 0 | 4195312 | 4195933 | 0 | 1034 | 4194899 | 4195891 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195160 | 0 | 1668 | 0 | 0 | 10987 | 66831 | 0 | 0 | 4063256 | 0 | 4195129 | 0 | 1121759509 | 0 | 12075166486254581 | 12075178005258874 | 12075178008311176 | 12075166489719861 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f6da043e400 | 0x7f6c94837880 | 18585984 | 18425141 | 65536 | 2323247 | 2323247 | 74343904 | 65763875 | 0 | 0 | 0 | 48 | 557 | 0 | 4196368 | 4196995 | 0 | 2068 | 4194927 | 4196998 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195026 | 0 | 1356 | 0 | 0 | 10139 | 60518 | 0 | 0 | 4063278 | 0 | 4194773 | 0 | 1050963502 | 0 | 12075166489829635 | 12075178008418856 | 12075178009959007 | 12075166491795368 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f6da043e300 | 0x7f6c948378c0 | 9617264 | 9457646 | 65536 | 1202157 | 1202157 | 38469024 | 30172410 | 0 | 0 | 0 | 48 | 524 | 0 | 2099264 | 2099807 | 0 | 2115 | 2097692 | 2099961 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097665 | 0 | 936 | 0 | 0 | 0 | 50150 | 0 | 0 | 1966127 | 0 | 2097730 | 0 | 524961394 | 0 | 12075166491901946 | 12075178010065726 | 12075178010854362 | 12075166493112606 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f6da043ea00 | 0x7f6c94837900 | 27529536 | 27379510 | 65536 | 3441191 | 3441191 | 110118112 | 48159053 | 0 | 0 | 0 | 48 | 676 | 0 | 2099984 | 2100684 | 0 | 2820 | 2097864 | 2100680 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097869 | 0 | 1314 | 0 | 0 | 1184 | 57018 | 0 | 0 | 1966142 | 0 | 2097827 | 0 | 604638594 | 0 | 12075166493222090 | 12075178010960441 | 12075178013242028 | 12075166495914643 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f6da043e900 | 0x7f6c94837940 | 18576240 | 18419208 | 65536 | 2322029 | 2322029 | 74304928 | 40804247 | 0 | 0 | 0 | 48 | 693 | 0 | 2100752 | 2101384 | 0 | 3572 | 2097812 | 2101419 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2098086 | 0 | 1716 | 0 | 0 | 0 | 57223 | 0 | 0 | 1966158 | 0 | 2097859 | 0 | 564792333 | 0 | 12075166496045026 | 12075178013361548 | 12075178014896099 | 12075166497995220 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f6da043e800 | 0x7f6c94837980 | 72685000 | 72529858 | 65536 | 9085624 | 9085624 | 290739968 | 87478612 | 0 | 0 | 0 | 48 | 630 | 0 | 4195312 | 4196325 | 0 | 1034 | 4195291 | 4196232 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195090 | 0 | 1528 | 0 | 0 | 2026 | 57502 | 0 | 0 | 4063256 | 0 | 4195274 | 0 | 1203466231 | 0 | 12075166498104934 | 12075178015050018 | 12075178021105183 | 12075166504576079 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f6da043e700 | 0x7f6c948379c0 | 36565232 | 36407687 | 65536 | 4570653 | 4570653 | 146260896 | 78402099 | 0 | 0 | 0 | 48 | 595 | 0 | 4196368 | 4197205 | 0 | 2068 | 4195137 | 4196948 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4195097 | 0 | 1498 | 0 | 0 | 16203 | 71554 | 0 | 0 | 4063263 | 0 | 4195077 | 0 | 1350683185 | 0 | 12075166504686034 | 12075178021228543 | 12075178024271725 | 12075166508152546 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f6da043e600 | 0x7f6c94837a00 | 18609072 | 18454675 | 65536 | 2326133 | 2326133 | 74436256 | 42296522 | 0 | 0 | 0 | 48 | 896 | 0 | 2099264 | 2099950 | 0 | 2115 | 2097835 | 2100045 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097852 | 0 | 1310 | 0 | 0 | 0 | 55435 | 0 | 0 | 1966114 | 0 | 2098021 | 0 | 581828888 | 0 | 12075166508260386 | 12075178024404684 | 12075178025943716 | 12075166510211582 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 863873 | 863878 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f6da043e500 | 0x7f6c94837a40 | 54494640 | 54332958 | 65536 | 6811829 | 6811829 | 217978528 | 48069020 | 0 | 0 | 0 | 48 | 626 | 0 | 2099984 | 2100518 | 0 | 2820 | 2097698 | 2100655 | 0 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097777 | 0 | 1130 | 0 | 0 | 1196 | 58659 | 0 | 0 | 1966127 | 0 | 2097796 | 0 | 622491547 | 0 | 12075166510322458 | 12075178026095715 | 12075178030623849 | 12075166515263851 |