60 KiB
60 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | SPI_CSN_WINDOW_VALID | SPI_CSN_BUSY | GRBM_SPI_BUSY | SPI_CSN_NUM_THREADGROUPS | SPI_CSN_WAVE | SPI_RA_REQ_NO_ALLOC | SPI_RA_REQ_NO_ALLOC_CSN | SPI_RA_RES_STALL_CSN | SPI_RA_TMP_STALL_CSN | SPI_RA_WAVE_SIMD_FULL_CSN | SPI_RA_VGPR_SIMD_FULL_CSN | SPI_RA_SGPR_SIMD_FULL_CSN | SPI_RA_LDS_CU_FULL_CSN | SPI_RA_BAR_CU_FULL_CSN | SPI_RA_TGLIM_CU_FULL_CSN | SPI_RA_WVLIM_STALL_CSN | SPI_SWC_CSC_WR | SPI_VWC_CSC_WR | SPI_RA_BULKY_CU_FULL_CSN | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 866123 | 866128 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f8756c04180 | 4039312 | 3829273 | 524288 | 504913 | 504913 | 4039304 | 3841146 | 480691 | 131072 | 524288 | 243029 | 767317 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3145728 | 524288 | 0 | 12075222642432714 | 12075231812466956 | 12075231812792233 | 12075222885713539 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 866123 | 866128 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f8756c35100 | 223584 | 76405 | 512 | 27947 | 27947 | 223576 | 88027 | 12078 | 128 | 512 | 116 | 628 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2560 | 512 | 0 | 12075222900007172 | 12075231827875508 | 12075231827882068 | 12075222900344198 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f8886c24900 | 0x7f8756c35140 | 1737952 | 1566380 | 65536 | 217243 | 217243 | 1737944 | 1578107 | 198642 | 16384 | 65536 | 231405 | 241240 | 27397 | 0 | 772162 | 774953 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222900410802 | 12075231827930868 | 12075231828062387 | 12075222900772574 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f8886c24800 | 0x7f8756c35180 | 3146704 | 2981171 | 65536 | 393337 | 393337 | 3146696 | 2992895 | 375271 | 16384 | 65536 | 528762 | 430769 | 135214 | 0 | 0 | 5867132 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222900848806 | 12075231828111826 | 12075231828353744 | 12075222901361729 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f8886c24700 | 0x7f8756c351c0 | 3199712 | 3022728 | 65536 | 399963 | 399963 | 3199704 | 3034457 | 366698 | 16384 | 65536 | 399625 | 444809 | 5877 | 0 | 0 | 304808 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222901401243 | 12075231828488623 | 12075231828737101 | 12075222901908525 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f8886c24600 | 0x7f8756c35200 | 1733392 | 1566405 | 65536 | 216673 | 216673 | 1733384 | 1578139 | 203119 | 16384 | 65536 | 162097 | 195287 | 39431 | 0 | 628827 | 630825 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222901946756 | 12075231828870700 | 12075231828995499 | 12075222902268484 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f8886c24500 | 0x7f8756c35240 | 1729376 | 1557822 | 65536 | 216171 | 216171 | 1729368 | 1569553 | 200297 | 16384 | 65536 | 161350 | 194785 | 32467 | 0 | 618892 | 620950 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222902327955 | 12075231829024459 | 12075231829149738 | 12075222902645435 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f8886c24400 | 0x7f8756c35280 | 1728848 | 1560722 | 65536 | 216105 | 216105 | 1728840 | 1572457 | 196453 | 16384 | 65536 | 184008 | 219813 | 18907 | 0 | 286967 | 287984 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222902731855 | 12075231829208297 | 12075231829332136 | 12075222903046670 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f8886c24300 | 0x7f8756c352c0 | 3185632 | 3000889 | 65536 | 398203 | 398203 | 3185624 | 3012615 | 378217 | 16384 | 65536 | 326153 | 381886 | 13778 | 0 | 0 | 247613 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222903106161 | 12075231829361896 | 12075231829612774 | 12075222903577667 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f8886c24a00 | 0x7f8756c35300 | 3284432 | 3088756 | 65536 | 410553 | 410553 | 3284424 | 3100481 | 371177 | 16384 | 65536 | 250855 | 313858 | 2167 | 0 | 0 | 57653 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222903615838 | 12075231829742053 | 12075231829990211 | 12075222904129433 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f8886c24900 | 0x7f8756c35340 | 1736736 | 1555166 | 65536 | 217091 | 217091 | 1736728 | 1568671 | 196965 | 16384 | 65536 | 181630 | 220902 | 36103 | 0 | 396292 | 397624 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222904154850 | 12075231830120930 | 12075231830245409 | 12075222904497787 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f8886c24800 | 0x7f8756c35380 | 1751248 | 1570387 | 65536 | 218905 | 218905 | 1751240 | 1583891 | 204791 | 16384 | 65536 | 191124 | 221430 | 25250 | 0 | 428302 | 429812 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222904554563 | 12075231830274208 | 12075231830399327 | 12075222904870269 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f8886c24700 | 0x7f8756c353c0 | 1729696 | 1553530 | 65536 | 216211 | 216211 | 1729688 | 1565269 | 196984 | 16384 | 65536 | 185224 | 227646 | 15372 | 0 | 327775 | 328931 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222904940540 | 12075231830442047 | 12075231830566526 | 12075222905259633 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f8886c24600 | 0x7f8756c35400 | 3172816 | 2992550 | 65536 | 396601 | 396601 | 3172808 | 3004281 | 377704 | 16384 | 65536 | 340464 | 392975 | 20238 | 0 | 0 | 316056 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222905322460 | 12075231830598206 | 12075231830845884 | 12075222905817249 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f8886c24500 | 0x7f8756c35440 | 3195680 | 3020860 | 65536 | 399459 | 399459 | 3195672 | 3032585 | 380137 | 16384 | 65536 | 285408 | 346062 | 1456 | 0 | 0 | 74917 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222905847075 | 12075231830979002 | 12075231831227640 | 12075222906367852 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f8886c24400 | 0x7f8756c35480 | 1711440 | 1547359 | 65536 | 213929 | 213929 | 1711432 | 1560867 | 196478 | 16384 | 65536 | 137747 | 201009 | 18702 | 0 | 295614 | 296594 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222906396526 | 12075231831319480 | 12075231831444119 | 12075222906719957 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f8886c24300 | 0x7f8756c354c0 | 1733280 | 1558640 | 65536 | 216659 | 216659 | 1733272 | 1570371 | 195183 | 16384 | 65536 | 137204 | 195156 | 12010 | 0 | 429251 | 430599 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222906777113 | 12075231831477558 | 12075231831603157 | 12075222907095274 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f8886c24a00 | 0x7f8756c35500 | 1759312 | 1585601 | 65536 | 219913 | 219913 | 1759304 | 1597331 | 193635 | 16384 | 65536 | 161813 | 211109 | 12520 | 0 | 285906 | 286814 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222907167398 | 12075231831644757 | 12075231831769716 | 12075222907480210 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f8886c24900 | 0x7f8756c35540 | 3224480 | 3047169 | 65536 | 403059 | 403059 | 3224472 | 3058901 | 375281 | 16384 | 65536 | 279720 | 333466 | 24274 | 0 | 0 | 1033588 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222907537306 | 12075231831801556 | 12075231832045714 | 12075222908037395 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f8886c24800 | 0x7f8756c35580 | 3191952 | 3027937 | 65536 | 398993 | 398993 | 3191944 | 3039667 | 379623 | 16384 | 65536 | 253662 | 316475 | 1379 | 0 | 0 | 87256 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222908063954 | 12075231832131473 | 12075231832375471 | 12075222908585554 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f8886c24700 | 0x7f8756c355c0 | 1764192 | 1590680 | 65536 | 220523 | 220523 | 1764184 | 1602411 | 194794 | 16384 | 65536 | 162221 | 215046 | 5789 | 0 | 376138 | 377391 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222908614828 | 12075231832460430 | 12075231832585069 | 12075222908939952 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f8886c24600 | 0x7f8756c35600 | 1676496 | 1503584 | 65536 | 209561 | 209561 | 1676488 | 1515309 | 194020 | 16384 | 65536 | 127985 | 192945 | 359 | 0 | 27739 | 27903 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222908995896 | 12075231832616909 | 12075231832742668 | 12075222909316953 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f8886c24500 | 0x7f8756c35640 | 1787168 | 1613780 | 65536 | 223395 | 223395 | 1787160 | 1625517 | 194921 | 16384 | 65536 | 155670 | 219945 | 16949 | 0 | 14033 | 14093 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222909387203 | 12075231832784268 | 12075231832908747 | 12075222909701407 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f8886c24400 | 0x7f8756c35680 | 3283920 | 3107787 | 65536 | 410489 | 410489 | 3283912 | 3119517 | 392866 | 16384 | 65536 | 281644 | 335637 | 20435 | 0 | 0 | 575815 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222909757241 | 12075231832940426 | 12075231833192424 | 12075222910270765 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f8886c24300 | 0x7f8756c356c0 | 3704800 | 3053285 | 65536 | 463099 | 463099 | 3704792 | 3074961 | 384588 | 16384 | 65536 | 263782 | 326766 | 3452 | 0 | 0 | 127327 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222910295892 | 12075231833279784 | 12075231833525541 | 12075222910855332 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f8886c24a00 | 0x7f8756c35700 | 1716816 | 1542657 | 65536 | 214601 | 214601 | 1716808 | 1554387 | 195172 | 16384 | 65536 | 146309 | 200916 | 1838 | 0 | 63283 | 63564 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222910881290 | 12075231833659140 | 12075231833783779 | 12075222911222985 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f8886c24900 | 0x7f8756c35740 | 1726560 | 1546020 | 65536 | 215819 | 215819 | 1726552 | 1557747 | 194918 | 16384 | 65536 | 128114 | 192987 | 0 | 0 | 1966 | 1983 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222911282165 | 12075231833815139 | 12075231833940098 | 12075222911597120 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f8886c24800 | 0x7f8756c35780 | 1750608 | 1584859 | 65536 | 218825 | 218825 | 1750600 | 1596589 | 198767 | 16384 | 65536 | 186376 | 234962 | 16375 | 0 | 156635 | 157316 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222911666469 | 12075231833981058 | 12075231834105857 | 12075222911975574 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f8886c24700 | 0x7f8756c357c0 | 3242976 | 3064401 | 65536 | 405371 | 405371 | 3242968 | 3076131 | 374505 | 16384 | 65536 | 328150 | 373992 | 18958 | 0 | 0 | 317702 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222912033551 | 12075231834137216 | 12075231834385534 | 12075222912544010 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f8886c24600 | 0x7f8756c35800 | 3231120 | 3047395 | 65536 | 403889 | 403889 | 3231112 | 3059123 | 372205 | 16384 | 65536 | 269670 | 330307 | 2043 | 0 | 0 | 33916 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222912569918 | 12075231834519293 | 12075231834762971 | 12075222913082190 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f8886c24500 | 0x7f8756c35840 | 1754080 | 1583095 | 65536 | 219259 | 219259 | 1754072 | 1594821 | 195561 | 16384 | 65536 | 145867 | 207260 | 1615 | 0 | 117486 | 117991 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222913108269 | 12075231834893850 | 12075231835018169 | 12075222913442650 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f8886c24400 | 0x7f8756c35880 | 1756752 | 1583188 | 65536 | 219593 | 219593 | 1756744 | 1594917 | 200040 | 16384 | 65536 | 135782 | 201106 | 4634 | 0 | 481 | 495 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222913501139 | 12075231835049849 | 12075231835174968 | 12075222913817406 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f8886c24300 | 0x7f8756c358c0 | 1747808 | 1570552 | 65536 | 218475 | 218475 | 1747800 | 1582281 | 193868 | 16384 | 65536 | 146542 | 208992 | 116 | 0 | 239627 | 240599 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222913886625 | 12075231835216888 | 12075231835341527 | 12075222914207942 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f8886c24a00 | 0x7f8756c35900 | 3104016 | 2934205 | 65536 | 388001 | 388001 | 3104008 | 2945937 | 378733 | 16384 | 65536 | 266298 | 317784 | 10368 | 0 | 0 | 465412 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222914268124 | 12075231835372886 | 12075231835622484 | 12075222914753286 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f8886c24900 | 0x7f8756c35940 | 3733408 | 3054180 | 65536 | 466675 | 466675 | 3733400 | 3072229 | 371306 | 16384 | 65536 | 322039 | 385374 | 5553 | 0 | 0 | 134182 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222914781408 | 12075231835717523 | 12075231835963921 | 12075222915335187 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f8886c24800 | 0x7f8756c35980 | 1765840 | 1581332 | 65536 | 220729 | 220729 | 1765832 | 1593057 | 194287 | 16384 | 65536 | 142329 | 207754 | 259 | 0 | 65394 | 65708 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222915360494 | 12075231836053361 | 12075231836178480 | 12075222915690698 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f8886c24700 | 0x7f8756c359c0 | 1745312 | 1556083 | 65536 | 218163 | 218163 | 1745304 | 1567807 | 196074 | 16384 | 65536 | 132546 | 195378 | 57 | 0 | 6504 | 6536 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222915750869 | 12075231836210159 | 12075231836335758 | 12075222916057309 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f8886c24600 | 0x7f8756c35a00 | 1747664 | 1576424 | 65536 | 218457 | 218457 | 1747656 | 1588151 | 198250 | 16384 | 65536 | 167754 | 233124 | 2873 | 0 | 246555 | 247586 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222916127319 | 12075231836376238 | 12075231836500557 | 12075222916437195 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f8886c24500 | 0x7f8756c35a40 | 3203040 | 3023844 | 65536 | 400379 | 400379 | 3203032 | 3035579 | 384205 | 16384 | 65536 | 268952 | 317242 | 50333 | 0 | 0 | 309609 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222916493630 | 12075231836530637 | 12075231836815754 | 12075222916993869 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f8886c24400 | 0x7f8756c35a80 | 3612496 | 2988580 | 65536 | 451561 | 451561 | 3612488 | 3006625 | 374251 | 16384 | 65536 | 318656 | 380562 | 3957 | 0 | 0 | 72400 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222917025558 | 12075231836858794 | 12075231837105512 | 12075222917578917 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f8886c24300 | 0x7f8756c35ac0 | 1705504 | 1524205 | 65536 | 213187 | 213187 | 1705496 | 1535931 | 197613 | 16384 | 65536 | 160673 | 220199 | 2196 | 0 | 98361 | 98757 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222917607089 | 12075231837155112 | 12075231837279911 | 12075222917925691 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f8886c24a00 | 0x7f8756c35b00 | 1782096 | 1604098 | 65536 | 222761 | 222761 | 1782088 | 1615837 | 200296 | 16384 | 65536 | 148888 | 214004 | 1974 | 0 | 31741 | 31894 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222917981976 | 12075231837311750 | 12075231837438149 | 12075222918307190 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f8886c24900 | 0x7f8756c35b40 | 1680352 | 1501450 | 65536 | 210043 | 210043 | 1680344 | 1513177 | 195946 | 16384 | 65536 | 156454 | 210660 | 634 | 0 | 22322 | 22449 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222918376529 | 12075231837479429 | 12075231837603588 | 12075222918678861 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f8886c24800 | 0x7f8756c35b80 | 3290448 | 3121852 | 65536 | 411305 | 411305 | 3290440 | 3133585 | 386152 | 16384 | 65536 | 395271 | 368378 | 90290 | 0 | 0 | 2648170 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222918735426 | 12075231837635908 | 12075231837889346 | 12075222919241556 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f8886c24700 | 0x7f8756c35bc0 | 3233056 | 3061863 | 65536 | 404131 | 404131 | 3233048 | 3073587 | 381290 | 16384 | 65536 | 254115 | 317458 | 1852 | 0 | 0 | 23337 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222919268096 | 12075231837932065 | 12075231838177503 | 12075222919781039 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f8886c24600 | 0x7f8756c35c00 | 1679056 | 1507505 | 65536 | 209881 | 209881 | 1679048 | 1519239 | 196720 | 16384 | 65536 | 142324 | 204142 | 10292 | 0 | 65261 | 65614 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222919806727 | 12075231838225823 | 12075231838349982 | 12075222920124076 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f8886c24500 | 0x7f8756c35c40 | 1808416 | 1630550 | 65536 | 226051 | 226051 | 1808408 | 1642279 | 199021 | 16384 | 65536 | 144706 | 206519 | 8557 | 0 | 281380 | 282432 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222920185841 | 12075231838379901 | 12075231838506460 | 12075222920509052 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f8886c24400 | 0x7f8756c35c80 | 1709392 | 1543712 | 65536 | 213673 | 213673 | 1709384 | 1555443 | 194670 | 16384 | 65536 | 143662 | 204508 | 10912 | 0 | 23568 | 23713 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222920577629 | 12075231838547100 | 12075231838672699 | 12075222920881875 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f8886c24300 | 0x7f8756c35cc0 | 3345952 | 3173510 | 65536 | 418243 | 418243 | 3345944 | 3185245 | 393075 | 16384 | 65536 | 331492 | 325065 | 137973 | 0 | 0 | 1832694 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222920936967 | 12075231838703899 | 12075231838956377 | 12075222921461963 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f8886c24a00 | 0x7f8756c35d00 | 3143632 | 2977463 | 65536 | 392953 | 392953 | 3143624 | 2989199 | 382302 | 16384 | 65536 | 274331 | 335261 | 2605 | 0 | 0 | 82912 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222921488081 | 12075231838998776 | 12075231839243254 | 12075222921995274 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f8886c24900 | 0x7f8756c35d40 | 1703200 | 1536088 | 65536 | 212899 | 212899 | 1703192 | 1547821 | 194410 | 16384 | 65536 | 152800 | 207595 | 1089 | 0 | 4158 | 4175 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222922020250 | 12075231839289174 | 12075231839414773 | 12075222922350243 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f8886c24800 | 0x7f8756c35d80 | 1852496 | 1643300 | 65536 | 231561 | 231561 | 1852488 | 1656803 | 209201 | 16384 | 65536 | 173601 | 224608 | 17987 | 0 | 562064 | 564308 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222922406638 | 12075231839447413 | 12075231839575732 | 12075222922727995 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f8886c24700 | 0x7f8756c35dc0 | 1792160 | 1619815 | 65536 | 224019 | 224019 | 1792152 | 1631545 | 196334 | 16384 | 65536 | 136949 | 197331 | 2518 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222922798346 | 12075231839616531 | 12075231839741490 | 12075222923108753 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f8886c24600 | 0x7f8756c35e00 | 3351184 | 3176443 | 65536 | 418897 | 418897 | 3351176 | 3188173 | 396007 | 16384 | 65536 | 549105 | 480475 | 94442 | 0 | 0 | 2257515 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222923165519 | 12075231839769650 | 12075231840022608 | 12075222923679914 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f8886c24500 | 0x7f8756c35e40 | 3178464 | 3003543 | 65536 | 397307 | 397307 | 3178456 | 3015275 | 378474 | 16384 | 65536 | 422485 | 480127 | 4931 | 0 | 0 | 119039 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222923706784 | 12075231840068688 | 12075231840317486 | 12075222924222082 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f8886c24400 | 0x7f8756c35e80 | 1727568 | 1552733 | 65536 | 215945 | 215945 | 1727560 | 1564461 | 192625 | 16384 | 65536 | 160288 | 218527 | 23 | 0 | 10265 | 10327 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222924247439 | 12075231840363245 | 12075231840488524 | 12075222924573846 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f8886c24300 | 0x7f8756c35ec0 | 1813792 | 1626206 | 65536 | 226723 | 226723 | 1813784 | 1637935 | 204785 | 16384 | 65536 | 196144 | 237163 | 16396 | 0 | 586307 | 588566 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222924630301 | 12075231840522444 | 12075231840651403 | 12075222924949404 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f8886c24a00 | 0x7f8756c35f00 | 1683216 | 1512105 | 65536 | 210401 | 210401 | 1683208 | 1523841 | 199269 | 16384 | 65536 | 134364 | 199817 | 187 | 0 | 2872 | 2905 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222925025295 | 12075231840693962 | 12075231840818761 | 12075222925328668 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f8886c24900 | 0x7f8756c35f40 | 3331616 | 3135409 | 65536 | 416451 | 416451 | 3331608 | 3147137 | 381289 | 16384 | 65536 | 352325 | 338843 | 74234 | 0 | 0 | 4479833 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222925385224 | 12075231840848041 | 12075231841109319 | 12075222925896774 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f8886c24800 | 0x7f8756c35f80 | 3186384 | 3024522 | 65536 | 398297 | 398297 | 3186376 | 3036255 | 379760 | 16384 | 65536 | 245126 | 307308 | 2984 | 0 | 0 | 134839 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222925921049 | 12075231841159559 | 12075231841404037 | 12075222926435806 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f8886c24700 | 0x7f8756c35fc0 | 1684256 | 1509307 | 65536 | 210531 | 210531 | 1684248 | 1521035 | 197867 | 16384 | 65536 | 150020 | 215461 | 594 | 0 | 26906 | 27075 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222926465681 | 12075231841450596 | 12075231841575235 | 12075222926788020 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f8886c24600 | 0x7f8756c36000 | 1782992 | 1600077 | 65536 | 222873 | 222873 | 1782984 | 1611807 | 205037 | 16384 | 65536 | 179632 | 236710 | 665 | 0 | 60886 | 61214 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222926846720 | 12075231841607875 | 12075231841744034 | 12075222927171243 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f8886c24500 | 0x7f8756c36040 | 1757408 | 1578801 | 65536 | 219675 | 219675 | 1757400 | 1590537 | 195821 | 16384 | 65536 | 129875 | 194832 | 315 | 0 | 23337 | 23467 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222927242104 | 12075231841785633 | 12075231841910112 | 12075222927547512 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f8886c24400 | 0x7f8756c36080 | 3354768 | 3146321 | 65536 | 419345 | 419345 | 3354760 | 3158049 | 390887 | 16384 | 65536 | 324053 | 316851 | 145041 | 0 | 0 | 3607118 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222927604438 | 12075231841937952 | 12075231842188670 | 12075222928111790 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f8886c24300 | 0x7f8756c360c0 | 3159968 | 2985599 | 65536 | 394995 | 394995 | 3159960 | 2997327 | 371178 | 16384 | 65536 | 265412 | 328661 | 3138 | 0 | 0 | 122210 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222928137468 | 12075231842231710 | 12075231842514587 | 12075222928646965 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f8886c24a00 | 0x7f8756c36100 | 1721808 | 1555409 | 65536 | 215225 | 215225 | 1721800 | 1567131 | 195941 | 16384 | 65536 | 137040 | 201468 | 1925 | 0 | 8788 | 8841 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222928674156 | 12075231842555547 | 12075231842679866 | 12075222928999470 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f8886c24900 | 0x7f8756c36140 | 1870560 | 1680383 | 65536 | 233819 | 233819 | 1870552 | 1692109 | 215274 | 16384 | 65536 | 211742 | 276537 | 633 | 0 | 17275 | 17358 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222929058400 | 12075231842711386 | 12075231842853785 | 12075222929379807 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f8886c24800 | 0x7f8756c36180 | 1734416 | 1566260 | 65536 | 216801 | 216801 | 1734408 | 1577987 | 192873 | 16384 | 65536 | 130839 | 196270 | 18 | 0 | 19360 | 19443 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222929449256 | 12075231842894584 | 12075231843019223 | 12075222929752800 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f8886c24700 | 0x7f8756c361c0 | 3265888 | 3093752 | 65536 | 408235 | 408235 | 3265880 | 3105477 | 388594 | 16384 | 65536 | 337512 | 331362 | 134296 | 0 | 0 | 1969979 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222929809776 | 12075231843053783 | 12075231843309781 | 12075222930316418 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f8886c24600 | 0x7f8756c36200 | 3233424 | 3062524 | 65536 | 404177 | 404177 | 3233416 | 3074249 | 370383 | 16384 | 65536 | 244994 | 308544 | 7854 | 0 | 0 | 138202 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222930344800 | 12075231843361140 | 12075231843605938 | 12075222930861811 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f8886c24500 | 0x7f8756c36240 | 1769888 | 1586491 | 65536 | 221235 | 221235 | 1769880 | 1598225 | 203756 | 16384 | 65536 | 118337 | 183847 | 1 | 0 | 4966 | 5023 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222930887579 | 12075231843655538 | 12075231843780817 | 12075222931223383 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f8886c24400 | 0x7f8756c36280 | 1916496 | 1752885 | 65536 | 239561 | 239561 | 1916488 | 1764613 | 222446 | 16384 | 65536 | 226399 | 291935 | 213 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222931279157 | 12075231843812977 | 12075231843961456 | 12075222931609891 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f8886c24300 | 0x7f8756c362c0 | 1825952 | 1643803 | 65536 | 228243 | 228243 | 1825944 | 1655539 | 199909 | 16384 | 65536 | 151399 | 214309 | 444 | 0 | 70381 | 70751 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222931693757 | 12075231844022575 | 12075231844148014 | 12075222932006769 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f8886c24a00 | 0x7f8756c36300 | 3378128 | 3156711 | 65536 | 422265 | 422265 | 3378120 | 3168445 | 389861 | 16384 | 65536 | 435522 | 398838 | 105233 | 0 | 0 | 2134170 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222932063244 | 12075231844177774 | 12075231844433292 | 12075222932583080 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f8886c24900 | 0x7f8756c36340 | 3170656 | 2991890 | 65536 | 396331 | 396331 | 3170648 | 3003619 | 374505 | 16384 | 65536 | 262603 | 325365 | 12781 | 0 | 0 | 61368 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222932607525 | 12075231844479051 | 12075231844725769 | 12075222933119056 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f8886c24800 | 0x7f8756c36380 | 1769424 | 1596586 | 65536 | 221177 | 221177 | 1769416 | 1608317 | 198770 | 16384 | 65536 | 136219 | 201585 | 946 | 0 | 120294 | 120681 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222933147188 | 12075231844775529 | 12075231844900808 | 12075222933475569 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f8886c24700 | 0x7f8756c363c0 | 2018016 | 1848809 | 65536 | 252251 | 252251 | 2018008 | 1860541 | 234097 | 16384 | 65536 | 198847 | 264383 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222933533697 | 12075231844928808 | 12075231845086566 | 12075222933873077 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f8886c24600 | 0x7f8756c36400 | 1741200 | 1560616 | 65536 | 217649 | 217649 | 1741192 | 1572345 | 199273 | 16384 | 65536 | 149419 | 213393 | 0 | 0 | 817 | 825 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222933941474 | 12075231845130886 | 12075231845256165 | 12075222934252783 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f8886c24500 | 0x7f8756c36440 | 3264608 | 3085996 | 65536 | 408075 | 408075 | 3264600 | 3097727 | 386024 | 16384 | 65536 | 340973 | 327257 | 137494 | 0 | 0 | 2873721 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222934311873 | 12075231845288005 | 12075231845551522 | 12075222934819246 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f8886c24400 | 0x7f8756c36480 | 3221328 | 3036103 | 65536 | 402665 | 402665 | 3221320 | 3047837 | 369392 | 16384 | 65536 | 243347 | 306630 | 3403 | 0 | 0 | 231376 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222934843972 | 12075231845593442 | 12075231845839040 | 12075222935338761 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f8886c24300 | 0x7f8756c364c0 | 1800288 | 1628857 | 65536 | 225035 | 225035 | 1800280 | 1640579 | 208745 | 16384 | 65536 | 139957 | 203912 | 1252 | 0 | 11166 | 11242 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222935378505 | 12075231845884480 | 12075231846010559 | 12075222935690925 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f8886c24a00 | 0x7f8756c36500 | 2116688 | 1946192 | 65536 | 264585 | 264585 | 2116680 | 1957921 | 245615 | 16384 | 65536 | 141236 | 206772 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222935747030 | 12075231846038878 | 12075231846203997 | 12075222936093413 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f8886c24900 | 0x7f8756c36540 | 1726304 | 1553907 | 65536 | 215787 | 215787 | 1726296 | 1565639 | 190961 | 16384 | 65536 | 157599 | 222870 | 5837 | 0 | 4457 | 4503 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222936162592 | 12075231846245277 | 12075231846370716 | 12075222936470855 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f8886c24800 | 0x7f8756c36580 | 3225744 | 3028317 | 65536 | 403217 | 403217 | 3225736 | 3040041 | 383594 | 16384 | 65536 | 396590 | 366167 | 87654 | 0 | 0 | 2208821 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222936527009 | 12075231846402235 | 12075231846656793 | 12075222937015196 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f8886c24700 | 0x7f8756c365c0 | 3183456 | 2980203 | 65536 | 397931 | 397931 | 3183448 | 2991941 | 369767 | 16384 | 65536 | 316848 | 362990 | 3326 | 0 | 0 | 165087 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222937042617 | 12075231846700793 | 12075231846946711 | 12075222937541765 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f8886c24600 | 0x7f8756c36600 | 1819920 | 1655288 | 65536 | 227489 | 227489 | 1819912 | 1667031 | 201193 | 16384 | 65536 | 137592 | 201622 | 88 | 0 | 21659 | 21794 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222937566190 | 12075231846992471 | 12075231847118229 | 12075222937896063 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f8886c24500 | 0x7f8756c36640 | 2206688 | 2045732 | 65536 | 275835 | 275835 | 2206680 | 2057461 | 259054 | 16384 | 65536 | 238988 | 304524 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222937952699 | 12075231847150069 | 12075231847322548 | 12075222938312127 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f8886c24400 | 0x7f8756c36680 | 1816592 | 1633102 | 65536 | 227073 | 227073 | 1816584 | 1644833 | 193385 | 16384 | 65536 | 143661 | 209190 | 1121 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222938381455 | 12075231847363507 | 12075231847488946 | 12075222938694978 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f8886c24300 | 0x7f8756c366c0 | 3251872 | 3052392 | 65536 | 406483 | 406483 | 3251864 | 3064117 | 382576 | 16384 | 65536 | 294792 | 294270 | 76962 | 0 | 0 | 2339677 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222938751343 | 12075231847519346 | 12075231847773104 | 12075222939215315 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f8886c24a00 | 0x7f8756c36700 | 3236496 | 3053428 | 65536 | 404561 | 404561 | 3236488 | 3065163 | 370922 | 16384 | 65536 | 263366 | 303633 | 18332 | 0 | 0 | 60546 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222939267502 | 12075231847820144 | 12075231848064142 | 12075222939747153 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f8886c24900 | 0x7f8756c36740 | 1836000 | 1659833 | 65536 | 229499 | 229499 | 1835992 | 1673341 | 198000 | 16384 | 65536 | 142462 | 207711 | 117 | 0 | 71626 | 71977 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222939774294 | 12075231848110861 | 12075231848236460 | 12075222940103155 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f8886c24800 | 0x7f8756c36780 | 2313040 | 2149605 | 65536 | 289129 | 289129 | 2313032 | 2161333 | 271210 | 16384 | 65536 | 258600 | 324136 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222940161564 | 12075231848268620 | 12075231848450859 | 12075222940528225 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f8886c24700 | 0x7f8756c367c0 | 1791264 | 1619163 | 65536 | 223907 | 223907 | 1791256 | 1630897 | 208495 | 16384 | 65536 | 145325 | 209015 | 2661 | 0 | 29391 | 29538 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222940597574 | 12075231848492138 | 12075231848617897 | 12075222940907610 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f8886c24600 | 0x7f8756c36800 | 3328208 | 3132623 | 65536 | 416025 | 416025 | 3328200 | 3144353 | 391142 | 16384 | 65536 | 377285 | 442821 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222940963734 | 12075231848645897 | 12075231848909255 | 12075222941463353 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f8886c24500 | 0x7f8756c36840 | 3247840 | 3077256 | 65536 | 405979 | 405979 | 3247832 | 3088973 | 370151 | 16384 | 65536 | 293724 | 352802 | 58360 | 0 | 0 | 109665 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222941488620 | 12075231848954054 | 12075231849203172 | 12075222941984862 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f8886c24400 | 0x7f8756c36880 | 1823120 | 1654161 | 65536 | 227889 | 227889 | 1823112 | 1665899 | 199146 | 16384 | 65536 | 145304 | 204811 | 1404 | 0 | 26021 | 26143 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222942015198 | 12075231849255652 | 12075231849381251 | 12075222942348898 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f8886c24300 | 0x7f8756c368c0 | 2416224 | 2255921 | 65536 | 302027 | 302027 | 2416216 | 2267647 | 284592 | 16384 | 65536 | 253679 | 319215 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222942405484 | 12075231849412931 | 12075231849603649 | 12075222942821617 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f8886c24a00 | 0x7f8756c36900 | 1828944 | 1645084 | 65536 | 228617 | 228617 | 1828936 | 1656811 | 208243 | 16384 | 65536 | 167593 | 230339 | 2194 | 0 | 29603 | 29758 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222942857574 | 12075231849644449 | 12075231849771488 | 12075222943180454 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f8886c24900 | 0x7f8756c36940 | 3495136 | 3325176 | 65536 | 436891 | 436891 | 3495128 | 3336907 | 421353 | 16384 | 65536 | 498420 | 563956 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222943235977 | 12075231849799487 | 12075231850080765 | 12075222943749201 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f8886c24800 | 0x7f8756c36980 | 3188304 | 3006909 | 65536 | 398537 | 398537 | 3188296 | 3018641 | 369257 | 16384 | 65536 | 312021 | 319335 | 46204 | 0 | 0 | 1381107 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222943775820 | 12075231850123805 | 12075231850371803 | 12075222944270209 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f8886c24700 | 0x7f8756c369c0 | 1823264 | 1637059 | 65536 | 227907 | 227907 | 1823256 | 1648787 | 202473 | 16384 | 65536 | 155683 | 220537 | 18317 | 0 | 170545 | 171302 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222944296327 | 12075231850414362 | 12075231850539961 | 12075222944629196 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f8886c24600 | 0x7f8756c36a00 | 2638800 | 2477856 | 65536 | 329849 | 329849 | 2638792 | 2489595 | 312623 | 16384 | 65536 | 285753 | 351289 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222944685681 | 12075231850572761 | 12075231850782839 | 12075222945119958 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f8886c24500 | 0x7f8756c36a40 | 1795680 | 1614098 | 65536 | 224459 | 224459 | 1795672 | 1625827 | 204569 | 16384 | 65536 | 150891 | 214321 | 8511 | 0 | 103012 | 103465 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222945156526 | 12075231850837399 | 12075231850964918 | 12075222945472253 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f8886c24400 | 0x7f8756c36a80 | 3776784 | 3609242 | 65536 | 472097 | 472097 | 3776776 | 3620975 | 452205 | 16384 | 65536 | 359864 | 425400 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222945527375 | 12075231850992918 | 12075231851299475 | 12075222946063131 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f8886c24300 | 0x7f8756c36ac0 | 3224416 | 3041686 | 65536 | 403051 | 403051 | 3224408 | 3053417 | 378985 | 16384 | 65536 | 409641 | 464220 | 79404 | 0 | 0 | 98968 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222946088518 | 12075231851344595 | 12075231851594353 | 12075222946587125 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f8886c24a00 | 0x7f8756c36b00 | 1810064 | 1638898 | 65536 | 226257 | 226257 | 1810056 | 1650623 | 197224 | 16384 | 65536 | 135806 | 195195 | 10173 | 0 | 229194 | 230011 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222946613754 | 12075231851634992 | 12075231851762671 | 12075222946946893 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f8886c24900 | 0x7f8756c36b40 | 2856224 | 2687711 | 65536 | 357027 | 357027 | 2856216 | 2701207 | 338540 | 16384 | 65536 | 298131 | 363667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222947011062 | 12075231851790671 | 12075231852018669 | 12075222947476067 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f8886c24800 | 0x7f8756c36b80 | 1835024 | 1657864 | 65536 | 229377 | 229377 | 1835016 | 1669599 | 205554 | 16384 | 65536 | 146696 | 201094 | 19683 | 0 | 262324 | 263366 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222947512745 | 12075231852079469 | 12075231852207628 | 12075222947830826 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f8886c24700 | 0x7f8756c36bc0 | 4036064 | 3863806 | 65536 | 504507 | 504507 | 4036056 | 3875535 | 485354 | 16384 | 65536 | 300250 | 365786 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222947886028 | 12075231852235627 | 12075231852563785 | 12075222948448123 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f8886c24600 | 0x7f8756c36c00 | 3253136 | 3044108 | 65536 | 406641 | 406641 | 3253128 | 3055843 | 381928 | 16384 | 65536 | 310402 | 304848 | 60080 | 0 | 0 | 2744120 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222948473119 | 12075231852608744 | 12075231852857702 | 12075222948976044 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f8886c24500 | 0x7f8756c36c40 | 1823776 | 1632886 | 65536 | 227971 | 227971 | 1823768 | 1644625 | 207209 | 16384 | 65536 | 157858 | 202457 | 15847 | 0 | 331605 | 333052 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222949005539 | 12075231852902982 | 12075231853030501 | 12075222949339259 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f8886c24400 | 0x7f8756c36c80 | 3063120 | 2901121 | 65536 | 382889 | 382889 | 3063112 | 2912853 | 365168 | 16384 | 65536 | 252981 | 318517 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222949401394 | 12075231853062181 | 12075231853307298 | 12075222949866178 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f8886c24300 | 0x7f8756c36cc0 | 1847008 | 1662836 | 65536 | 230875 | 230875 | 1847000 | 1674571 | 206692 | 16384 | 65536 | 152469 | 199255 | 6044 | 0 | 213618 | 214650 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222949901844 | 12075231853369698 | 12075231853498977 | 12075222950228381 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f8886c24a00 | 0x7f8756c36d00 | 4595664 | 4429370 | 65536 | 574457 | 574457 | 4595656 | 4442873 | 556140 | 16384 | 65536 | 345230 | 410766 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222950289094 | 12075231853527137 | 12075231853902014 | 12075222950910128 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f8886c24900 | 0x7f8756c36d40 | 3294048 | 3111696 | 65536 | 411755 | 411755 | 3294040 | 3123429 | 381162 | 16384 | 65536 | 307452 | 302228 | 67370 | 0 | 0 | 4375444 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222950935485 | 12075231853944573 | 12075231854204091 | 12075222951447998 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f8886c24800 | 0x7f8756c36d80 | 1835344 | 1660375 | 65536 | 229417 | 229417 | 1835336 | 1672101 | 212070 | 16384 | 65536 | 179403 | 227918 | 34233 | 0 | 518466 | 520766 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222951475449 | 12075231854254171 | 12075231854383770 | 12075222951806123 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f8886c24700 | 0x7f8756c36dc0 | 3472032 | 3301486 | 65536 | 434003 | 434003 | 3472024 | 3313217 | 416739 | 16384 | 65536 | 327104 | 392640 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222951860224 | 12075231854412729 | 12075231854690327 | 12075222952376042 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f8886c24600 | 0x7f8756c36e00 | 1759888 | 1582875 | 65536 | 219985 | 219985 | 1759880 | 1594613 | 210229 | 16384 | 65536 | 193376 | 257146 | 5836 | 0 | 38450 | 38685 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222952414474 | 12075231854749207 | 12075231854884245 | 12075222952726113 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f8886c24500 | 0x7f8756c36e40 | 5167776 | 4997911 | 65536 | 645971 | 645971 | 5167768 | 5009643 | 625517 | 16384 | 65536 | 523147 | 588683 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222952781596 | 12075231854912565 | 12075231855334642 | 12075222953433648 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f8886c24400 | 0x7f8756c36e80 | 3213648 | 3040285 | 65536 | 401705 | 401705 | 3213640 | 3052017 | 384233 | 16384 | 65536 | 578195 | 492042 | 80700 | 0 | 0 | 4105730 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222953459376 | 12075231855380081 | 12075231855637199 | 12075222953955578 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f8886c24300 | 0x7f8756c36ec0 | 1827296 | 1647708 | 65536 | 228411 | 228411 | 1827288 | 1659433 | 205426 | 16384 | 65536 | 151825 | 217139 | 304 | 0 | 984 | 999 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222953982879 | 12075231855678319 | 12075231855814158 | 12075222954319234 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f8886c24a00 | 0x7f8756c36f00 | 4241680 | 3727829 | 65536 | 530209 | 530209 | 4241672 | 3747473 | 469361 | 16384 | 65536 | 322654 | 388190 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222954381569 | 12075231855844878 | 12075231856159115 | 12075222954958732 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f8886c24900 | 0x7f8756c36f40 | 1981216 | 1801832 | 65536 | 247651 | 247651 | 1981208 | 1813561 | 225386 | 16384 | 65536 | 179118 | 244654 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222954994258 | 12075231856216235 | 12075231856367753 | 12075222955338608 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f8886c24800 | 0x7f8756c36f80 | 6277328 | 6112073 | 65536 | 784665 | 784665 | 6277320 | 6123801 | 765933 | 16384 | 65536 | 364576 | 430112 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222955396496 | 12075231856399273 | 12075231856914629 | 12075222956141069 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f8886c24700 | 0x7f8756c36fc0 | 3667168 | 3487457 | 65536 | 458395 | 458395 | 3667160 | 3499193 | 438442 | 16384 | 65536 | 628012 | 512404 | 162526 | 0 | 0 | 3944100 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222956174511 | 12075231856959428 | 12075231857255586 | 12075222956711980 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f8886c24600 | 0x7f8756c37000 | 1973648 | 1808770 | 65536 | 246705 | 246705 | 1973640 | 1820503 | 227955 | 16384 | 65536 | 214238 | 279774 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222956745392 | 12075231857307426 | 12075231857461344 | 12075222957085344 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f8886c24500 | 0x7f8756c37040 | 4756256 | 4582735 | 65536 | 594531 | 594531 | 4756248 | 4594473 | 575548 | 16384 | 65536 | 308056 | 373592 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222957137421 | 12075231857489344 | 12075231857873661 | 12075222957769596 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f8886c24400 | 0x7f8756c37080 | 2225360 | 2061299 | 65536 | 278169 | 278169 | 2225352 | 2073029 | 261104 | 16384 | 65536 | 164892 | 230428 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222957809009 | 12075231857933180 | 12075231858109019 | 12075222958160672 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f8886c24300 | 0x7f8756c370c0 | 7407200 | 7241710 | 65536 | 925899 | 925899 | 7407192 | 7253441 | 908773 | 16384 | 65536 | 372491 | 438027 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222958218790 | 12075231858138619 | 12075231858760214 | 12075222959061226 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f8886c24a00 | 0x7f8756c37100 | 4010704 | 3838047 | 65536 | 501337 | 501337 | 4010696 | 3849779 | 481260 | 16384 | 65536 | 526086 | 545520 | 41239 | 0 | 0 | 876997 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222959091402 | 12075231858806613 | 12075231859132050 | 12075222959654248 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f8886c24900 | 0x7f8756c37140 | 2262304 | 2087096 | 65536 | 282787 | 282787 | 2262296 | 2098829 | 266084 | 16384 | 65536 | 187294 | 252830 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222959679725 | 12075231859184210 | 12075231859361169 | 12075222960048951 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f8886c24800 | 0x7f8756c37180 | 5574288 | 5410201 | 65536 | 696785 | 696785 | 5574280 | 5421931 | 681197 | 16384 | 65536 | 264235 | 329771 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222960107951 | 12075231859389168 | 12075231859844365 | 12075222960784478 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f8886c24700 | 0x7f8756c371c0 | 2521696 | 2350125 | 65536 | 315211 | 315211 | 2521688 | 2361853 | 295017 | 16384 | 65536 | 259803 | 325339 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222960824853 | 12075231859900364 | 12075231860099883 | 12075222961243381 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f8886c24600 | 0x7f8756c37200 | 8518352 | 8350851 | 65536 | 1064793 | 1064793 | 8518344 | 8362579 | 1047277 | 16384 | 65536 | 397048 | 462584 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222961268658 | 12075231860145322 | 12075231860853956 | 12075222962230706 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f8886c24500 | 0x7f8756c37240 | 4560992 | 4392419 | 65536 | 570123 | 570123 | 4560984 | 4404143 | 548850 | 16384 | 65536 | 466752 | 532283 | 2064 | 0 | 0 | 114605 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222962256504 | 12075231860898756 | 12075231861272353 | 12075222962886535 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f8886c24400 | 0x7f8756c37280 | 2582416 | 2393559 | 65536 | 322801 | 322801 | 2582408 | 2405291 | 299753 | 16384 | 65536 | 214134 | 279670 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222962914627 | 12075231861323072 | 12075231861525311 | 12075222963383168 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f8886c24300 | 0x7f8756c372c0 | 6433184 | 6265123 | 65536 | 804147 | 804147 | 6433176 | 6276851 | 785711 | 16384 | 65536 | 393368 | 458904 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222963409336 | 12075231861568830 | 12075231862094586 | 12075222964194375 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f8886c24a00 | 0x7f8756c37300 | 2788304 | 2627891 | 65536 | 348537 | 348537 | 2788296 | 2639613 | 330733 | 16384 | 65536 | 281463 | 346999 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222964234039 | 12075231862156346 | 12075231862379224 | 12075222964696078 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f8886c24900 | 0x7f8756c37340 | 9657376 | 9492207 | 65536 | 1207171 | 1207171 | 9657368 | 9503939 | 1189232 | 16384 | 65536 | 438248 | 503784 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222964723829 | 12075231862428343 | 12075231863225457 | 12075222965788027 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f8886c24800 | 0x7f8756c37380 | 5109712 | 4940722 | 65536 | 638713 | 638713 | 5109704 | 4952451 | 618085 | 16384 | 65536 | 601612 | 666467 | 95 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222965823352 | 12075231863273136 | 12075231863693133 | 12075222966499028 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f8886c24700 | 0x7f8756c373c0 | 2830752 | 2660947 | 65536 | 353843 | 353843 | 2830744 | 2672679 | 337640 | 16384 | 65536 | 245260 | 310796 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222966526870 | 12075231863738573 | 12075231863963211 | 12075222966979130 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f8886c24600 | 0x7f8756c37400 | 7271056 | 7106109 | 65536 | 908881 | 908881 | 7271048 | 7117843 | 892147 | 16384 | 65536 | 439489 | 505025 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222967012993 | 12075231864015690 | 12075231864611045 | 12075222967862492 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f8886c24500 | 0x7f8756c37440 | 3339168 | 3169446 | 65536 | 417395 | 417395 | 3339160 | 3181169 | 401137 | 16384 | 65536 | 314231 | 379767 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222967897777 | 12075231864674405 | 12075231864942883 | 12075222968410600 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f8886c24400 | 0x7f8756c37480 | 11897296 | 11731497 | 65536 | 1487161 | 1487161 | 11897288 | 11743223 | 1470188 | 16384 | 65536 | 242958 | 308494 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222968434815 | 12075231864989122 | 12075231865972634 | 12075222969681763 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f8886c24300 | 0x7f8756c374c0 | 6233184 | 6059778 | 65536 | 779147 | 779147 | 6233176 | 6071505 | 759803 | 16384 | 65536 | 859275 | 924811 | 52 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222969714904 | 12075231866023834 | 12075231866555829 | 12075222970489494 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f8886c24a00 | 0x7f8756c37500 | 3387536 | 3223468 | 65536 | 423441 | 423441 | 3387528 | 3235203 | 404710 | 16384 | 65536 | 236746 | 302282 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222970515472 | 12075231866602549 | 12075231866873427 | 12075222971051628 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f8886c24900 | 0x7f8756c37540 | 9027680 | 8854188 | 65536 | 1128459 | 1128459 | 9027672 | 8865915 | 1107438 | 16384 | 65536 | 316892 | 382428 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222971077507 | 12075231866921266 | 12075231867658060 | 12075222972071774 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f8886c24800 | 0x7f8756c37580 | 4259024 | 3745930 | 65536 | 532377 | 532377 | 4259016 | 3763981 | 470758 | 16384 | 65536 | 368773 | 434309 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222972110566 | 12075231867721420 | 12075231868036937 | 12075222972697257 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f8886c24700 | 0x7f8756c375c0 | 14153760 | 13977726 | 65536 | 1769219 | 1769219 | 14153752 | 13989457 | 1750708 | 16384 | 65536 | 348211 | 413747 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222972728715 | 12075231868086217 | 12075231869255167 | 12075222974157069 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f8886c24600 | 0x7f8756c37600 | 7381904 | 7208857 | 65536 | 922737 | 922737 | 7381896 | 7220583 | 904947 | 16384 | 65536 | 1628127 | 1015602 | 692932 | 0 | 0 | 36746998 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222974188558 | 12075231869301407 | 12075231869912442 | 12075222975059246 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f8886c24500 | 0x7f8756c37640 | 3944160 | 3762722 | 65536 | 493019 | 493019 | 3944152 | 3774439 | 475632 | 16384 | 65536 | 314385 | 379921 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222975085365 | 12075231869960441 | 12075231870278359 | 12075222975661125 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f8886c24400 | 0x7f8756c37680 | 10694672 | 10526837 | 65536 | 1336833 | 1336833 | 10694664 | 10538563 | 1318867 | 16384 | 65536 | 362429 | 427965 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222975686241 | 12075231870334358 | 12075231871211471 | 12075222976827452 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f8886c24300 | 0x7f8756c376c0 | 5038240 | 4860357 | 65536 | 629779 | 629779 | 5038232 | 4872089 | 612715 | 16384 | 65536 | 412075 | 477611 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222976862427 | 12075231871273870 | 12075231871684107 | 12075222977523656 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f8886c24a00 | 0x7f8756c37700 | 18652624 | 18488461 | 65536 | 2331577 | 2331577 | 18652616 | 18500201 | 2314477 | 16384 | 65536 | 285956 | 351492 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222977552459 | 12075231871732107 | 12075231873279454 | 12075222979351543 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f8886c24900 | 0x7f8756c37740 | 9628512 | 9448099 | 65536 | 1203563 | 1203563 | 9628504 | 9459825 | 1182565 | 16384 | 65536 | 299614 | 365150 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222979383763 | 12075231873347773 | 12075231874140247 | 12075222980438643 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f8886c24800 | 0x7f8756c37780 | 5123280 | 4960568 | 65536 | 640409 | 640409 | 5123272 | 4974077 | 620913 | 16384 | 65536 | 311459 | 376995 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222980471164 | 12075231874229686 | 12075231874643283 | 12075222981142070 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f8886c24700 | 0x7f8756c377c0 | 14077728 | 13901011 | 65536 | 1759715 | 1759715 | 14077720 | 13912745 | 1739887 | 16384 | 65536 | 346943 | 412479 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222981175422 | 12075231874733218 | 12075231875891610 | 12075222982591263 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f8886c24600 | 0x7f8756c37800 | 9599952 | 9427286 | 65536 | 1199993 | 1199993 | 9599944 | 9439017 | 1180657 | 16384 | 65536 | 365118 | 430654 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222982637059 | 12075231875999768 | 12075231876784563 | 12075222983667594 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f8886c24500 | 0x7f8756c37840 | 36683936 | 36509927 | 65536 | 4585491 | 4585491 | 36683928 | 36521657 | 4566250 | 16384 | 65536 | 393677 | 459213 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222983701777 | 12075231876877042 | 12075231879927581 | 12075222987019643 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f8886c24400 | 0x7f8756c37880 | 18608848 | 18429047 | 65536 | 2326105 | 2326105 | 18608840 | 18442559 | 2305516 | 16384 | 65536 | 357945 | 423481 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222987055580 | 12075231880029180 | 12075231881569809 | 12075222988845997 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f8886c24300 | 0x7f8756c378c0 | 9631200 | 9462867 | 65536 | 1203899 | 1203899 | 9631192 | 9474601 | 1185395 | 16384 | 65536 | 260152 | 325688 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222988877906 | 12075231881668208 | 12075231882456843 | 12075222989922467 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f8886c24a00 | 0x7f8756c37900 | 27563408 | 27388043 | 65536 | 3445425 | 3445425 | 27563400 | 27399771 | 3424878 | 16384 | 65536 | 315590 | 381126 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222989955268 | 12075231882551242 | 12075231884832346 | 12075222992500358 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f8886c24900 | 0x7f8756c37940 | 18590752 | 18418067 | 65536 | 2323843 | 2323843 | 18590744 | 18431577 | 2304364 | 16384 | 65536 | 391926 | 457462 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222992547025 | 12075231884936345 | 12075231886472015 | 12075222994324348 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f8886c24800 | 0x7f8756c37980 | 72702736 | 72530002 | 65536 | 9087841 | 9087841 | 72702728 | 72541735 | 9069541 | 16384 | 65536 | 372030 | 437566 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075222994356748 | 12075231886567214 | 12075231892621411 | 12075223000673867 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f8886c24700 | 0x7f8756c379c0 | 36582496 | 36411788 | 65536 | 4572811 | 4572811 | 36582488 | 36425293 | 4555628 | 16384 | 65536 | 484687 | 550223 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075223000702811 | 12075231892711651 | 12075231895753709 | 12075223004006340 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f8886c24600 | 0x7f8756c37a00 | 18647248 | 18464200 | 65536 | 2330905 | 2330905 | 18647240 | 18475929 | 2311270 | 16384 | 65536 | 496876 | 562412 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075223004034122 | 12075231895843309 | 12075231897382178 | 12075223005842923 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 866123 | 866128 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f8886c24500 | 0x7f8756c37a40 | 54520480 | 54337510 | 65536 | 6815059 | 6815059 | 54520472 | 54349241 | 6793840 | 16384 | 65536 | 401940 | 467476 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 327680 | 65536 | 0 | 12075223005875093 | 12075231897477697 | 12075231902005345 | 12075223010674613 |