50 KiB
50 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | GRBM_COUNT | GRBM_GUI_ACTIVE | CPC_ME1_BUSY_FOR_PACKET_DECODE | SQ_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | SQ_BUSY_CYCLES | SQ_LEVEL_WAVES | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 891508 | 891513 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f3019604180 | 499350 | 499350 | 16307 | 3994808 | 524288 | 366187131 | 3797807 | 0 | 1479546076 | 12075754251090885 | 12075754495567939 | 12075754495889537 | 12075754496009475 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 891508 | 891513 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f3019635100 | 27998 | 27998 | 20958 | 223992 | 512 | 1150884 | 77352 | 0 | 4617544 | 12075754510868550 | 12075754511211259 | 12075754511217979 | 12075754511227137 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f3125360900 | 0x7f3019635140 | 215716 | 215716 | 22358 | 1725736 | 65536 | 144148643 | 1567089 | 0 | 578410044 | 12075754511296295 | 12075754511540537 | 12075754511672537 | 12075754511676271 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f3125360800 | 0x7f3019635180 | 397044 | 397044 | 26823 | 3176360 | 65536 | 230155708 | 3006315 | 0 | 922438472 | 12075754511754887 | 12075754511949815 | 12075754512203094 | 12075754512275956 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f3125360700 | 0x7f30196351c0 | 395844 | 395844 | 25936 | 3166760 | 65536 | 303596624 | 3001770 | 0 | 1216200116 | 12075754512313626 | 12075754512528852 | 12075754512781171 | 12075754512850784 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f3125360600 | 0x7f3019635200 | 217980 | 217980 | 22890 | 1743848 | 65536 | 148869285 | 1578184 | 0 | 597286488 | 12075754512877003 | 12075754513096530 | 12075754513229969 | 12075754513233536 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f3125360500 | 0x7f3019635240 | 217077 | 217077 | 21495 | 1736624 | 65536 | 146185523 | 1577524 | 0 | 586553980 | 12075754513294359 | 12075754513496368 | 12075754513629807 | 12075754513632998 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f3125360400 | 0x7f3019635280 | 212069 | 212069 | 17771 | 1696560 | 65536 | 160466215 | 1510719 | 0 | 643683976 | 12075754513718737 | 12075754513904366 | 12075754514032205 | 12075754514035666 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f3125360300 | 0x7f30196352c0 | 395565 | 395565 | 37079 | 3164528 | 65536 | 321608091 | 2984875 | 0 | 1288249212 | 12075754514098152 | 12075754514305964 | 12075754514557002 | 12075754514601558 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f3125360a00 | 0x7f3019635300 | 393444 | 393444 | 31301 | 3147560 | 65536 | 324001151 | 2975660 | 0 | 1297821672 | 12075754514638046 | 12075754514843561 | 12075754515093960 | 12075754515164875 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f3125360900 | 0x7f3019635340 | 211173 | 211173 | 21996 | 1689392 | 65536 | 159105244 | 1531422 | 0 | 638239296 | 12075754515191504 | 12075754515405958 | 12075754515535077 | 12075754515538179 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f3125360800 | 0x7f3019635380 | 210869 | 210869 | 20604 | 1686960 | 65536 | 149526114 | 1524310 | 0 | 599923268 | 12075754515598671 | 12075754515799236 | 12075754515927875 | 12075754515931009 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f3125360700 | 0x7f30196353c0 | 218941 | 218941 | 25213 | 1751536 | 65536 | 154493256 | 1572671 | 0 | 619786724 | 12075754516011769 | 12075754516193154 | 12075754516325793 | 12075754516328828 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f3125360600 | 0x7f3019635400 | 395645 | 395645 | 33532 | 3165168 | 65536 | 317185555 | 2998701 | 0 | 1270557244 | 12075754516383590 | 12075754516587072 | 12075754516839711 | 12075754516909217 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f3125360500 | 0x7f3019635440 | 387132 | 387132 | 30531 | 3097064 | 65536 | 328175939 | 2933182 | 0 | 1314522048 | 12075754516935926 | 12075754517163229 | 12075754517410588 | 12075754517479907 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f3125360400 | 0x7f3019635480 | 215692 | 215692 | 23652 | 1725544 | 65536 | 150093530 | 1561198 | 0 | 602194228 | 12075754517506406 | 12075754517713626 | 12075754517845466 | 12075754517848853 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f3125360300 | 0x7f30196354c0 | 215668 | 215668 | 22897 | 1725352 | 65536 | 154397177 | 1565185 | 0 | 619400580 | 12075754517908394 | 12075754518122744 | 12075754518255224 | 12075754518258314 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f3125360a00 | 0x7f3019635500 | 211749 | 211749 | 23368 | 1694000 | 65536 | 161212691 | 1533308 | 0 | 646670736 | 12075754518334546 | 12075754518520822 | 12075754518650742 | 12075754518653939 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f3125360900 | 0x7f3019635540 | 398108 | 398108 | 28395 | 3184872 | 65536 | 310683765 | 3009033 | 0 | 1244552616 | 12075754518715313 | 12075754518904820 | 12075754519159379 | 12075754519232063 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f3125360800 | 0x7f3019635580 | 392340 | 392340 | 29265 | 3138728 | 65536 | 290396954 | 2972075 | 0 | 1163403780 | 12075754519259464 | 12075754519467378 | 12075754519718096 | 12075754519787255 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f3125360700 | 0x7f30196355c0 | 218581 | 218581 | 21428 | 1748656 | 65536 | 154037882 | 1586664 | 0 | 617961524 | 12075754519812482 | 12075754520025455 | 12075754520160014 | 12075754520163605 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f3125360600 | 0x7f3019635600 | 218197 | 218197 | 24982 | 1745584 | 65536 | 161674916 | 1578111 | 0 | 648511824 | 12075754520224308 | 12075754520424813 | 12075754520558412 | 12075754520561745 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f3125360500 | 0x7f3019635640 | 213068 | 213068 | 22334 | 1704552 | 65536 | 159943257 | 1543226 | 0 | 641590724 | 12075754520637055 | 12075754520816011 | 12075754520946730 | 12075754520950046 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f3125360400 | 0x7f3019635680 | 393133 | 393133 | 32988 | 3145072 | 65536 | 299822691 | 2986963 | 0 | 1201107444 | 12075754521007212 | 12075754521210569 | 12075754521462568 | 12075754521532328 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f3125360300 | 0x7f30196356c0 | 389156 | 389156 | 29683 | 3113256 | 65536 | 331756166 | 2951632 | 0 | 1328842272 | 12075754521558207 | 12075754521762886 | 12075754522012165 | 12075754522081719 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f3125360a00 | 0x7f3019635700 | 219061 | 219061 | 25394 | 1752496 | 65536 | 164568768 | 1593759 | 0 | 660087696 | 12075754522110383 | 12075754522318723 | 12075754522453443 | 12075754522456676 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f3125360900 | 0x7f3019635740 | 211740 | 211740 | 25878 | 1693928 | 65536 | 169386679 | 1534346 | 0 | 679366436 | 12075754522516017 | 12075754522706721 | 12075754522837121 | 12075754522840279 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f3125360800 | 0x7f3019635780 | 207668 | 207668 | 20363 | 1661352 | 65536 | 166310708 | 1501465 | 0 | 667062032 | 12075754522911331 | 12075754523098559 | 12075754523225919 | 12075754523229503 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f3125360700 | 0x7f30196357c0 | 450044 | 450044 | 15991 | 3600360 | 65536 | 320218477 | 2962685 | 0 | 1282691480 | 12075754523283543 | 12075754523488477 | 12075754523778556 | 12075754523822585 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f3125360600 | 0x7f3019635800 | 386949 | 386949 | 32219 | 3095600 | 65536 | 331786990 | 2922520 | 0 | 1328965024 | 12075754523858872 | 12075754524055995 | 12075754524303353 | 12075754524374280 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f3125360500 | 0x7f3019635840 | 211100 | 211100 | 25332 | 1688808 | 65536 | 166518955 | 1509118 | 0 | 667902356 | 12075754524400028 | 12075754524602712 | 12075754524730551 | 12075754524733828 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f3125360400 | 0x7f3019635880 | 211292 | 211292 | 20863 | 1690344 | 65536 | 160574702 | 1515031 | 0 | 644137172 | 12075754524798889 | 12075754524990870 | 12075754525120949 | 12075754525124314 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f3125360300 | 0x7f30196358c0 | 211157 | 211157 | 21981 | 1689264 | 65536 | 162691784 | 1534735 | 0 | 652585884 | 12075754525195126 | 12075754525378548 | 12075754525508627 | 12075754525511634 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f3125360a00 | 0x7f3019635900 | 393653 | 393653 | 30587 | 3149232 | 65536 | 336012607 | 2984424 | 0 | 1345868568 | 12075754525571736 | 12075754525766386 | 12075754526019665 | 12075754526090259 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f3125360900 | 0x7f3019635940 | 392365 | 392365 | 32832 | 3138928 | 65536 | 355919394 | 2974649 | 0 | 1425493996 | 12075754526117460 | 12075754526326543 | 12075754526577582 | 12075754526646793 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f3125360800 | 0x7f3019635980 | 218252 | 218252 | 26173 | 1746024 | 65536 | 161197731 | 1563291 | 0 | 646622084 | 12075754526671479 | 12075754526882700 | 12075754527015020 | 12075754527018584 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f3125360700 | 0x7f30196359c0 | 222100 | 222100 | 26139 | 1776808 | 65536 | 166839666 | 1615753 | 0 | 669206972 | 12075754527082313 | 12075754527284778 | 12075754527422378 | 12075754527425962 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f3125360600 | 0x7f3019635a00 | 210621 | 210621 | 25295 | 1684976 | 65536 | 160226902 | 1522721 | 0 | 642726244 | 12075754527496893 | 12075754527675816 | 12075754527805256 | 12075754527808392 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f3125360500 | 0x7f3019635a40 | 400277 | 400277 | 30440 | 3202224 | 65536 | 314067863 | 3036441 | 0 | 1258091608 | 12075754527868484 | 12075754528072934 | 12075754528330373 | 12075754528402637 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f3125360400 | 0x7f3019635a80 | 399565 | 399565 | 25430 | 3196528 | 65536 | 267618519 | 3025155 | 0 | 1072292452 | 12075754528429086 | 12075754528634852 | 12075754528891330 | 12075754528960544 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f3125360300 | 0x7f3019635ac0 | 212757 | 212757 | 20618 | 1702064 | 65536 | 170626585 | 1539715 | 0 | 684333620 | 12075754528986392 | 12075754529202849 | 12075754529333568 | 12075754529337414 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f3125360a00 | 0x7f3019635b00 | 216164 | 216164 | 26627 | 1729320 | 65536 | 159688535 | 1567761 | 0 | 640575468 | 12075754529397736 | 12075754529591647 | 12075754529725566 | 12075754529728781 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f3125360900 | 0x7f3019635b40 | 208492 | 208492 | 20532 | 1667944 | 65536 | 163644466 | 1504993 | 0 | 656406856 | 12075754529802358 | 12075754529979645 | 12075754530107964 | 12075754530111162 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f3125360800 | 0x7f3019635b80 | 406333 | 406333 | 28976 | 3250672 | 65536 | 288198691 | 3080484 | 0 | 1154608804 | 12075754530171484 | 12075754530367003 | 12075754530629082 | 12075754530696700 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f3125360700 | 0x7f3019635bc0 | 390685 | 390685 | 34475 | 3125488 | 65536 | 326000939 | 2956035 | 0 | 1305819684 | 12075754530722227 | 12075754530926680 | 12075754531177239 | 12075754531248496 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f3125360600 | 0x7f3019635c00 | 206485 | 206485 | 23232 | 1651888 | 65536 | 164574149 | 1493609 | 0 | 660137796 | 12075754531274233 | 12075754531478997 | 12075754531606197 | 12075754531609296 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f3125360500 | 0x7f3019635c40 | 221341 | 221341 | 26227 | 1770736 | 65536 | 169363250 | 1604422 | 0 | 679273536 | 12075754531670019 | 12075754531859955 | 12075754531997235 | 12075754532005132 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f3125360400 | 0x7f3019635c80 | 209269 | 209269 | 23856 | 1674160 | 65536 | 161429639 | 1509720 | 0 | 647545008 | 12075754532074831 | 12075754532256913 | 12075754532385233 | 12075754532388985 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f3125360300 | 0x7f3019635cc0 | 403165 | 403165 | 31794 | 3225328 | 65536 | 315809343 | 3051106 | 0 | 1265056324 | 12075754532447794 | 12075754532642831 | 12075754532902510 | 12075754532971518 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f3125360a00 | 0x7f3019635d00 | 396780 | 396780 | 25227 | 3174248 | 65536 | 302614146 | 2990525 | 0 | 1212270720 | 12075754532996805 | 12075754533213229 | 12075754533468107 | 12075754533537610 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f3125360900 | 0x7f3019635d40 | 208861 | 208861 | 21572 | 1670896 | 65536 | 168748118 | 1510263 | 0 | 676855088 | 12075754533568477 | 12075754533764906 | 12075754533893385 | 12075754533896687 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f3125360800 | 0x7f3019635d80 | 225333 | 225333 | 26448 | 1802672 | 65536 | 179253311 | 1640362 | 0 | 718866680 | 12075754533957219 | 12075754534158504 | 12075754534298663 | 12075754534302281 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f3125360700 | 0x7f3019635dc0 | 210341 | 210341 | 19398 | 1682736 | 65536 | 168660440 | 1520638 | 0 | 676501400 | 12075754534374836 | 12075754534555302 | 12075754534684901 | 12075754534688158 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f3125360600 | 0x7f3019635e00 | 406860 | 406860 | 32066 | 3254888 | 65536 | 263141582 | 3046674 | 0 | 1054385940 | 12075754534747608 | 12075754534938340 | 12075754535199139 | 12075754535269579 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f3125360500 | 0x7f3019635e40 | 399957 | 399957 | 29805 | 3199664 | 65536 | 301462176 | 3027714 | 0 | 1207666228 | 12075754535295427 | 12075754535499617 | 12075754535756896 | 12075754535826453 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f3125360400 | 0x7f3019635e80 | 212613 | 212613 | 19806 | 1700912 | 65536 | 161851359 | 1544093 | 0 | 649235940 | 12075754535851600 | 12075754536074974 | 12075754536206173 | 12075754536209956 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f3125360300 | 0x7f3019635ec0 | 223277 | 223277 | 27831 | 1786224 | 65536 | 186332789 | 1622574 | 0 | 747171700 | 12075754536271480 | 12075754536464092 | 12075754536602971 | 12075754536606243 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f3125360a00 | 0x7f3019635f00 | 213797 | 213797 | 23838 | 1710384 | 65536 | 170722090 | 1552687 | 0 | 684732544 | 12075754536678988 | 12075754536857370 | 12075754536989210 | 12075754536992480 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f3125360900 | 0x7f3019635f40 | 400805 | 400805 | 33057 | 3206448 | 65536 | 328489022 | 3035896 | 0 | 1315775844 | 12075754537052422 | 12075754537246648 | 12075754537505207 | 12075754537575053 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f3125360800 | 0x7f3019635f80 | 394796 | 394796 | 33589 | 3158376 | 65536 | 355970800 | 2992050 | 0 | 1425700644 | 12075754537602324 | 12075754537802965 | 12075754538057204 | 12075754538127841 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f3125360700 | 0x7f3019635fc0 | 211517 | 211517 | 25008 | 1692144 | 65536 | 163524455 | 1528089 | 0 | 655933644 | 12075754538153588 | 12075754538356403 | 12075754538486642 | 12075754538489963 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f3125360600 | 0x7f3019636000 | 221885 | 221885 | 25330 | 1775088 | 65536 | 188668658 | 1612524 | 0 | 756546216 | 12075754538550646 | 12075754538743441 | 12075754538881840 | 12075754538885098 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f3125360500 | 0x7f3019636040 | 209452 | 209452 | 20632 | 1675624 | 65536 | 166245228 | 1507691 | 0 | 666810740 | 12075754538957552 | 12075754539145519 | 12075754539274318 | 12075754539278078 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f3125360400 | 0x7f3019636080 | 412093 | 412093 | 35097 | 3296752 | 65536 | 307874323 | 3122866 | 0 | 1233313348 | 12075754539330836 | 12075754539529997 | 12075754539795915 | 12075754539866331 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f3125360300 | 0x7f30196360c0 | 390612 | 390612 | 29657 | 3124904 | 65536 | 342935157 | 2956031 | 0 | 1373557408 | 12075754539893091 | 12075754540100714 | 12075754540352393 | 12075754540423176 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f3125360a00 | 0x7f3019636100 | 213572 | 213572 | 23214 | 1708584 | 65536 | 162939724 | 1552161 | 0 | 653593628 | 12075754540448323 | 12075754540649351 | 12075754540781350 | 12075754540784478 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f3125360900 | 0x7f3019636140 | 232588 | 232588 | 27373 | 1860712 | 65536 | 198545225 | 1691777 | 0 | 796050336 | 12075754540844129 | 12075754541040389 | 12075754541186148 | 12075754541189661 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f3125360800 | 0x7f3019636180 | 214660 | 214660 | 25145 | 1717288 | 65536 | 163989035 | 1558245 | 0 | 657788268 | 12075754541262256 | 12075754541441027 | 12075754541573667 | 12075754541576840 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f3125360700 | 0x7f30196361c0 | 409485 | 409485 | 33982 | 3275888 | 65536 | 309959234 | 3087030 | 0 | 1241654816 | 12075754541636772 | 12075754541828705 | 12075754542094304 | 12075754542163831 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f3125360600 | 0x7f3019636200 | 393524 | 393524 | 24100 | 3148200 | 65536 | 223397794 | 2967661 | 0 | 895409476 | 12075754542190421 | 12075754542398142 | 12075754542652221 | 12075754542720536 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f3125360500 | 0x7f3019636240 | 217533 | 217533 | 21859 | 1740272 | 65536 | 170944572 | 1580733 | 0 | 685668364 | 12075754542746674 | 12075754542947900 | 12075754543082779 | 12075754543086396 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f3125360400 | 0x7f3019636280 | 240972 | 240972 | 28099 | 1927784 | 65536 | 205605748 | 1760557 | 0 | 824281320 | 12075754543148080 | 12075754543343738 | 12075754543494777 | 12075754543498281 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f3125360300 | 0x7f30196362c0 | 215229 | 215229 | 21537 | 1721840 | 65536 | 170414996 | 1562682 | 0 | 683521568 | 12075754543591925 | 12075754543772855 | 12075754543906135 | 12075754543909295 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f3125360a00 | 0x7f3019636300 | 412196 | 412196 | 32147 | 3297576 | 65536 | 305065535 | 3088441 | 0 | 1222078532 | 12075754543967613 | 12075754544168853 | 12075754544435892 | 12075754544506515 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f3125360900 | 0x7f3019636340 | 384933 | 384933 | 30330 | 3079472 | 65536 | 314223250 | 2912268 | 0 | 1258711020 | 12075754544536901 | 12075754544733011 | 12075754544981809 | 12075754545051568 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f3125360800 | 0x7f3019636380 | 220861 | 220861 | 27266 | 1766896 | 65536 | 169459820 | 1604255 | 0 | 679697332 | 12075754545076795 | 12075754545284848 | 12075754545421647 | 12075754545424912 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f3125360700 | 0x7f30196363c0 | 251093 | 251093 | 27735 | 2008752 | 65536 | 222700754 | 1850982 | 0 | 892660940 | 12075754545483451 | 12075754545674606 | 12075754545832365 | 12075754545835605 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f3125360600 | 0x7f3019636400 | 210629 | 210629 | 22358 | 1685040 | 65536 | 162170732 | 1520160 | 0 | 650539920 | 12075754545907188 | 12075754546089324 | 12075754546219083 | 12075754546222374 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f3125360500 | 0x7f3019636440 | 402044 | 402044 | 34605 | 3216360 | 65536 | 298162592 | 3032672 | 0 | 1194475700 | 12075754546281995 | 12075754546473802 | 12075754546734441 | 12075754546804596 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f3125360400 | 0x7f3019636480 | 395140 | 395140 | 29652 | 3161128 | 65536 | 309316544 | 2991310 | 0 | 1239108096 | 12075754546829452 | 12075754547037479 | 12075754547292518 | 12075754547363695 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f3125360300 | 0x7f30196364c0 | 212333 | 212333 | 24523 | 1698672 | 65536 | 164185260 | 1531396 | 0 | 658594640 | 12075754547388671 | 12075754547593956 | 12075754547724996 | 12075754547728313 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f3125360a00 | 0x7f3019636500 | 264181 | 264181 | 28319 | 2113456 | 65536 | 235419743 | 1957122 | 0 | 943544900 | 12075754547793063 | 12075754547986434 | 12075754548152833 | 12075754548156679 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f3125360900 | 0x7f3019636540 | 214372 | 214372 | 27096 | 1714984 | 65536 | 168193335 | 1547913 | 0 | 674635040 | 12075754548229544 | 12075754548408512 | 12075754548540992 | 12075754548544279 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f3125360800 | 0x7f3019636580 | 406812 | 406812 | 39577 | 3254504 | 65536 | 328809738 | 3052583 | 0 | 1317056996 | 12075754548603529 | 12075754548796030 | 12075754549058429 | 12075754549128304 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f3125360700 | 0x7f30196365c0 | 393252 | 393252 | 25671 | 3146024 | 65536 | 331967485 | 2950639 | 0 | 1329729320 | 12075754549155415 | 12075754549356507 | 12075754549610906 | 12075754549679338 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f3125360600 | 0x7f3019636600 | 214821 | 214821 | 25606 | 1718576 | 65536 | 170892158 | 1553993 | 0 | 685425428 | 12075754549704986 | 12075754549904825 | 12075754550037784 | 12075754550041451 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f3125360500 | 0x7f3019636640 | 275829 | 275829 | 29701 | 2206640 | 65536 | 246668499 | 2046421 | 0 | 988530272 | 12075754550100922 | 12075754550293623 | 12075754550468022 | 12075754550471210 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f3125360400 | 0x7f3019636680 | 221372 | 221372 | 22868 | 1770984 | 65536 | 167494436 | 1604729 | 0 | 671827952 | 12075754550543535 | 12075754550723541 | 12075754550860660 | 12075754550863920 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f3125360300 | 0x7f30196366c0 | 399052 | 399052 | 35583 | 3192424 | 65536 | 350386378 | 3028937 | 0 | 1403367236 | 12075754550922900 | 12075754551125779 | 12075754551384817 | 12075754551456772 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f3125360a00 | 0x7f3019636700 | 393469 | 393469 | 30315 | 3147760 | 65536 | 304171915 | 2963493 | 0 | 1218521024 | 12075754551485225 | 12075754551685456 | 12075754551939854 | 12075754552016392 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f3125360900 | 0x7f3019636740 | 221108 | 221108 | 26122 | 1768872 | 65536 | 160741883 | 1598553 | 0 | 644800292 | 12075754552041719 | 12075754552246253 | 12075754552383052 | 12075754552386369 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f3125360800 | 0x7f3019636780 | 288997 | 288997 | 30478 | 2311984 | 65536 | 256788998 | 2151622 | 0 | 1028997616 | 12075754552446040 | 12075754552635531 | 12075754552818570 | 12075754552821819 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f3125360700 | 0x7f30196367c0 | 219740 | 219740 | 25365 | 1757928 | 65536 | 171045012 | 1598359 | 0 | 686049296 | 12075754552913479 | 12075754553103849 | 12075754553240328 | 12075754553243933 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f3125360600 | 0x7f3019636800 | 411653 | 411653 | 35643 | 3293232 | 65536 | 381159929 | 3131049 | 0 | 1526461128 | 12075754553303364 | 12075754553500167 | 12075754553767845 | 12075754553838177 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f3125360500 | 0x7f3019636840 | 390196 | 390196 | 31784 | 3121576 | 65536 | 325961779 | 2944543 | 0 | 1305665820 | 12075754553863164 | 12075754554068964 | 12075754554321762 | 12075754554391165 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f3125360400 | 0x7f3019636880 | 217612 | 217612 | 25763 | 1740904 | 65536 | 172532445 | 1572649 | 0 | 692002996 | 12075754554416693 | 12075754554621121 | 12075754554756000 | 12075754554759279 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f3125360300 | 0x7f30196368c0 | 301821 | 301821 | 31665 | 2414576 | 65536 | 271596539 | 2256793 | 0 | 1088231064 | 12075754554821535 | 12075754555009439 | 12075754555201438 | 12075754555274597 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f3125360a00 | 0x7f3019636900 | 219764 | 219764 | 26409 | 1758120 | 65536 | 172685906 | 1594924 | 0 | 692636316 | 12075754555311215 | 12075754555505117 | 12075754555641596 | 12075754555644845 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f3125360900 | 0x7f3019636940 | 437477 | 437477 | 39489 | 3499824 | 65536 | 402905359 | 3336888 | 0 | 1613442596 | 12075754555703744 | 12075754555892795 | 12075754556178233 | 12075754556248677 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f3125360800 | 0x7f3019636980 | 391380 | 391380 | 31724 | 3131048 | 65536 | 306968458 | 2953763 | 0 | 1229694172 | 12075754556274094 | 12075754556474872 | 12075754556728630 | 12075754556798128 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f3125360700 | 0x7f30196369c0 | 216988 | 216988 | 20976 | 1735912 | 65536 | 171321428 | 1571712 | 0 | 687168756 | 12075754556824257 | 12075754557033269 | 12075754557167988 | 12075754557171673 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f3125360600 | 0x7f3019636a00 | 330060 | 330060 | 33152 | 2640488 | 65536 | 301149987 | 2480705 | 0 | 1206456800 | 12075754557232115 | 12075754557431507 | 12075754557641906 | 12075754557685267 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f3125360500 | 0x7f3019636a40 | 217373 | 217373 | 24441 | 1738992 | 65536 | 168347982 | 1575123 | 0 | 675279436 | 12075754557735911 | 12075754557919024 | 12075754558054064 | 12075754558057910 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f3125360400 | 0x7f3019636a80 | 470316 | 470316 | 40750 | 3762536 | 65536 | 438751532 | 3603227 | 0 | 1756826184 | 12075754558120476 | 12075754558314215 | 12075754558621894 | 12075754558695414 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f3125360300 | 0x7f3019636ac0 | 403276 | 403276 | 32000 | 3226216 | 65536 | 290510791 | 3040624 | 0 | 1163871876 | 12075754558721403 | 12075754558930532 | 12075754559191810 | 12075754559262829 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f3125360a00 | 0x7f3019636b00 | 221493 | 221493 | 25682 | 1771952 | 65536 | 174023399 | 1611053 | 0 | 697957176 | 12075754559288917 | 12075754559493089 | 12075754559630689 | 12075754559633217 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f3125360900 | 0x7f3019636b40 | 355293 | 355293 | 34025 | 2842352 | 65536 | 321613561 | 2687968 | 0 | 1288312752 | 12075754559696194 | 12075754559883968 | 12075754560112447 | 12075754560183420 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f3125360800 | 0x7f3019636b80 | 215540 | 215540 | 27185 | 1724328 | 65536 | 167684548 | 1553692 | 0 | 672630636 | 12075754560222282 | 12075754560417566 | 12075754560551165 | 12075754560553728 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f3125360700 | 0x7f3019636bc0 | 547732 | 547732 | 15020 | 4381864 | 65536 | 474046660 | 3893383 | 0 | 1898006860 | 12075754560612998 | 12075754560808444 | 12075754561167963 | 12075754561238801 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f3125360600 | 0x7f3019636c00 | 398988 | 398988 | 34483 | 3191912 | 65536 | 343891744 | 3012733 | 0 | 1377399428 | 12075754561264419 | 12075754561467801 | 12075754561725880 | 12075754561794394 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f3125360500 | 0x7f3019636c40 | 217053 | 217053 | 26003 | 1736432 | 65536 | 169251569 | 1577444 | 0 | 678881200 | 12075754561820442 | 12075754562029559 | 12075754562164758 | 12075754562167617 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f3125360400 | 0x7f3019636c80 | 382948 | 382948 | 37117 | 3063592 | 65536 | 351774838 | 2907360 | 0 | 1408957132 | 12075754562231065 | 12075754562425877 | 12075754562671796 | 12075754562739951 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f3125360300 | 0x7f3019636cc0 | 228356 | 228356 | 29184 | 1826856 | 65536 | 183677780 | 1652658 | 0 | 736607112 | 12075754562775908 | 12075754562967315 | 12075754563109715 | 12075754563112744 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f3125360a00 | 0x7f3019636d00 | 579964 | 579964 | 52231 | 4639720 | 65536 | 543696467 | 4451896 | 0 | 2176605776 | 12075754563171623 | 12075754563373554 | 12075754563752432 | 12075754563821742 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f3125360900 | 0x7f3019636d40 | 401420 | 401420 | 34960 | 3211368 | 65536 | 334835951 | 3008365 | 0 | 1341163856 | 12075754563846938 | 12075754564058031 | 12075754564316270 | 12075754564387222 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f3125360800 | 0x7f3019636d80 | 226820 | 226820 | 28648 | 1814568 | 65536 | 180441113 | 1636708 | 0 | 723650748 | 12075754564409694 | 12075754564615309 | 12075754564756108 | 12075754564758863 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f3125360700 | 0x7f3019636dc0 | 436885 | 436885 | 42472 | 3495088 | 65536 | 402345976 | 3312791 | 0 | 1611235560 | 12075754564819536 | 12075754565007467 | 12075754565287786 | 12075754565358928 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f3125360600 | 0x7f3019636e00 | 224909 | 224909 | 29401 | 1799280 | 65536 | 184767684 | 1625020 | 0 | 740960508 | 12075754565396698 | 12075754565587785 | 12075754565728744 | 12075754565731370 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f3125360500 | 0x7f3019636e40 | 643532 | 643532 | 50932 | 5148264 | 65536 | 610447231 | 4984835 | 0 | 2443608480 | 12075754565791051 | 12075754565981223 | 12075754566405541 | 12075754566474982 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f3125360400 | 0x7f3019636e80 | 401148 | 401148 | 37036 | 3209192 | 65536 | 301533799 | 3022552 | 0 | 1207966416 | 12075754566506070 | 12075754566703300 | 12075754566963619 | 12075754567014575 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f3125360300 | 0x7f3019636ec0 | 218597 | 218597 | 25171 | 1748784 | 65536 | 184033325 | 1588121 | 0 | 738035284 | 12075754567043960 | 12075754567244258 | 12075754567380257 | 12075754567382990 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f3125360a00 | 0x7f3019636f00 | 488684 | 488684 | 43249 | 3909480 | 65536 | 457843234 | 3748792 | 0 | 1833231172 | 12075754567443933 | 12075754567636736 | 12075754567953535 | 12075754568025434 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f3125360900 | 0x7f3019636f40 | 242364 | 242364 | 27363 | 1938920 | 65536 | 214995485 | 1779921 | 0 | 861868608 | 12075754568063735 | 12075754568253854 | 12075754568406013 | 12075754568408656 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f3125360800 | 0x7f3019636f80 | 786756 | 786756 | 61323 | 6294056 | 65536 | 749637536 | 6122219 | 0 | 3000369972 | 12075754568467816 | 12075754568659132 | 12075754569178810 | 12075754569248747 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f3125360700 | 0x7f3019636fc0 | 458253 | 458253 | 41550 | 3666032 | 65536 | 386107387 | 3482764 | 0 | 1546280828 | 12075754569275196 | 12075754569476729 | 12075754569776888 | 12075754569845326 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f3125360600 | 0x7f3019637000 | 247741 | 247741 | 28146 | 1981936 | 65536 | 219000357 | 1820072 | 0 | 877883352 | 12075754569871946 | 12075754570085046 | 12075754570240886 | 12075754570244217 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f3125360500 | 0x7f3019637040 | 595693 | 595693 | 49932 | 4765552 | 65536 | 562828990 | 4595164 | 0 | 2253170312 | 12075754570306363 | 12075754570496405 | 12075754570883923 | 12075754570927217 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f3125360400 | 0x7f3019637080 | 279524 | 279524 | 29527 | 2236200 | 65536 | 251365342 | 2075513 | 0 | 1007354340 | 12075754570977119 | 12075754571167602 | 12075754571345201 | 12075754571348149 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f3125360300 | 0x7f30196370c0 | 928389 | 928389 | 67993 | 7427120 | 65536 | 890130627 | 7241400 | 0 | 3562343908 | 12075754571409192 | 12075754571605520 | 12075754572218158 | 12075754572287374 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f3125360a00 | 0x7f3019637100 | 498965 | 498965 | 43712 | 3991728 | 65536 | 460868396 | 3821720 | 0 | 1845316172 | 12075754572312802 | 12075754572521837 | 12075754572848875 | 12075754572916724 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f3125360900 | 0x7f3019637140 | 280605 | 280605 | 29368 | 2244848 | 65536 | 253659789 | 2089580 | 0 | 1016519780 | 12075754572943544 | 12075754573154474 | 12075754573333033 | 12075754573336093 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f3125360800 | 0x7f3019637180 | 701524 | 701524 | 58272 | 5612200 | 65536 | 665764892 | 5426573 | 0 | 2664910412 | 12075754573396596 | 12075754573596872 | 12075754574053990 | 12075754574123667 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f3125360700 | 0x7f30196371c0 | 315253 | 315253 | 31797 | 2522032 | 65536 | 285702587 | 2362092 | 0 | 1144707212 | 12075754574163802 | 12075754574348069 | 12075754574550308 | 12075754574618507 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f3125360600 | 0x7f3019637200 | 1074892 | 1074892 | 15034 | 8599144 | 65536 | 1029084320 | 8360574 | 0 | 4118158572 | 12075754574644705 | 12075754574842147 | 12075754575553664 | 12075754575625518 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f3125360500 | 0x7f3019637240 | 568068 | 568068 | 48452 | 4544552 | 65536 | 531190243 | 4370221 | 0 | 2126611796 | 12075754575654843 | 12075754575862623 | 12075754576236062 | 12075754576305933 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f3125360400 | 0x7f3019637280 | 318132 | 318132 | 31631 | 2545064 | 65536 | 290490876 | 2391997 | 0 | 1163865964 | 12075754576331340 | 12075754576537020 | 12075754576741020 | 12075754576809298 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f3125360300 | 0x7f30196372c0 | 805749 | 805749 | 63634 | 6446000 | 65536 | 767514462 | 6260811 | 0 | 3071904920 | 12075754576838452 | 12075754577043578 | 12075754577570456 | 12075754577639782 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f3125360a00 | 0x7f3019637300 | 349708 | 349708 | 33872 | 2797672 | 65536 | 319436243 | 2636794 | 0 | 1279635176 | 12075754577677362 | 12075754577868695 | 12075754578093974 | 12075754578163275 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f3125360900 | 0x7f3019637340 | 1208764 | 1208764 | 85127 | 9670120 | 65536 | 1171392412 | 9503401 | 0 | 4687390632 | 12075754578190255 | 12075754578392533 | 12075754579193490 | 12075754579262908 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f3125360800 | 0x7f3019637380 | 637844 | 637844 | 52534 | 5102760 | 65536 | 602273717 | 4936560 | 0 | 2410937196 | 12075754579296400 | 12075754579500368 | 12075754579921807 | 12075754579990560 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f3125360700 | 0x7f30196373c0 | 356668 | 356668 | 37077 | 2853352 | 65536 | 324340815 | 2670910 | 0 | 1299266032 | 12075754580024974 | 12075754580231246 | 12075754580458765 | 12075754580529412 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f3125360600 | 0x7f3019637400 | 909565 | 909565 | 66834 | 7276528 | 65536 | 874791131 | 7113717 | 0 | 3501012348 | 12075754580557374 | 12075754580763563 | 12075754581361641 | 12075754581431629 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f3125360500 | 0x7f3019637440 | 420285 | 420285 | 39158 | 3362288 | 65536 | 385774791 | 3173264 | 0 | 1544997716 | 12075754581469259 | 12075754581666920 | 12075754581936679 | 12075754582011937 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f3125360400 | 0x7f3019637480 | 1487612 | 1487612 | 30302 | 11900904 | 65536 | 1448364550 | 11740401 | 0 | 5795277836 | 12075754582045099 | 12075754582246597 | 12075754583232993 | 12075754583302696 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f3125360300 | 0x7f30196374c0 | 778493 | 778493 | 60556 | 6227952 | 65536 | 742036025 | 6057328 | 0 | 2970002716 | 12075754583336439 | 12075754583538112 | 12075754584052190 | 12075754584120265 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f3125360a00 | 0x7f3019637500 | 422725 | 422725 | 38803 | 3381808 | 65536 | 391433912 | 3221159 | 0 | 1567628180 | 12075754584148047 | 12075754584357629 | 12075754584631388 | 12075754584699963 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f3125360900 | 0x7f3019637540 | 1125973 | 1125973 | 79240 | 9007792 | 65536 | 1090173602 | 8846801 | 0 | 4362517008 | 12075754584724939 | 12075754584931547 | 12075754585671064 | 12075754585742109 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f3125360800 | 0x7f3019637580 | 491285 | 491285 | 45233 | 3930288 | 65536 | 457924042 | 3759537 | 0 | 1833593080 | 12075754585779820 | 12075754585971542 | 12075754586290261 | 12075754586359607 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f3125360700 | 0x7f30196375c0 | 1768892 | 1768892 | 117510 | 14151144 | 65536 | 1724480471 | 13989377 | 0 | 6899742248 | 12075754586386237 | 12075754586591700 | 12075754587764015 | 12075754587833156 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f3125360600 | 0x7f3019637600 | 917589 | 917589 | 64648 | 7340720 | 65536 | 817065072 | 7178198 | 0 | 3270105980 | 12075754587862360 | 12075754588068974 | 12075754588679371 | 12075754588749599 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f3125360500 | 0x7f3019637640 | 493980 | 493980 | 44699 | 3951848 | 65536 | 461018352 | 3784689 | 0 | 1845973996 | 12075754588775677 | 12075754588982890 | 12075754589302889 | 12075754589373939 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f3125360400 | 0x7f3019637680 | 1336141 | 1336141 | 20584 | 10689136 | 65536 | 1299542372 | 10535623 | 0 | 5199999904 | 12075754589399977 | 12075754589602728 | 12075754590482084 | 12075754590551077 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f3125360300 | 0x7f30196376c0 | 634613 | 634613 | 55186 | 5076912 | 65536 | 597975220 | 4898764 | 0 | 2393802940 | 12075754590592424 | 12075754590774563 | 12075754591188641 | 12075754591258081 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f3125360a00 | 0x7f3019637700 | 2331629 | 2331629 | 78651 | 18653040 | 65536 | 2285089271 | 18496211 | 0 | 9142177912 | 12075754591290912 | 12075754591492160 | 12075754593041914 | 12075754593111896 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f3125360900 | 0x7f3019637740 | 1201749 | 1201749 | 84261 | 9614000 | 65536 | 1164223788 | 9445451 | 0 | 4658718184 | 12075754593143835 | 12075754593349113 | 12075754594145429 | 12075754594214565 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f3125360800 | 0x7f3019637780 | 639589 | 639589 | 50572 | 5116720 | 65536 | 608748905 | 4964210 | 0 | 2436820920 | 12075754594240253 | 12075754594450868 | 12075754594866546 | 12075754594935144 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f3125360700 | 0x7f30196377c0 | 1756613 | 1756613 | 45141 | 14052912 | 65536 | 1715784544 | 13899558 | 0 | 6864965088 | 12075754594965461 | 12075754595176465 | 12075754596336140 | 12075754596406849 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f3125360600 | 0x7f3019637800 | 1199037 | 1199037 | 83022 | 9592304 | 65536 | 1162989107 | 9428433 | 0 | 4653784912 | 12075754596445431 | 12075754596636939 | 12075754597423336 | 12075754597493058 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f3125360500 | 0x7f3019637840 | 4583484 | 4583484 | 280720 | 36667880 | 65536 | 4514850892 | 36510039 | 0 | 18061225064 | 12075754597526480 | 12075754597726055 | 12075754600779003 | 12075754600848314 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f3125360400 | 0x7f3019637880 | 2324404 | 2324404 | 79831 | 18595240 | 65536 | 2275540691 | 18435576 | 0 | 9103984240 | 12075754600880974 | 12075754601091161 | 12075754602635635 | 12075754602707519 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f3125360300 | 0x7f30196378c0 | 1207477 | 1207477 | 17661 | 9659824 | 65536 | 1166549713 | 9463757 | 0 | 4668022292 | 12075754602734379 | 12075754602944594 | 12075754603736111 | 12075754603806261 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f3125360a00 | 0x7f3019637900 | 3443813 | 3443813 | 215423 | 27550512 | 65536 | 3385674577 | 27386268 | 0 | 13544525060 | 12075754603833161 | 12075754604052429 | 12075754606335460 | 12075754606406734 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f3125360900 | 0x7f3019637940 | 2323060 | 2323060 | 148824 | 18584488 | 65536 | 2277076633 | 18430984 | 0 | 9110134244 | 12075754606456196 | 12075754606639619 | 12075754608176733 | 12075754608246333 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f3125360800 | 0x7f3019637980 | 9086885 | 9086885 | 543240 | 72695088 | 65536 | 8977901897 | 72534850 | 0 | 35913427912 | 12075754608280185 | 12075754608479451 | 12075754614536547 | 12075754614609949 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f3125360700 | 0x7f30196379c0 | 4574028 | 4574028 | 283010 | 36592232 | 65536 | 4503870957 | 36426867 | 0 | 18017307904 | 12075754614638682 | 12075754614840866 | 12075754617890453 | 12075754617966096 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f3125360600 | 0x7f3019637a00 | 2327900 | 2327900 | 148830 | 18623208 | 65536 | 2281926568 | 18465178 | 0 | 9129532480 | 12075754617994349 | 12075754618209972 | 12075754619750126 | 12075754619819972 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 891508 | 891513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f3125360500 | 0x7f3019637a40 | 6812053 | 6812053 | 411443 | 54496432 | 65536 | 6724808985 | 54340299 | 0 | 26901062884 | 12075754619854035 | 12075754620060261 | 12075754624589525 | 12075754624664906 |