43 KiB
43 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_INSTS_SMEM | SQ_INST_LEVEL_SMEM | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBuffer.kd | 0 | 0 | 0 | 920962 | 920967 | 33554432 | 256 | 0 | 0 | 8 | 32 | 6464 | 0x0 | 0x7f7263004180 | 4194304 | 3135444 | 401545944 | 12076323351077524 | 12076323592299708 | 12076323592623867 | 12076323592733000 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 920962 | 920967 | 32768 | 256 | 0 | 0 | 24 | 24 | 12480 | 0x0 | 0x7f7263035100 | 512 | 24896 | 3171600 | 12076323607390380 | 12076323607684997 | 12076323607691557 | 12076323607697111 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 4 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 12928 | 0x7f7393007900 | 0x7f7263035140 | 65536 | 199274 | 25512368 | 12076323607762212 | 12076323607972035 | 12076323608104675 | 12076323608108936 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 6 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 36 | 24 | 13632 | 0x7f7393007800 | 0x7f7263035180 | 65536 | 160944 | 20540400 | 12076323608178325 | 12076323608357474 | 12076323608605152 | 12076323608672253 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 8 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 14080 | 0x7f7393007700 | 0x7f72630351c0 | 65536 | 200012 | 25576368 | 12076323608705605 | 12076323608885311 | 12076323609132670 | 12076323609203490 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 10 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14528 | 0x7f7393007600 | 0x7f7263035200 | 65536 | 192548 | 24657112 | 12076323609233927 | 12076323609419549 | 12076323609557308 | 12076323609560974 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 12 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 14976 | 0x7f7393007500 | 0x7f7263035240 | 65536 | 201756 | 25789000 | 12076323609615696 | 12076323609779547 | 12076323609916666 | 12076323609920593 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 14 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 15424 | 0x7f7393007400 | 0x7f7263035280 | 131072 | 245308 | 31232888 | 12076323609990523 | 12076323610162105 | 12076323610293785 | 12076323610297623 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 16 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16128 | 0x7f7393007300 | 0x7f72630352c0 | 131072 | 200518 | 25634984 | 12076323610344220 | 12076323610519704 | 12076323610769622 | 12076323610810206 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 18 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 16832 | 0x7f7393007a00 | 0x7f7263035300 | 65536 | 179708 | 22941752 | 12076323610835333 | 12076323611013941 | 12076323611266580 | 12076323611305296 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 20 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 17536 | 0x7f7393007900 | 0x7f7263035340 | 131072 | 247130 | 31623792 | 12076323611329822 | 12076323611521459 | 12076323611652498 | 12076323611656288 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 22 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18240 | 0x7f7393007800 | 0x7f7263035380 | 131072 | 264724 | 34008888 | 12076323611701232 | 12076323611879217 | 12076323612015697 | 12076323612019664 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 24 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 18944 | 0x7f7393007700 | 0x7f72630353c0 | 131072 | 244246 | 31482840 | 12076323612083482 | 12076323612256496 | 12076323612386415 | 12076323612390363 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 26 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 19904 | 0x7f7393007600 | 0x7f7263035400 | 131072 | 213208 | 27239808 | 12076323612443391 | 12076323612605454 | 12076323612861773 | 12076323612927150 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 28 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 20608 | 0x7f7393007500 | 0x7f7263035440 | 65536 | 196076 | 25275816 | 12076323612953149 | 12076323613135532 | 12076323613383050 | 12076323613448068 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 30 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 21312 | 0x7f7393007400 | 0x7f7263035480 | 131072 | 242030 | 30925496 | 12076323613474588 | 12076323613650249 | 12076323613778889 | 12076323613782841 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 32 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22016 | 0x7f7393007300 | 0x7f72630354c0 | 131072 | 244734 | 31308384 | 12076323613837853 | 12076323613996648 | 12076323614128007 | 12076323614132150 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 34 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 22720 | 0x7f7393007a00 | 0x7f7263035500 | 131072 | 254372 | 32612704 | 12076323614193303 | 12076323614355846 | 12076323614488165 | 12076323614491988 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 36 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 23680 | 0x7f7393007900 | 0x7f7263035540 | 131072 | 231684 | 29588352 | 12076323614543384 | 12076323614709604 | 12076323614969443 | 12076323615035949 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 38 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 24384 | 0x7f7393007800 | 0x7f7263035580 | 65536 | 176878 | 22653784 | 12076323615061597 | 12076323615254082 | 12076323615510721 | 12076323615575652 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 40 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25088 | 0x7f7393007700 | 0x7f72630355c0 | 131072 | 239276 | 30622448 | 12076323615601120 | 12076323615781439 | 12076323615918239 | 12076323615921966 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 42 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 25792 | 0x7f7393007600 | 0x7f7263035600 | 131072 | 240084 | 30830432 | 12076323615974083 | 12076323616147998 | 12076323616278877 | 12076323616282867 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 44 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 26496 | 0x7f7393007500 | 0x7f7263035640 | 131072 | 273802 | 35277120 | 12076323616345493 | 12076323616510556 | 12076323616637595 | 12076323616641573 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 46 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 27712 | 0x7f7393007400 | 0x7f7263035680 | 131072 | 225544 | 28745000 | 12076323616689422 | 12076323616857754 | 12076323617111513 | 12076323617177008 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 48 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 28416 | 0x7f7393007300 | 0x7f72630356c0 | 65536 | 201166 | 25813712 | 12076323617202917 | 12076323617381432 | 12076323617627191 | 12076323617691765 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 50 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 29120 | 0x7f7393007a00 | 0x7f7263035700 | 131072 | 243484 | 31154056 | 12076323617718425 | 12076323617889590 | 12076323618019189 | 12076323618023131 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 52 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30080 | 0x7f7393007900 | 0x7f7263035740 | 131072 | 268342 | 34452464 | 12076323618066572 | 12076323618248948 | 12076323618379827 | 12076323618383821 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 54 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 30784 | 0x7f7393007800 | 0x7f7263035780 | 131072 | 245610 | 31610232 | 12076323618445366 | 12076323618606866 | 12076323618735826 | 12076323618739542 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 56 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32000 | 0x7f7393007700 | 0x7f72630357c0 | 131072 | 229882 | 29441200 | 12076323618789986 | 12076323618964305 | 12076323619220144 | 12076323619285327 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 58 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 32960 | 0x7f7393007600 | 0x7f7263035800 | 65536 | 185232 | 23694696 | 12076323619309532 | 12076323619489742 | 12076323619738861 | 12076323619803981 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 60 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 33920 | 0x7f7393007500 | 0x7f7263035840 | 131072 | 256836 | 32780368 | 12076323619829568 | 12076323620003020 | 12076323620131819 | 12076323620135827 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 62 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 34880 | 0x7f7393007400 | 0x7f7263035880 | 131072 | 261330 | 33319384 | 12076323620179368 | 12076323620356778 | 12076323620488618 | 12076323620492420 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 64 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 35840 | 0x7f7393007300 | 0x7f72630358c0 | 131072 | 280200 | 35738192 | 12076323620553834 | 12076323620714377 | 12076323620846536 | 12076323620850315 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 66 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 37312 | 0x7f7393007a00 | 0x7f7263035900 | 131072 | 225070 | 28804256 | 12076323620901330 | 12076323621078375 | 12076323621335814 | 12076323621401139 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 68 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 38272 | 0x7f7393007900 | 0x7f7263035940 | 65536 | 167896 | 21475136 | 12076323621424532 | 12076323621602693 | 12076323621853731 | 12076323621892512 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 70 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 39232 | 0x7f7393007800 | 0x7f7263035980 | 131072 | 247034 | 31388232 | 12076323621917148 | 12076323622100130 | 12076323622230850 | 12076323622234698 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 72 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 40192 | 0x7f7393007700 | 0x7f72630359c0 | 131072 | 245000 | 31370136 | 12076323622285943 | 12076323622447649 | 12076323622580928 | 12076323622584658 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 74 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 41152 | 0x7f7393007600 | 0x7f7263035a00 | 131072 | 228836 | 29280720 | 12076323622647315 | 12076323622810367 | 12076323622941886 | 12076323622945860 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 76 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 42624 | 0x7f7393007500 | 0x7f7263035a40 | 131072 | 199872 | 25664664 | 12076323622995662 | 12076323623168765 | 12076323623424924 | 12076323623490412 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 78 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 43584 | 0x7f7393007400 | 0x7f7263035a80 | 65536 | 180488 | 23097792 | 12076323623514015 | 12076323623690843 | 12076323623940122 | 12076323624010608 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 80 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 44544 | 0x7f7393007300 | 0x7f7263035ac0 | 131072 | 229426 | 29189648 | 12076323624038510 | 12076323624215481 | 12076323624346040 | 12076323624350069 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 82 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 45760 | 0x7f7393007a00 | 0x7f7263035b00 | 131072 | 247200 | 31674880 | 12076323624399341 | 12076323624560759 | 12076323624691638 | 12076323624695481 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 84 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 46720 | 0x7f7393007900 | 0x7f7263035b40 | 131072 | 230340 | 29218688 | 12076323624756074 | 12076323624917717 | 12076323625044757 | 12076323625048748 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 86 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 48448 | 0x7f7393007800 | 0x7f7263035b80 | 131072 | 224878 | 28730232 | 12076323625101255 | 12076323625263636 | 12076323625528114 | 12076323625571169 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 88 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 49408 | 0x7f7393007700 | 0x7f7263035bc0 | 65536 | 202786 | 25939176 | 12076323625596766 | 12076323625771313 | 12076323626062192 | 12076323626127663 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 90 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 50368 | 0x7f7393007600 | 0x7f7263035c00 | 131072 | 276026 | 35451600 | 12076323626159702 | 12076323626335951 | 12076323626465870 | 12076323626469678 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 92 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 51584 | 0x7f7393007500 | 0x7f7263035c40 | 131072 | 270312 | 34520392 | 12076323626520994 | 12076323626699789 | 12076323626835148 | 12076323626838845 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 94 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 52544 | 0x7f7393007400 | 0x7f7263035c80 | 131072 | 242800 | 31121504 | 12076323626901020 | 12076323627065707 | 12076323627196107 | 12076323627200306 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 96 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 54272 | 0x7f7393007300 | 0x7f7263035cc0 | 131072 | 226928 | 29085304 | 12076323627248476 | 12076323627417706 | 12076323627676425 | 12076323627742083 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 98 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 55488 | 0x7f7393007a00 | 0x7f7263035d00 | 65536 | 202828 | 25941824 | 12076323627767250 | 12076323627940423 | 12076323628194822 | 12076323628262440 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 100 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 56704 | 0x7f7393007900 | 0x7f7263035d40 | 131072 | 259198 | 32951472 | 12076323628286976 | 12076323628463621 | 12076323628601060 | 12076323628604786 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 102 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 58176 | 0x7f7393007800 | 0x7f7263035d80 | 131072 | 258282 | 33021128 | 12076323628659398 | 12076323628817859 | 12076323628958979 | 12076323628962852 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 104 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 59392 | 0x7f7393007700 | 0x7f7263035dc0 | 131072 | 246652 | 31546856 | 12076323629030508 | 12076323629193058 | 12076323629326977 | 12076323629330896 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 106 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 61376 | 0x7f7393007600 | 0x7f7263035e00 | 131072 | 245768 | 31560440 | 12076323629386489 | 12076323629542816 | 12076323629802175 | 12076323629843308 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 108 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 62592 | 0x7f7393007500 | 0x7f7263035e40 | 65536 | 173704 | 22245464 | 12076323629867744 | 12076323630053854 | 12076323630304572 | 12076323630369406 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 110 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 63808 | 0x7f7393007400 | 0x7f7263035e80 | 131072 | 275760 | 35575200 | 12076323630396136 | 12076323630575771 | 12076323630705851 | 12076323630709839 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 112 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 65280 | 0x7f7393007300 | 0x7f7263035ec0 | 131072 | 244024 | 31108304 | 12076323630758709 | 12076323630925690 | 12076323631065529 | 12076323631069196 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 114 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 66496 | 0x7f7393007a00 | 0x7f7263035f00 | 131072 | 279584 | 35794400 | 12076323631131202 | 12076323631293528 | 12076323631422807 | 12076323631426681 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 116 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 68480 | 0x7f7393007900 | 0x7f7263035f40 | 131072 | 228846 | 29230256 | 12076323631480451 | 12076323631638646 | 12076323631904725 | 12076323631949863 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 118 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 69696 | 0x7f7393007800 | 0x7f7263035f80 | 65536 | 199242 | 25601280 | 12076323631974689 | 12076323632154964 | 12076323632410643 | 12076323632457957 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 120 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 70912 | 0x7f7393007700 | 0x7f7263035fc0 | 131072 | 275868 | 35385664 | 12076323632483795 | 12076323632658802 | 12076323632792881 | 12076323632796607 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 122 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 72384 | 0x7f7393007600 | 0x7f7263036000 | 131072 | 271282 | 35165760 | 12076323632845918 | 12076323633015120 | 12076323633153199 | 12076323633157317 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 124 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 73600 | 0x7f7393007500 | 0x7f7263036040 | 131072 | 249424 | 31654416 | 12076323633219923 | 12076323633379598 | 12076323633507598 | 12076323633511495 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 126 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 75840 | 0x7f7393007400 | 0x7f7263036080 | 131072 | 231548 | 29587792 | 12076323633560647 | 12076323633723597 | 12076323633993835 | 12076323634041110 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 128 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 77056 | 0x7f7393007300 | 0x7f72630360c0 | 65536 | 171078 | 21878256 | 12076323634064573 | 12076323634247754 | 12076323634497513 | 12076323634544395 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 130 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 78272 | 0x7f7393007a00 | 0x7f7263036100 | 131072 | 279070 | 35673072 | 12076323634566676 | 12076323634744392 | 12076323634881991 | 12076323634885539 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 132 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 80000 | 0x7f7393007900 | 0x7f7263036140 | 131072 | 219862 | 28157320 | 12076323634935893 | 12076323635109030 | 12076323635251590 | 12076323635255527 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 134 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 81216 | 0x7f7393007800 | 0x7f7263036180 | 131072 | 280770 | 35921656 | 12076323635319345 | 12076323635479429 | 12076323635616548 | 12076323635620505 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 136 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 83456 | 0x7f7393007700 | 0x7f72630361c0 | 131072 | 225682 | 28885744 | 12076323635670889 | 12076323635830627 | 12076323636091906 | 12076323636139600 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 138 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 84928 | 0x7f7393007600 | 0x7f7263036200 | 65536 | 201798 | 25883064 | 12076323636164396 | 12076323636340705 | 12076323636596864 | 12076323636643787 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 140 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 86400 | 0x7f7393007500 | 0x7f7263036240 | 131072 | 252542 | 32432200 | 12076323636670386 | 12076323636858942 | 12076323636997182 | 12076323637004858 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 142 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 88128 | 0x7f7393007400 | 0x7f7263036280 | 131072 | 231120 | 29646360 | 12076323637050883 | 12076323637215581 | 12076323637365500 | 12076323637369425 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 144 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 89600 | 0x7f7393007300 | 0x7f72630362c0 | 131072 | 247918 | 31659608 | 12076323637448462 | 12076323637609019 | 12076323637738138 | 12076323637741947 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 146 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 92096 | 0x7f7393007a00 | 0x7f7263036300 | 131072 | 229196 | 29396248 | 12076323637791129 | 12076323637955257 | 12076323638219416 | 12076323638265911 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 148 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 93568 | 0x7f7393007900 | 0x7f7263036340 | 65536 | 171128 | 21860632 | 12076323638292190 | 12076323638465335 | 12076323638713494 | 12076323638759559 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 150 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 95040 | 0x7f7393007800 | 0x7f7263036380 | 131072 | 269838 | 34491744 | 12076323638783784 | 12076323638956053 | 12076323639092532 | 12076323639096996 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 152 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 96768 | 0x7f7393007700 | 0x7f72630363c0 | 131072 | 250148 | 31950712 | 12076323639146939 | 12076323639306611 | 12076323639463410 | 12076323639467314 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 154 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 98240 | 0x7f7393007600 | 0x7f7263036400 | 131072 | 278424 | 35527744 | 12076323639529730 | 12076323639688369 | 12076323639816529 | 12076323639820390 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 156 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 100736 | 0x7f7393007500 | 0x7f7263036440 | 131072 | 229564 | 29332528 | 12076323639870634 | 12076323640039248 | 12076323640302286 | 12076323640328254 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 158 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 102208 | 0x7f7393007400 | 0x7f7263036480 | 65536 | 197912 | 25313256 | 12076323640369481 | 12076323640524365 | 12076323640777804 | 12076323640798889 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 160 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 103680 | 0x7f7393007300 | 0x7f72630364c0 | 131072 | 241314 | 30851752 | 12076323640843542 | 12076323640998763 | 12076323641131883 | 12076323641136055 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 162 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 105664 | 0x7f7393007a00 | 0x7f7263036500 | 131072 | 245368 | 31363504 | 12076323641187371 | 12076323641361162 | 12076323641526441 | 12076323641530699 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 164 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 107136 | 0x7f7393007900 | 0x7f7263036540 | 131072 | 249022 | 31724096 | 12076323641592574 | 12076323641748200 | 12076323641886919 | 12076323641891058 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 166 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 109888 | 0x7f7393007800 | 0x7f7263036580 | 131072 | 201048 | 25737440 | 12076323641941292 | 12076323642098758 | 12076323642357637 | 12076323642381981 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 168 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 111360 | 0x7f7393007700 | 0x7f72630365c0 | 65536 | 184142 | 23592320 | 12076323642424540 | 12076323642581796 | 12076323642833315 | 12076323642855160 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 170 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 112832 | 0x7f7393007600 | 0x7f7263036600 | 131072 | 246264 | 31616104 | 12076323642900564 | 12076323643068674 | 12076323643204193 | 12076323643208517 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 172 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 114816 | 0x7f7393007500 | 0x7f7263036640 | 131072 | 273018 | 34992472 | 12076323643260002 | 12076323643419712 | 12076323643593311 | 12076323643597510 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 174 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 116288 | 0x7f7393007400 | 0x7f7263036680 | 131072 | 280378 | 35882680 | 12076323643659204 | 12076323643815870 | 12076323643953950 | 12076323643958020 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 176 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 119040 | 0x7f7393007300 | 0x7f72630366c0 | 131072 | 205034 | 26250952 | 12076323644013503 | 12076323644174589 | 12076323644432507 | 12076323644458490 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 178 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 120768 | 0x7f7393007a00 | 0x7f7263036700 | 65536 | 198622 | 25316456 | 12076323644499306 | 12076323644652186 | 12076323644904505 | 12076323644926209 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 180 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 122496 | 0x7f7393007900 | 0x7f7263036740 | 131072 | 261612 | 33500992 | 12076323644970712 | 12076323645130904 | 12076323645305783 | 12076323645309822 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 182 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 124736 | 0x7f7393007800 | 0x7f7263036780 | 131072 | 267348 | 34363840 | 12076323645359484 | 12076323645526742 | 12076323645709782 | 12076323645713653 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 184 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 126464 | 0x7f7393007700 | 0x7f72630367c0 | 131072 | 247098 | 31735760 | 12076323645779445 | 12076323645934741 | 12076323646068660 | 12076323646073081 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 186 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 127936 | 0x7f7393007600 | 0x7f7263036800 | 131072 | 270960 | 34546432 | 12076323646122943 | 12076323646289619 | 12076323646556818 | 12076323646579101 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 188 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 129664 | 0x7f7393007500 | 0x7f7263036840 | 65536 | 174178 | 22248688 | 12076323646622692 | 12076323646779857 | 12076323647036015 | 12076323647061838 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 190 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 131392 | 0x7f7393007400 | 0x7f7263036880 | 131072 | 222078 | 28508408 | 12076323647105479 | 12076323647273294 | 12076323647411054 | 12076323647414984 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 192 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 133632 | 0x7f7393007300 | 0x7f72630368c0 | 131072 | 254992 | 32479592 | 12076323647464476 | 12076323647644013 | 12076323647835692 | 12076323647856705 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 194 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 135360 | 0x7f7393007a00 | 0x7f7263036900 | 131072 | 241268 | 30908928 | 12076323647916827 | 12076323648093771 | 12076323648229770 | 12076323648234087 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 196 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 136832 | 0x7f7393007900 | 0x7f7263036940 | 131072 | 254598 | 32667048 | 12076323648276766 | 12076323648458729 | 12076323648773287 | 12076323648791362 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 198 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 138560 | 0x7f7393007800 | 0x7f7263036980 | 65536 | 206060 | 26411944 | 12076323648838079 | 12076323649005126 | 12076323649256005 | 12076323649277977 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 200 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 140288 | 0x7f7393007700 | 0x7f72630369c0 | 131072 | 240384 | 30637304 | 12076323649326036 | 12076323649491044 | 12076323649621124 | 12076323649625092 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 202 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 142784 | 0x7f7393007600 | 0x7f7263036a00 | 131072 | 240522 | 30740944 | 12076323649680595 | 12076323649842883 | 12076323650052162 | 12076323650076410 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 204 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 144768 | 0x7f7393007500 | 0x7f7263036a40 | 131072 | 265848 | 33910808 | 12076323650135931 | 12076323650304160 | 12076323650439680 | 12076323650443833 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 206 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 145728 | 0x7f7393007400 | 0x7f7263036a80 | 131072 | 251302 | 32293864 | 12076323650496311 | 12076323650666879 | 12076323650974077 | 12076323650994046 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 208 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 147712 | 0x7f7393007300 | 0x7f7263036ac0 | 65536 | 185108 | 23763296 | 12076323651055961 | 12076323651225276 | 12076323651481595 | 12076323651501198 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 210 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 149696 | 0x7f7393007a00 | 0x7f7263036b00 | 131072 | 260708 | 33431120 | 12076323651553466 | 12076323651720794 | 12076323651861753 | 12076323651865916 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 212 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 152192 | 0x7f7393007900 | 0x7f7263036b40 | 131072 | 243794 | 30960016 | 12076323651916691 | 12076323652097912 | 12076323652325591 | 12076323652350136 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 214 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 154176 | 0x7f7393007800 | 0x7f7263036b80 | 131072 | 246144 | 31516136 | 12076323652403676 | 12076323652569750 | 12076323652710069 | 12076323652714123 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 216 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 155648 | 0x7f7393007700 | 0x7f7263036bc0 | 131072 | 238694 | 30613232 | 12076323652763665 | 12076323652943508 | 12076323653301427 | 12076323653323185 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 218 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 157632 | 0x7f7393007600 | 0x7f7263036c00 | 65536 | 194502 | 24878424 | 12076323653368719 | 12076323653534386 | 12076323653794064 | 12076323653815029 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 220 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 159616 | 0x7f7393007500 | 0x7f7263036c40 | 131072 | 243150 | 31094624 | 12076323653859551 | 12076323654027503 | 12076323654207822 | 12076323654212217 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 222 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 162368 | 0x7f7393007400 | 0x7f7263036c80 | 131072 | 280728 | 35873288 | 12076323654261919 | 12076323654437901 | 12076323654682860 | 12076323654704502 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 224 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 164608 | 0x7f7393007300 | 0x7f7263036cc0 | 131072 | 268110 | 34345528 | 12076323654766988 | 12076323654933579 | 12076323655073098 | 12076323655077264 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 226 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 166336 | 0x7f7393007a00 | 0x7f7263036d00 | 131072 | 264734 | 34046968 | 12076323655131836 | 12076323655300233 | 12076323655676232 | 12076323655700312 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 228 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 168576 | 0x7f7393007900 | 0x7f7263036d40 | 65536 | 171002 | 21915616 | 12076323655742691 | 12076323655909670 | 12076323656170149 | 12076323656195052 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 230 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 170816 | 0x7f7393007800 | 0x7f7263036d80 | 131072 | 273494 | 35074952 | 12076323656241849 | 12076323656407588 | 12076323656551108 | 12076323656553819 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 232 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 174080 | 0x7f7393007700 | 0x7f7263036dc0 | 131072 | 244350 | 31350440 | 12076323656603721 | 12076323656771747 | 12076323657081346 | 12076323657101136 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 234 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 176576 | 0x7f7393007600 | 0x7f7263036e00 | 131072 | 272266 | 34966264 | 12076323657165044 | 12076323657337025 | 12076323657479905 | 12076323657482895 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 236 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 177792 | 0x7f7393007500 | 0x7f7263036e40 | 131072 | 250608 | 32167248 | 12076323657532858 | 12076323657712384 | 12076323658135263 | 12076323658155425 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 238 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 180288 | 0x7f7393007400 | 0x7f7263036e80 | 65536 | 187080 | 23892424 | 12076323658200890 | 12076323658366782 | 12076323658626941 | 12076323658646518 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 240 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 182784 | 0x7f7393007300 | 0x7f7263036ec0 | 131072 | 273216 | 34911920 | 12076323658691571 | 12076323658863581 | 12076323659002780 | 12076323659011135 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 242 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 186304 | 0x7f7393007a00 | 0x7f7263036f00 | 131072 | 267540 | 34379304 | 12076323659056800 | 12076323659238300 | 12076323659553819 | 12076323659577328 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 244 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 189312 | 0x7f7393007900 | 0x7f7263036f40 | 131072 | 235356 | 30315176 | 12076323659636908 | 12076323659806618 | 12076323659958297 | 12076323659961081 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 246 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 190784 | 0x7f7393007800 | 0x7f7263036f80 | 131072 | 267360 | 34113064 | 12076323660017666 | 12076323660195097 | 12076323660711415 | 12076323660735460 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 248 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 193792 | 0x7f7393007700 | 0x7f7263036fc0 | 65536 | 190478 | 24411512 | 12076323660777127 | 12076323660943734 | 12076323661245973 | 12076323661266196 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 250 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 196800 | 0x7f7393007600 | 0x7f7263037000 | 131072 | 270202 | 34510000 | 12076323661315488 | 12076323661485813 | 12076323661640532 | 12076323661643397 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 252 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 201088 | 0x7f7393007500 | 0x7f7263037040 | 131072 | 241254 | 31062272 | 12076323661695865 | 12076323661881811 | 12076323662292210 | 12076323662315797 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 254 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 204608 | 0x7f7393007400 | 0x7f7263037080 | 131072 | 247580 | 31499496 | 12076323662372633 | 12076323662543249 | 12076323662719249 | 12076323662722222 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 256 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 206080 | 0x7f7393007300 | 0x7f72630370c0 | 131072 | 276044 | 35254912 | 12076323662771854 | 12076323662946928 | 12076323663556046 | 12076323663576370 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 258 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 209600 | 0x7f7393007a00 | 0x7f7263037100 | 65536 | 183066 | 23358592 | 12076323663620852 | 12076323663787086 | 12076323664114765 | 12076323664138274 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 260 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 213120 | 0x7f7393007900 | 0x7f7263037140 | 131072 | 268336 | 34289904 | 12076323664180352 | 12076323664344684 | 12076323664524363 | 12076323664527257 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 262 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 218176 | 0x7f7393007800 | 0x7f7263037180 | 131072 | 245282 | 31478568 | 12076323664577129 | 12076323664755883 | 12076323665211721 | 12076323665232327 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 264 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 222208 | 0x7f7393007700 | 0x7f72630371c0 | 131072 | 262508 | 33623432 | 12076323665292599 | 12076323665461320 | 12076323665661640 | 12076323665677815 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 266 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 223936 | 0x7f7393007600 | 0x7f7263037200 | 131072 | 272266 | 35134032 | 12076323665727437 | 12076323665892519 | 12076323666594117 | 12076323666617932 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 268 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 227968 | 0x7f7393007500 | 0x7f7263037240 | 65536 | 183994 | 23642032 | 12076323666658768 | 12076323666829796 | 12076323667204195 | 12076323667223918 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 270 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 232000 | 0x7f7393007400 | 0x7f7263037280 | 131072 | 243438 | 31099776 | 12076323667270735 | 12076323667437154 | 12076323667639074 | 12076323667659808 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 272 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 237824 | 0x7f7393007300 | 0x7f72630372c0 | 131072 | 248044 | 31575664 | 12076323667703059 | 12076323667867233 | 12076323668392991 | 12076323668420122 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 274 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 242368 | 0x7f7393007a00 | 0x7f7263037300 | 131072 | 251398 | 31976816 | 12076323668481055 | 12076323668652351 | 12076323668876830 | 12076323668897068 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 276 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 243584 | 0x7f7393007900 | 0x7f7263037340 | 131072 | 235974 | 30452504 | 12076323668940909 | 12076323669110589 | 12076323669908507 | 12076323669931871 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 278 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 248128 | 0x7f7393007800 | 0x7f7263037380 | 65536 | 187722 | 23922960 | 12076323669974450 | 12076323670149146 | 12076323670570585 | 12076323670594963 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 280 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 252672 | 0x7f7393007700 | 0x7f72630373c0 | 131072 | 257792 | 33088104 | 12076323670635920 | 12076323670808184 | 12076323671035863 | 12076323671057253 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 282 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 259264 | 0x7f7393007600 | 0x7f7263037400 | 131072 | 275128 | 35343024 | 12076323671101405 | 12076323671258583 | 12076323671855061 | 12076323671876725 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 284 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 264832 | 0x7f7393007500 | 0x7f7263037440 | 131072 | 241150 | 31176032 | 12076323671927931 | 12076323672091860 | 12076323672360339 | 12076323672384890 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 286 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 266304 | 0x7f7393007400 | 0x7f7263037480 | 131072 | 245240 | 31135608 | 12076323672428952 | 12076323672588339 | 12076323673572336 | 12076323673639532 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 288 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 28 | 24 | 271872 | 0x7f7393007300 | 0x7f72630374c0 | 65536 | 185494 | 23810848 | 12076323673667443 | 12076323673849455 | 12076323674363053 | 12076323674429710 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 290 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 277440 | 0x7f7393007a00 | 0x7f7263037500 | 131072 | 270626 | 34681168 | 12076323674461469 | 12076323674637132 | 12076323674908011 | 12076323674970535 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 292 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 279936 | 0x7f7393007900 | 0x7f7263037540 | 131072 | 260462 | 33475128 | 12076323675006943 | 12076323675186411 | 12076323675924008 | 12076323675991472 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 294 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 286528 | 0x7f7393007800 | 0x7f7263037580 | 131072 | 244092 | 31314424 | 12076323676037167 | 12076323676197447 | 12076323676512647 | 12076323676578483 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 296 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 288000 | 0x7f7393007700 | 0x7f72630375c0 | 131072 | 252970 | 32336632 | 12076323676603239 | 12076323676779046 | 12076323677948322 | 12076323678020764 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 298 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 32 | 24 | 294592 | 0x7f7393007600 | 0x7f7263037600 | 65536 | 189272 | 24113696 | 12076323678050098 | 12076323678234721 | 12076323678844159 | 12076323678909375 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 300 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 301184 | 0x7f7393007500 | 0x7f7263037640 | 131072 | 244778 | 31233472 | 12076323678933039 | 12076323679122398 | 12076323679440798 | 12076323679505072 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 302 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 304960 | 0x7f7393007400 | 0x7f7263037680 | 131072 | 241846 | 31135968 | 12076323679531732 | 12076323679705277 | 12076323680583034 | 12076323680649179 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 304 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 24 | 24 | 313600 | 0x7f7393007300 | 0x7f72630376c0 | 131072 | 250416 | 31980512 | 12076323680686989 | 12076323680856473 | 12076323681267192 | 12076323681336947 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 306 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 314816 | 0x7f7393007a00 | 0x7f7263037700 | 131072 | 277318 | 35157984 | 12076323681365360 | 12076323681546391 | 12076323683093586 | 12076323683159244 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 308 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 317568 | 0x7f7393007900 | 0x7f7263037740 | 65536 | 174154 | 22345656 | 12076323683188548 | 12076323683365585 | 12076323684158383 | 12076323684223451 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 310 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 320320 | 0x7f7393007800 | 0x7f7263037780 | 131072 | 279118 | 35872752 | 12076323684246374 | 12076323684431982 | 12076323684846381 | 12076323684910077 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 312 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 324096 | 0x7f7393007700 | 0x7f72630377c0 | 131072 | 247552 | 31617312 | 12076323684935525 | 12076323685122220 | 12076323686280456 | 12076323686347078 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 314 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 328896 | 0x7f7393007600 | 0x7f7263037800 | 131072 | 277944 | 35691800 | 12076323686387293 | 12076323686546216 | 12076323687332453 | 12076323687401077 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 316 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 330112 | 0x7f7393007500 | 0x7f7263037840 | 131072 | 246180 | 31661440 | 12076323687432706 | 12076323687605252 | 12076323690654203 | 12076323690720816 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 318 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 332864 | 0x7f7393007400 | 0x7f7263037880 | 65536 | 191938 | 24518880 | 12076323690752285 | 12076323690931962 | 12076323692473717 | 12076323692541400 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 320 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 335616 | 0x7f7393007300 | 0x7f72630378c0 | 131072 | 239628 | 30708424 | 12076323692574762 | 12076323692744437 | 12076323693534834 | 12076323693601841 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 322 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 339392 | 0x7f7393007a00 | 0x7f7263037900 | 131072 | 274278 | 35149648 | 12076323693628941 | 12076323693813553 | 12076323696095786 | 12076323696163692 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 324 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 344192 | 0x7f7393007900 | 0x7f7263037940 | 131072 | 248204 | 31666344 | 12076323696207553 | 12076323696367785 | 12076323697903461 | 12076323697970319 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 326 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 345408 | 0x7f7393007800 | 0x7f7263037980 | 131072 | 279784 | 35644216 | 12076323697998331 | 12076323698187300 | 12076323704239601 | 12076323704292348 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 328 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 20 | 24 | 348160 | 0x7f7393007700 | 0x7f72630379c0 | 65536 | 171254 | 22059944 | 12076323704321372 | 12076323704501520 | 12076323707542631 | 12076323707610255 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 330 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 350912 | 0x7f7393007600 | 0x7f7263037a00 | 131072 | 272624 | 34769824 | 12076323707638567 | 12076323707815270 | 12076323709354626 | 12076323709422493 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 332 | 920962 | 920967 | 4194304 | 256 | 0 | 0 | 12 | 24 | 0 | 0x7f7393007500 | 0x7f7263037a40 | 131072 | 267182 | 34191504 | 12076323709451777 | 12076323709631745 | 12076323714158451 | 12076323714224717 |