38 KiB
38 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_INSTS_LDS | SQ_INST_LEVEL_LDS | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 225145 | 225145 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f6011204280 | 0 | 0 | 0 | 17617373642292 | 17616657358066 | 17617521510730 | 17617521624110 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 225145 | 225145 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f6011223f80 | 0 | 0 | 0 | 17617526822058 | 17617521510730 | 17617526949772 | 17617526954185 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f601422d380 | 0x7f6011223fc0 | 0 | 0 | 0 | 17617526989654 | 17617526949772 | 17617527316172 | 17617527318486 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f601422d200 | 0x7f6011224000 | 0 | 0 | 0 | 17617527355035 | 17617527316172 | 17617527717452 | 17617527719416 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f601422d080 | 0x7f6011224040 | 0 | 0 | 0 | 17617527752475 | 17617527717452 | 17617528112172 | 17617528114156 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f6011acdf00 | 0x7f6011224080 | 0 | 0 | 0 | 17617528147005 | 17617528112172 | 17617528419052 | 17617528421068 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f6011acdd80 | 0x7f60112240c0 | 0 | 0 | 0 | 17617528454017 | 17617528419052 | 17617528719692 | 17617528721660 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f6011acdc00 | 0x7f6011224100 | 0 | 0 | 0 | 17617528765359 | 17617528719692 | 17617529019052 | 17617529020973 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f6011acda80 | 0x7f6011224140 | 0 | 0 | 0 | 17617529052832 | 17617529019052 | 17617529407373 | 17617529409783 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f6011acd900 | 0x7f6011224180 | 0 | 0 | 0 | 17617529441212 | 17617529407373 | 17617529796653 | 17617529798663 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f6011acd780 | 0x7f60112241c0 | 0 | 0 | 0 | 17617529830222 | 17617529796653 | 17617530116173 | 17617530118345 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f6011acd600 | 0x7f6011224200 | 0 | 0 | 0 | 17617530150604 | 17617530116173 | 17617530428653 | 17617530431017 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f601422d480 | 0x7f6011224240 | 0 | 0 | 0 | 17617530470706 | 17617530428653 | 17617530725293 | 17617530726820 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f601422d300 | 0x7f6011224280 | 0 | 0 | 0 | 17617530760449 | 17617530725293 | 17617531089293 | 17617531090840 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f601422d180 | 0x7f60112242c0 | 0 | 0 | 0 | 17617531123289 | 17617531089293 | 17617531450573 | 17617531452141 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f601422d000 | 0x7f6011224300 | 0 | 0 | 0 | 17617531484590 | 17617531450573 | 17617531725294 | 17617531726804 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f6011acde80 | 0x7f6011224340 | 0 | 0 | 0 | 17617531758603 | 17617531725294 | 17617532009454 | 17617532010787 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f6011acdd00 | 0x7f6011224380 | 0 | 0 | 0 | 17617532049466 | 17617532009454 | 17617532284334 | 17617532285800 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f6011acdb80 | 0x7f60112243c0 | 0 | 0 | 0 | 17617532318189 | 17617532284334 | 17617532646094 | 17617532647451 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f6011acda00 | 0x7f6011224400 | 0 | 0 | 0 | 17617532679750 | 17617532646094 | 17617533007054 | 17617533008542 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f6011acd880 | 0x7f6011224440 | 0 | 0 | 0 | 17617533040051 | 17617533007054 | 17617533285134 | 17617533286845 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f6011acd700 | 0x7f6011224480 | 0 | 0 | 0 | 17617533318864 | 17617533285134 | 17617533556334 | 17617533557708 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f601422d580 | 0x7f60112244c0 | 0 | 0 | 0 | 17617533596337 | 17617533556334 | 17617533827534 | 17617533829061 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f601422d400 | 0x7f6011224500 | 0 | 0 | 0 | 17617533860670 | 17617533827534 | 17617534189615 | 17617534191042 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f601422d280 | 0x7f6011224540 | 0 | 0 | 0 | 17617534223531 | 17617534189615 | 17617534552495 | 17617534553882 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f601422d100 | 0x7f6011224580 | 0 | 0 | 0 | 17617534584972 | 17617534552495 | 17617534828655 | 17617534830085 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f6011acdf80 | 0x7f60112245c0 | 0 | 0 | 0 | 17617534862085 | 17617534828655 | 17617535102895 | 17617535104478 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f6011acde00 | 0x7f6011224600 | 0 | 0 | 0 | 17617535143677 | 17617535102895 | 17617535374575 | 17617535376091 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f6011acdc80 | 0x7f6011224640 | 0 | 0 | 0 | 17617535409341 | 17617535374575 | 17617535731695 | 17617535733162 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f6011acdb00 | 0x7f6011224680 | 0 | 0 | 0 | 17617535764342 | 17617535731695 | 17617536088655 | 17617536090333 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f6011acd980 | 0x7f60112246c0 | 0 | 0 | 0 | 17617536122022 | 17617536088655 | 17617536365455 | 17617536367116 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f6011acd800 | 0x7f6011224700 | 0 | 0 | 0 | 17617536398155 | 17617536365455 | 17617536651375 | 17617536652909 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f6011acd680 | 0x7f6011224740 | 0 | 0 | 0 | 17617536690968 | 17617536651375 | 17617536927696 | 17617536929142 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f601422d500 | 0x7f6011224780 | 0 | 0 | 0 | 17617536961961 | 17617536927696 | 17617537287856 | 17617537289353 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f601422d380 | 0x7f60112247c0 | 0 | 0 | 0 | 17617537321362 | 17617537287856 | 17617537650096 | 17617537651613 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f601422d200 | 0x7f6011224800 | 0 | 0 | 0 | 17617537683823 | 17617537650096 | 17617537925136 | 17617537926746 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f601422d080 | 0x7f6011224840 | 0 | 0 | 0 | 17617537958656 | 17617537925136 | 17617538207696 | 17617538209129 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f6011acdf00 | 0x7f6011224880 | 0 | 0 | 0 | 17617538249708 | 17617538207696 | 17617538484016 | 17617538485432 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f6011acdd80 | 0x7f60112248c0 | 0 | 0 | 0 | 17617538518491 | 17617538484016 | 17617538844656 | 17617538846203 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f6011acdc00 | 0x7f6011224900 | 0 | 0 | 0 | 17617538877072 | 17617538844656 | 17617539208177 | 17617539209674 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f6011acda80 | 0x7f6011224940 | 0 | 0 | 0 | 17617539241083 | 17617539208177 | 17617539478897 | 17617539480317 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f6011acd900 | 0x7f6011224980 | 0 | 0 | 0 | 17617539511686 | 17617539478897 | 17617539750257 | 17617539751870 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f6011acd780 | 0x7f60112249c0 | 0 | 0 | 0 | 17617539789489 | 17617539750257 | 17617540025137 | 17617540026513 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f6011acd600 | 0x7f6011224a00 | 0 | 0 | 0 | 17617540058182 | 17617540025137 | 17617540381137 | 17617540382644 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f601422d480 | 0x7f6011224a40 | 0 | 0 | 0 | 17617540414813 | 17617540381137 | 17617540740017 | 17617540741505 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f601422d300 | 0x7f6011224a80 | 0 | 0 | 0 | 17617540773014 | 17617540740017 | 17617541012337 | 17617541013728 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f601422d180 | 0x7f6011224ac0 | 0 | 0 | 0 | 17617541045667 | 17617541012337 | 17617541284337 | 17617541285751 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f601422d000 | 0x7f6011224b00 | 0 | 0 | 0 | 17617541323680 | 17617541284337 | 17617541559057 | 17617541560504 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f6011acde80 | 0x7f6011224b40 | 0 | 0 | 0 | 17617541591823 | 17617541559057 | 17617541918258 | 17617541919735 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f6011acdd00 | 0x7f6011224b80 | 0 | 0 | 0 | 17617541953424 | 17617541918258 | 17617542290898 | 17617542292496 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f6011acdb80 | 0x7f6011224bc0 | 0 | 0 | 0 | 17617542323815 | 17617542290898 | 17617542565458 | 17617542566939 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f6011acda00 | 0x7f6011224c00 | 0 | 0 | 0 | 17617542597968 | 17617542565458 | 17617542837618 | 17617542839042 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f6011acd880 | 0x7f6011224c40 | 0 | 0 | 0 | 17617542877741 | 17617542837618 | 17617543111858 | 17617543113295 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f6011acd700 | 0x7f6011224c80 | 0 | 0 | 0 | 17617543145394 | 17617543111858 | 17617543469778 | 17617543471166 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f601422d580 | 0x7f6011224cc0 | 0 | 0 | 0 | 17617543502855 | 17617543469778 | 17617543828978 | 17617543830397 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f601422d400 | 0x7f6011224d00 | 0 | 0 | 0 | 17617543861706 | 17617543828978 | 17617544099698 | 17617544101090 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f601422d280 | 0x7f6011224d40 | 0 | 0 | 0 | 17617544133159 | 17617544099698 | 17617544376659 | 17617544377983 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f601422d100 | 0x7f6011224d80 | 0 | 0 | 0 | 17617544417452 | 17617544376659 | 17617544656339 | 17617544657766 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f6011acdf80 | 0x7f6011224dc0 | 0 | 0 | 0 | 17617544689525 | 17617544656339 | 17617545017299 | 17617545018867 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f6011acde00 | 0x7f6011224e00 | 0 | 0 | 0 | 17617545050856 | 17617545017299 | 17617545372979 | 17617545374468 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f6011acdc80 | 0x7f6011224e40 | 0 | 0 | 0 | 17617545405597 | 17617545372979 | 17617545643379 | 17617545644811 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f6011acdb00 | 0x7f6011224e80 | 0 | 0 | 0 | 17617545676170 | 17617545643379 | 17617545929939 | 17617545931253 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f6011acd980 | 0x7f6011224ec0 | 0 | 0 | 0 | 17617545968743 | 17617545929939 | 17617546210739 | 17617546212246 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f6011acd800 | 0x7f6011224f00 | 0 | 0 | 0 | 17617546243966 | 17617546210739 | 17617546575059 | 17617546576377 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f6011acd680 | 0x7f6011224f40 | 0 | 0 | 0 | 17617546608446 | 17617546575059 | 17617546935380 | 17617546936818 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f601422d500 | 0x7f6011224f80 | 0 | 0 | 0 | 17617546967947 | 17617546935380 | 17617547207060 | 17617547208651 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f601422d380 | 0x7f6011224fc0 | 0 | 0 | 0 | 17617547239860 | 17617547207060 | 17617547494420 | 17617547495814 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f601422d200 | 0x7f6011225000 | 0 | 0 | 0 | 17617547534863 | 17617547494420 | 17617547767380 | 17617547768937 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f601422d080 | 0x7f6011225040 | 0 | 0 | 0 | 17617547800396 | 17617547767380 | 17617548131540 | 17617548133078 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f6011acdf00 | 0x7f6011225080 | 0 | 0 | 0 | 17617548165057 | 17617548131540 | 17617548492340 | 17617548493869 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f6011acdd80 | 0x7f60112250c0 | 0 | 0 | 0 | 17617548525858 | 17617548492340 | 17617548768980 | 17617548770372 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f6011acdc00 | 0x7f6011225100 | 0 | 0 | 0 | 17617548801051 | 17617548768980 | 17617549062900 | 17617549064304 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f6011acda80 | 0x7f6011225140 | 0 | 0 | 0 | 17617549113433 | 17617549062900 | 17617549337461 | 17617549338857 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f6011acd900 | 0x7f6011225180 | 0 | 0 | 0 | 17617549370096 | 17617549337461 | 17617549710741 | 17617549712198 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f6011acd780 | 0x7f60112251c0 | 0 | 0 | 0 | 17617549743687 | 17617549710741 | 17617550072021 | 17617550073539 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f6011acd600 | 0x7f6011225200 | 0 | 0 | 0 | 17617550105618 | 17617550072021 | 17617550350581 | 17617550352101 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f601422d480 | 0x7f6011225240 | 0 | 0 | 0 | 17617550383471 | 17617550350581 | 17617550650901 | 17617550652284 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f601422d300 | 0x7f6011225280 | 0 | 0 | 0 | 17617550690823 | 17617550650901 | 17617550932981 | 17617550934397 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f601422d180 | 0x7f60112252c0 | 0 | 0 | 0 | 17617550966226 | 17617550932981 | 17617551295541 | 17617551297107 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f601422d000 | 0x7f6011225300 | 0 | 0 | 0 | 17617551329177 | 17617551295541 | 17617551658901 | 17617551660368 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f6011acde80 | 0x7f6011225340 | 0 | 0 | 0 | 17617551692067 | 17617551658901 | 17617551933622 | 17617551935011 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f6011acdd00 | 0x7f6011225380 | 0 | 0 | 0 | 17617551966931 | 17617551933622 | 17617552237142 | 17617552238644 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f6011acdb80 | 0x7f60112253c0 | 0 | 0 | 0 | 17617552277203 | 17617552237142 | 17617552509782 | 17617552511337 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f6011acda00 | 0x7f6011225400 | 0 | 0 | 0 | 17617552542106 | 17617552509782 | 17617552865622 | 17617552867108 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f6011acd880 | 0x7f6011225440 | 0 | 0 | 0 | 17617552898317 | 17617552865622 | 17617553224182 | 17617553225649 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f6011acd700 | 0x7f6011225480 | 0 | 0 | 0 | 17617553257078 | 17617553224182 | 17617553497782 | 17617553499362 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f601422d580 | 0x7f60112254c0 | 0 | 0 | 0 | 17617553530831 | 17617553497782 | 17617553821462 | 17617553823013 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f601422d400 | 0x7f6011225500 | 0 | 0 | 0 | 17617553861702 | 17617553821462 | 17617554100662 | 17617554102266 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f601422d280 | 0x7f6011225540 | 0 | 0 | 0 | 17617554135076 | 17617554100662 | 17617554466263 | 17617554467857 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f601422d100 | 0x7f6011225580 | 0 | 0 | 0 | 17617554500836 | 17617554466263 | 17617554826423 | 17617554827978 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f6011acdf80 | 0x7f60112255c0 | 0 | 0 | 0 | 17617554859567 | 17617554826423 | 17617555100023 | 17617555101551 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f6011acde00 | 0x7f6011225600 | 0 | 0 | 0 | 17617555132730 | 17617555100023 | 17617555414583 | 17617555416003 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f6011acdc80 | 0x7f6011225640 | 0 | 0 | 0 | 17617555454622 | 17617555414583 | 17617555687383 | 17617555688946 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f6011acdb00 | 0x7f6011225680 | 0 | 0 | 0 | 17617555719795 | 17617555687383 | 17617556043703 | 17617556045387 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f6011acd980 | 0x7f60112256c0 | 0 | 0 | 0 | 17617556076296 | 17617556043703 | 17617556403223 | 17617556404758 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f6011acd800 | 0x7f6011225700 | 0 | 0 | 0 | 17617556436007 | 17617556403223 | 17617556677303 | 17617556678691 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f6011acd680 | 0x7f6011225740 | 0 | 0 | 0 | 17617556709920 | 17617556677303 | 17617557004184 | 17617557005703 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f601422d500 | 0x7f6011225780 | 0 | 0 | 0 | 17617557047552 | 17617557004184 | 17617557277464 | 17617557278956 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f601422d380 | 0x7f60112257c0 | 0 | 0 | 0 | 17617557310195 | 17617557277464 | 17617557635384 | 17617557636777 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f601422d200 | 0x7f6011225800 | 0 | 0 | 0 | 17617557669036 | 17617557635384 | 17617557992984 | 17617557994588 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f601422d080 | 0x7f6011225840 | 0 | 0 | 0 | 17617558025727 | 17617557992984 | 17617558284184 | 17617558285720 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f6011acdf00 | 0x7f6011225880 | 0 | 0 | 0 | 17617558317880 | 17617558284184 | 17617558624184 | 17617558625842 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f6011acdd80 | 0x7f60112258c0 | 0 | 0 | 0 | 17617558664591 | 17617558624184 | 17617558896344 | 17617558897775 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f6011acdc00 | 0x7f6011225900 | 0 | 0 | 0 | 17617558928904 | 17617558896344 | 17617559254424 | 17617559255826 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f6011acda80 | 0x7f6011225940 | 0 | 0 | 0 | 17617559287155 | 17617559254424 | 17617559610425 | 17617559611857 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f6011acd900 | 0x7f6011225980 | 0 | 0 | 0 | 17617559643026 | 17617559610425 | 17617559891865 | 17617559893430 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f6011acd780 | 0x7f60112259c0 | 0 | 0 | 0 | 17617559924479 | 17617559891865 | 17617560241785 | 17617560243271 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f6011acd600 | 0x7f6011225a00 | 0 | 0 | 0 | 17617560281790 | 17617560241785 | 17617560516825 | 17617560518504 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f601422d480 | 0x7f6011225a40 | 0 | 0 | 0 | 17617560549803 | 17617560516825 | 17617560875385 | 17617560876855 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f601422d300 | 0x7f6011225a80 | 0 | 0 | 0 | 17617560909014 | 17617560875385 | 17617561232505 | 17617561233876 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f601422d180 | 0x7f6011225ac0 | 0 | 0 | 0 | 17617561265565 | 17617561232505 | 17617561520345 | 17617561522008 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f601422d000 | 0x7f6011225b00 | 0 | 0 | 0 | 17617561553348 | 17617561520345 | 17617561879546 | 17617561881059 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f6011acde80 | 0x7f6011225b40 | 0 | 0 | 0 | 17617561919758 | 17617561879546 | 17617562161306 | 17617562162862 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f6011acdd00 | 0x7f6011225b80 | 0 | 0 | 0 | 17617562196041 | 17617562161306 | 17617562526106 | 17617562527573 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f6011acdb80 | 0x7f6011225bc0 | 0 | 0 | 0 | 17617562559762 | 17617562526106 | 17617562886266 | 17617562887624 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f6011acda00 | 0x7f6011225c00 | 0 | 0 | 0 | 17617562919813 | 17617562886266 | 17617563195706 | 17617563197286 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f6011acd880 | 0x7f6011225c40 | 0 | 0 | 0 | 17617563228415 | 17617563195706 | 17617563581306 | 17617563627295 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f6011acd700 | 0x7f6011225c80 | 0 | 0 | 0 | 17617563645865 | 17617563581306 | 17617563906906 | 17617563908268 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f601422d580 | 0x7f6011225cc0 | 0 | 0 | 0 | 17617563940177 | 17617563906906 | 17617564270746 | 17617564272239 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f601422d400 | 0x7f6011225d00 | 0 | 0 | 0 | 17617564304178 | 17617564270746 | 17617564635067 | 17617564636509 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f601422d280 | 0x7f6011225d40 | 0 | 0 | 0 | 17617564668449 | 17617564635067 | 17617564949147 | 17617564950451 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f601422d100 | 0x7f6011225d80 | 0 | 0 | 0 | 17617564981881 | 17617564949147 | 17617565360347 | 17617565403230 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f6011acdf80 | 0x7f6011225dc0 | 0 | 0 | 0 | 17617565422010 | 17617565360347 | 17617565711707 | 17617565713132 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f6011acde00 | 0x7f6011225e00 | 0 | 0 | 0 | 17617565744121 | 17617565711707 | 17617566084027 | 17617566086623 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f6011acdc80 | 0x7f6011225e40 | 0 | 0 | 0 | 17617566118272 | 17617566084027 | 17617566462427 | 17617566485903 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f6011acdb00 | 0x7f6011225e80 | 0 | 0 | 0 | 17617566498042 | 17617566462427 | 17617566818588 | 17617566820094 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f6011acd980 | 0x7f6011225ec0 | 0 | 0 | 0 | 17617566851333 | 17617566818588 | 17617567281308 | 17617567302392 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f6011acd800 | 0x7f6011225f00 | 0 | 0 | 0 | 17617567321261 | 17617567281308 | 17617567617788 | 17617567619214 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f6011acd680 | 0x7f6011225f40 | 0 | 0 | 0 | 17617567650873 | 17617567617788 | 17617568009468 | 17617568029683 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f601422d500 | 0x7f6011225f80 | 0 | 0 | 0 | 17617568043053 | 17617568009468 | 17617568402268 | 17617568422443 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f601422d380 | 0x7f6011225fc0 | 0 | 0 | 0 | 17617568435943 | 17617568402268 | 17617568776508 | 17617568777964 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f601422d200 | 0x7f6011226000 | 0 | 0 | 0 | 17617568809754 | 17617568776508 | 17617569293468 | 17617569313371 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f601422d080 | 0x7f6011226040 | 0 | 0 | 0 | 17617569334100 | 17617569293468 | 17617569646909 | 17617569648382 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f6011acdf00 | 0x7f6011226080 | 0 | 0 | 0 | 17617569679902 | 17617569646909 | 17617570058429 | 17617570097621 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f6011acdd80 | 0x7f60112260c0 | 0 | 0 | 0 | 17617570110081 | 17617570058429 | 17617570504989 | 17617570550000 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f6011acdc00 | 0x7f6011226100 | 0 | 0 | 0 | 17617570562439 | 17617570504989 | 17617570931389 | 17617570978869 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f6011acda80 | 0x7f6011226140 | 0 | 0 | 0 | 17617570991138 | 17617570931389 | 17617571531229 | 17617571573424 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f6011acd900 | 0x7f6011226180 | 0 | 0 | 0 | 17617571592223 | 17617571531229 | 17617571925630 | 17617571927005 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f6011acd780 | 0x7f60112261c0 | 0 | 0 | 0 | 17617571958424 | 17617571925630 | 17617572356030 | 17617572399003 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f6011acd600 | 0x7f6011226200 | 0 | 0 | 0 | 17617572411482 | 17617572356030 | 17617572817150 | 17617572859911 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f601422d480 | 0x7f6011226240 | 0 | 0 | 0 | 17617572871331 | 17617572817150 | 17617573264350 | 17617573306480 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f601422d300 | 0x7f6011226280 | 0 | 0 | 0 | 17617573318479 | 17617573264350 | 17617573927710 | 17617573947643 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f601422d180 | 0x7f60112262c0 | 0 | 0 | 0 | 17617573969173 | 17617573927710 | 17617574344190 | 17617574391252 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f601422d000 | 0x7f6011226300 | 0 | 0 | 0 | 17617574403462 | 17617574344190 | 17617574861471 | 17617574905879 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f6011acde80 | 0x7f6011226340 | 0 | 0 | 0 | 17617574916769 | 17617574861471 | 17617575376831 | 17617575423176 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f6011acdd00 | 0x7f6011226380 | 0 | 0 | 0 | 17617575434706 | 17617575376831 | 17617575887391 | 17617575929613 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f6011acdb80 | 0x7f60112263c0 | 0 | 0 | 0 | 17617575941063 | 17617575887391 | 17617576653151 | 17617576695054 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f6011acda00 | 0x7f6011226400 | 0 | 0 | 0 | 17617576714243 | 17617576653151 | 17617577129312 | 17617577171232 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f6011acd880 | 0x7f6011226440 | 0 | 0 | 0 | 17617577183371 | 17617577129312 | 17617577690112 | 17617577732907 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f6011acd700 | 0x7f6011226480 | 0 | 0 | 0 | 17617577745047 | 17617577690112 | 17617578253792 | 17617578298543 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f601422d580 | 0x7f60112264c0 | 0 | 0 | 0 | 17617578311273 | 17617578253792 | 17617578819872 | 17617578863899 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f601422d400 | 0x7f6011226500 | 0 | 0 | 0 | 17617578875309 | 17617578819872 | 17617579692193 | 17617579737037 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f601422d280 | 0x7f6011226540 | 0 | 0 | 0 | 17617579756256 | 17617579692193 | 17617580351713 | 17617580394280 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f601422d100 | 0x7f6011226580 | 0 | 0 | 0 | 17617580405870 | 17617580351713 | 17617581023713 | 17617581066433 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f6011acdf80 | 0x7f60112265c0 | 0 | 0 | 0 | 17617581077973 | 17617581023713 | 17617581700033 | 17617581742536 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f6011acde00 | 0x7f6011226600 | 0 | 0 | 0 | 17617581753926 | 17617581700033 | 17617582360674 | 17617582407249 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f6011acdc80 | 0x7f6011226640 | 0 | 0 | 0 | 17617582419829 | 17617582360674 | 17617583457634 | 17617583504071 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f6011acdb00 | 0x7f6011226680 | 0 | 0 | 0 | 17617583524191 | 17617583457473 | 17617584555073 | 17617584626223 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f6011acd980 | 0x7f60112266c0 | 0 | 0 | 0 | 17617584637902 | 17617584555073 | 17617585691233 | 17617585755844 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f6011acd800 | 0x7f6011226700 | 0 | 0 | 0 | 17617585768084 | 17617585691233 | 17617586817153 | 17617586888225 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f6011acd680 | 0x7f6011226740 | 0 | 0 | 0 | 17617586900485 | 17617586817153 | 17617587936353 | 17617588002997 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f601422d500 | 0x7f6011226780 | 0 | 0 | 0 | 17617588015017 | 17617587936353 | 17617589885633 | 17617589953588 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f601422d380 | 0x7f60112267c0 | 0 | 0 | 0 | 17617589973427 | 17617589885633 | 17617591812033 | 17617591884639 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f601422d200 | 0x7f6011226800 | 0 | 0 | 0 | 17617591896388 | 17617591812033 | 17617593803073 | 17617593868648 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f601422d080 | 0x7f6011226840 | 0 | 0 | 0 | 17617593880348 | 17617593803073 | 17617595765953 | 17617595838408 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f6011acdf00 | 0x7f6011226880 | 0 | 0 | 0 | 17617595851508 | 17617595765953 | 17617597700514 | 17617597768509 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 225145 | 225145 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f6011acdd80 | 0x7f60112268c0 | 0 | 0 | 0 | 17617597780579 | 17617597700514 | 17617601308354 | 17617601387817 |