38 KiB
38 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_INSTS_LDS | SQ_INST_LEVEL_LDS | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 238351 | 238351 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7ff888c04280 | 0 | 0 | 0 | 17852547584485 | 17851838027568 | 17852691164011 | 17852691281181 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 238351 | 238351 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7ff888c23f80 | 0 | 0 | 0 | 17852696444493 | 17852691164011 | 17852696572335 | 17852696576630 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7ff88bbf2380 | 0x7ff888c23fc0 | 0 | 0 | 0 | 17852696612159 | 17852696572335 | 17852696936336 | 17852696938901 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7ff88bbf2200 | 0x7ff888c24000 | 0 | 0 | 0 | 17852696975310 | 17852696936336 | 17852697339696 | 17852697341881 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7ff88bbf2080 | 0x7ff888c24040 | 0 | 0 | 0 | 17852697374780 | 17852697339696 | 17852697731856 | 17852697734031 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7ff8895c3f00 | 0x7ff888c24080 | 0 | 0 | 0 | 17852697767750 | 17852697731856 | 17852698036497 | 17852698038724 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7ff8895c3d80 | 0x7ff888c240c0 | 0 | 0 | 0 | 17852698071613 | 17852698036497 | 17852698369937 | 17852698372295 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7ff8895c3c00 | 0x7ff888c24100 | 0 | 0 | 0 | 17852698416344 | 17852698369937 | 17852698676817 | 17852698679058 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7ff8895c3a80 | 0x7ff888c24140 | 0 | 0 | 0 | 17852698711897 | 17852698676817 | 17852699069297 | 17852699071448 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7ff8895c3900 | 0x7ff888c24180 | 0 | 0 | 0 | 17852699103227 | 17852699069297 | 17852699460338 | 17852699462438 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7ff8895c3780 | 0x7ff888c241c0 | 0 | 0 | 0 | 17852699493987 | 17852699460338 | 17852699775858 | 17852699778060 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7ff8895c3600 | 0x7ff888c24200 | 0 | 0 | 0 | 17852699811880 | 17852699775858 | 17852700079858 | 17852700082053 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7ff88bbf2480 | 0x7ff888c24240 | 0 | 0 | 0 | 17852700121632 | 17852700079858 | 17852700381299 | 17852700383605 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7ff88bbf2300 | 0x7ff888c24280 | 0 | 0 | 0 | 17852700416035 | 17852700381299 | 17852700771219 | 17852700773606 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7ff88bbf2180 | 0x7ff888c242c0 | 0 | 0 | 0 | 17852700806585 | 17852700771219 | 17852701160019 | 17852701162216 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7ff88bbf2000 | 0x7ff888c24300 | 0 | 0 | 0 | 17852701194595 | 17852701160019 | 17852701463540 | 17852701465598 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7ff8895c3e80 | 0x7ff888c24340 | 0 | 0 | 0 | 17852701497488 | 17852701463540 | 17852701764020 | 17852701766161 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7ff8895c3d00 | 0x7ff888c24380 | 0 | 0 | 0 | 17852701805360 | 17852701764020 | 17852702066100 | 17852702068263 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7ff8895c3b80 | 0x7ff888c243c0 | 0 | 0 | 0 | 17852702100643 | 17852702066100 | 17852702483061 | 17852702485313 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7ff8895c3a00 | 0x7ff888c24400 | 0 | 0 | 0 | 17852702517042 | 17852702483061 | 17852702873781 | 17852702876263 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7ff8895c3880 | 0x7ff888c24440 | 0 | 0 | 0 | 17852702907713 | 17852702873781 | 17852703202901 | 17852703204495 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7ff8895c3700 | 0x7ff888c24480 | 0 | 0 | 0 | 17852703236464 | 17852703202901 | 17852703480181 | 17852703481768 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7ff88bbf2580 | 0x7ff888c244c0 | 0 | 0 | 0 | 17852703520677 | 17852703480181 | 17852703754102 | 17852703755692 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7ff88bbf2400 | 0x7ff888c24500 | 0 | 0 | 0 | 17852703787681 | 17852703754102 | 17852704118422 | 17852704119943 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7ff88bbf2280 | 0x7ff888c24540 | 0 | 0 | 0 | 17852704151352 | 17852704118422 | 17852704479702 | 17852704481324 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7ff88bbf2100 | 0x7ff888c24580 | 0 | 0 | 0 | 17852704513273 | 17852704479702 | 17852704755543 | 17852704757237 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7ff8895c3f80 | 0x7ff888c245c0 | 0 | 0 | 0 | 17852704788706 | 17852704755543 | 17852705030423 | 17852705031860 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7ff8895c3e00 | 0x7ff888c24600 | 0 | 0 | 0 | 17852705070659 | 17852705030423 | 17852705307703 | 17852705309243 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7ff8895c3c80 | 0x7ff888c24640 | 0 | 0 | 0 | 17852705340412 | 17852705307703 | 17852705671863 | 17852705673524 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7ff8895c3b00 | 0x7ff888c24680 | 0 | 0 | 0 | 17852705705043 | 17852705671863 | 17852706035864 | 17852706037365 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7ff8895c3980 | 0x7ff888c246c0 | 0 | 0 | 0 | 17852706068534 | 17852706035864 | 17852706325464 | 17852706327138 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7ff8895c3800 | 0x7ff888c24700 | 0 | 0 | 0 | 17852706359087 | 17852706325464 | 17852706606264 | 17852706607841 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7ff8895c3680 | 0x7ff888c24740 | 0 | 0 | 0 | 17852706646370 | 17852706606264 | 17852706884824 | 17852706886384 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7ff88bbf2500 | 0x7ff888c24780 | 0 | 0 | 0 | 17852706918173 | 17852706884824 | 17852707251065 | 17852707252885 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7ff88bbf2380 | 0x7ff888c247c0 | 0 | 0 | 0 | 17852707284684 | 17852707251065 | 17852707615225 | 17852707616896 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7ff88bbf2200 | 0x7ff888c24800 | 0 | 0 | 0 | 17852707648045 | 17852707615225 | 17852707890105 | 17852707891729 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7ff88bbf2080 | 0x7ff888c24840 | 0 | 0 | 0 | 17852707923748 | 17852707890105 | 17852708166586 | 17852708168142 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7ff8895c3f00 | 0x7ff888c24880 | 0 | 0 | 0 | 17852708207321 | 17852708166586 | 17852708442906 | 17852708444485 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7ff8895c3d80 | 0x7ff888c248c0 | 0 | 0 | 0 | 17852708475035 | 17852708442906 | 17852708806266 | 17852708807837 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7ff8895c3c00 | 0x7ff888c24900 | 0 | 0 | 0 | 17852708841876 | 17852708806266 | 17852709173467 | 17852709175027 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7ff8895c3a80 | 0x7ff888c24940 | 0 | 0 | 0 | 17852709206277 | 17852709173467 | 17852709449627 | 17852709451181 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7ff8895c3900 | 0x7ff888c24980 | 0 | 0 | 0 | 17852709482650 | 17852709449627 | 17852709728507 | 17852709730184 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7ff8895c3780 | 0x7ff888c249c0 | 0 | 0 | 0 | 17852709769003 | 17852709728507 | 17852710007227 | 17852710008837 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7ff8895c3600 | 0x7ff888c24a00 | 0 | 0 | 0 | 17852710040166 | 17852710007227 | 17852710383548 | 17852710385288 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7ff88bbf2480 | 0x7ff888c24a40 | 0 | 0 | 0 | 17852710418027 | 17852710383548 | 17852710744348 | 17852710745959 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7ff88bbf2300 | 0x7ff888c24a80 | 0 | 0 | 0 | 17852710777528 | 17852710744348 | 17852711022108 | 17852711023662 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7ff88bbf2180 | 0x7ff888c24ac0 | 0 | 0 | 0 | 17852711055971 | 17852711022108 | 17852711304188 | 17852711305805 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7ff88bbf2000 | 0x7ff888c24b00 | 0 | 0 | 0 | 17852711344854 | 17852711304188 | 17852711596989 | 17852711598567 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7ff8895c3e80 | 0x7ff888c24b40 | 0 | 0 | 0 | 17852711631277 | 17852711596989 | 17852711961149 | 17852711962848 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7ff8895c3d00 | 0x7ff888c24b80 | 0 | 0 | 0 | 17852711994718 | 17852711961149 | 17852712327549 | 17852712329159 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7ff8895c3b80 | 0x7ff888c24bc0 | 0 | 0 | 0 | 17852712360458 | 17852712327549 | 17852712604990 | 17852712606592 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7ff8895c3a00 | 0x7ff888c24c00 | 0 | 0 | 0 | 17852712639062 | 17852712604990 | 17852712884670 | 17852712886435 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7ff8895c3880 | 0x7ff888c24c40 | 0 | 0 | 0 | 17852712924404 | 17852712884670 | 17852713165470 | 17852713167208 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7ff8895c3700 | 0x7ff888c24c80 | 0 | 0 | 0 | 17852713198898 | 17852713165470 | 17852713529470 | 17852713531039 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7ff88bbf2580 | 0x7ff888c24cc0 | 0 | 0 | 0 | 17852713563649 | 17852713529470 | 17852713890751 | 17852713892420 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7ff88bbf2400 | 0x7ff888c24d00 | 0 | 0 | 0 | 17852713925370 | 17852713890751 | 17852714168991 | 17852714170743 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7ff88bbf2280 | 0x7ff888c24d40 | 0 | 0 | 0 | 17852714203523 | 17852714168991 | 17852714452831 | 17852714454446 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7ff88bbf2100 | 0x7ff888c24d80 | 0 | 0 | 0 | 17852714493475 | 17852714452831 | 17852714730591 | 17852714732310 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7ff8895c3f80 | 0x7ff888c24dc0 | 0 | 0 | 0 | 17852714764259 | 17852714730591 | 17852715097952 | 17852715099550 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7ff8895c3e00 | 0x7ff888c24e00 | 0 | 0 | 0 | 17852715131050 | 17852715097952 | 17852715461792 | 17852715463411 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7ff8895c3c80 | 0x7ff888c24e40 | 0 | 0 | 0 | 17852715495341 | 17852715461792 | 17852715739392 | 17852715741084 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7ff8895c3b00 | 0x7ff888c24e80 | 0 | 0 | 0 | 17852715773124 | 17852715739392 | 17852716027073 | 17852716028717 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7ff8895c3980 | 0x7ff888c24ec0 | 0 | 0 | 0 | 17852716068486 | 17852716027073 | 17852716306273 | 17852716307960 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7ff8895c3800 | 0x7ff888c24f00 | 0 | 0 | 0 | 17852716339270 | 17852716306273 | 17852716670113 | 17852716671741 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7ff8895c3680 | 0x7ff888c24f40 | 0 | 0 | 0 | 17852716703121 | 17852716670113 | 17852717030914 | 17852717032592 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7ff88bbf2500 | 0x7ff888c24f80 | 0 | 0 | 0 | 17852717063472 | 17852717030914 | 17852717308194 | 17852717309915 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7ff88bbf2380 | 0x7ff888c24fc0 | 0 | 0 | 0 | 17852717341515 | 17852717308194 | 17852717598274 | 17852717599928 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7ff88bbf2200 | 0x7ff888c25000 | 0 | 0 | 0 | 17852717638877 | 17852717598274 | 17852717874434 | 17852717875971 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7ff88bbf2080 | 0x7ff888c25040 | 0 | 0 | 0 | 17852717908151 | 17852717874434 | 17852718242115 | 17852718243902 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7ff8895c3f00 | 0x7ff888c25080 | 0 | 0 | 0 | 17852718276721 | 17852718242115 | 17852718610275 | 17852718611863 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7ff8895c3d80 | 0x7ff888c250c0 | 0 | 0 | 0 | 17852718642982 | 17852718610275 | 17852718888355 | 17852718889966 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7ff8895c3c00 | 0x7ff888c25100 | 0 | 0 | 0 | 17852718921995 | 17852718888355 | 17852719182915 | 17852719184579 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7ff8895c3a80 | 0x7ff888c25140 | 0 | 0 | 0 | 17852719235378 | 17852719182915 | 17852719468836 | 17852719471082 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7ff8895c3900 | 0x7ff888c25180 | 0 | 0 | 0 | 17852719502231 | 17852719468836 | 17852719841796 | 17852719843403 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7ff8895c3780 | 0x7ff888c251c0 | 0 | 0 | 0 | 17852719875732 | 17852719841796 | 17852720205156 | 17852720206694 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7ff8895c3600 | 0x7ff888c25200 | 0 | 0 | 0 | 17852720237623 | 17852720205156 | 17852720485637 | 17852720487157 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7ff88bbf2480 | 0x7ff888c25240 | 0 | 0 | 0 | 17852720519426 | 17852720485637 | 17852720791237 | 17852720792699 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7ff88bbf2300 | 0x7ff888c25280 | 0 | 0 | 0 | 17852720831258 | 17852720791237 | 17852721075237 | 17852721076832 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7ff88bbf2180 | 0x7ff888c252c0 | 0 | 0 | 0 | 17852721109001 | 17852721075237 | 17852721437477 | 17852721439053 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7ff88bbf2000 | 0x7ff888c25300 | 0 | 0 | 0 | 17852721471482 | 17852721437477 | 17852721802598 | 17852721804134 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7ff8895c3e80 | 0x7ff888c25340 | 0 | 0 | 0 | 17852721835423 | 17852721802598 | 17852722083878 | 17852722085677 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7ff8895c3d00 | 0x7ff888c25380 | 0 | 0 | 0 | 17852722117666 | 17852722083878 | 17852722394278 | 17852722395829 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7ff8895c3b80 | 0x7ff888c253c0 | 0 | 0 | 0 | 17852722435088 | 17852722394278 | 17852722680359 | 17852722681982 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7ff8895c3a00 | 0x7ff888c25400 | 0 | 0 | 0 | 17852722713441 | 17852722680359 | 17852723043079 | 17852723044603 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7ff8895c3880 | 0x7ff888c25440 | 0 | 0 | 0 | 17852723076572 | 17852723043079 | 17852723405639 | 17852723407144 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7ff8895c3700 | 0x7ff888c25480 | 0 | 0 | 0 | 17852723438243 | 17852723405639 | 17852723683239 | 17852723685007 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7ff88bbf2580 | 0x7ff888c254c0 | 0 | 0 | 0 | 17852723715457 | 17852723683239 | 17852723995560 | 17852723997160 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7ff88bbf2400 | 0x7ff888c25500 | 0 | 0 | 0 | 17852724036629 | 17852723995560 | 17852724280840 | 17852724282463 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7ff88bbf2280 | 0x7ff888c25540 | 0 | 0 | 0 | 17852724316172 | 17852724280840 | 17852724642760 | 17852724644394 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7ff88bbf2100 | 0x7ff888c25580 | 0 | 0 | 0 | 17852724677643 | 17852724642760 | 17852725009481 | 17852725011105 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7ff8895c3f80 | 0x7ff888c255c0 | 0 | 0 | 0 | 17852725042374 | 17852725009481 | 17852725289481 | 17852725291098 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7ff8895c3e00 | 0x7ff888c25600 | 0 | 0 | 0 | 17852725322377 | 17852725289481 | 17852725611561 | 17852725613050 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7ff8895c3c80 | 0x7ff888c25640 | 0 | 0 | 0 | 17852725651419 | 17852725611561 | 17852725888361 | 17852725889983 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7ff8895c3b00 | 0x7ff888c25680 | 0 | 0 | 0 | 17852725921822 | 17852725888361 | 17852726253482 | 17852726255414 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7ff8895c3980 | 0x7ff888c256c0 | 0 | 0 | 0 | 17852726287623 | 17852726253482 | 17852726617482 | 17852726619115 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7ff8895c3800 | 0x7ff888c25700 | 0 | 0 | 0 | 17852726650804 | 17852726617482 | 17852726896522 | 17852726898098 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7ff8895c3680 | 0x7ff888c25740 | 0 | 0 | 0 | 17852726929667 | 17852726896522 | 17852727227403 | 17852727229120 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7ff88bbf2500 | 0x7ff888c25780 | 0 | 0 | 0 | 17852727267889 | 17852727227403 | 17852727511403 | 17852727512983 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7ff88bbf2380 | 0x7ff888c257c0 | 0 | 0 | 0 | 17852727544622 | 17852727511403 | 17852727872523 | 17852727874214 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7ff88bbf2200 | 0x7ff888c25800 | 0 | 0 | 0 | 17852727907013 | 17852727872523 | 17852728235724 | 17852728237265 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7ff88bbf2080 | 0x7ff888c25840 | 0 | 0 | 0 | 17852728269444 | 17852728235724 | 17852728517484 | 17852728519148 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7ff8895c3f00 | 0x7ff888c25880 | 0 | 0 | 0 | 17852728551127 | 17852728517484 | 17852728857804 | 17852728859459 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7ff8895c3d80 | 0x7ff888c258c0 | 0 | 0 | 0 | 17852728899498 | 17852728857804 | 17852729139564 | 17852729141132 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7ff8895c3c00 | 0x7ff888c25900 | 0 | 0 | 0 | 17852729172291 | 17852729139564 | 17852729503085 | 17852729504743 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7ff8895c3a80 | 0x7ff888c25940 | 0 | 0 | 0 | 17852729536092 | 17852729503085 | 17852729865485 | 17852729867124 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7ff8895c3900 | 0x7ff888c25980 | 0 | 0 | 0 | 17852729898743 | 17852729865485 | 17852730155885 | 17852730157467 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7ff8895c3780 | 0x7ff888c259c0 | 0 | 0 | 0 | 17852730190876 | 17852730155885 | 17852730525006 | 17852730526718 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7ff8895c3600 | 0x7ff888c25a00 | 0 | 0 | 0 | 17852730566577 | 17852730525006 | 17852730809646 | 17852730811381 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7ff88bbf2480 | 0x7ff888c25a40 | 0 | 0 | 0 | 17852730843010 | 17852730809646 | 17852731174126 | 17852731175882 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7ff88bbf2300 | 0x7ff888c25a80 | 0 | 0 | 0 | 17852731208061 | 17852731174126 | 17852731537327 | 17852731538893 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7ff88bbf2180 | 0x7ff888c25ac0 | 0 | 0 | 0 | 17852731570982 | 17852731537327 | 17852731833807 | 17852731835685 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7ff88bbf2000 | 0x7ff888c25b00 | 0 | 0 | 0 | 17852731867534 | 17852731833807 | 17852732204527 | 17852732206096 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7ff8895c3e80 | 0x7ff888c25b40 | 0 | 0 | 0 | 17852732245085 | 17852732204527 | 17852732490287 | 17852732491899 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7ff8895c3d00 | 0x7ff888c25b80 | 0 | 0 | 0 | 17852732523968 | 17852732490287 | 17852732856048 | 17852732857640 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7ff8895c3b80 | 0x7ff888c25bc0 | 0 | 0 | 0 | 17852732889679 | 17852732856048 | 17852733221648 | 17852733223301 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7ff8895c3a00 | 0x7ff888c25c00 | 0 | 0 | 0 | 17852733254330 | 17852733221648 | 17852733532368 | 17852733533913 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7ff8895c3880 | 0x7ff888c25c40 | 0 | 0 | 0 | 17852733565032 | 17852733532368 | 17852733921009 | 17852733941203 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7ff8895c3700 | 0x7ff888c25c80 | 0 | 0 | 0 | 17852733962372 | 17852733921009 | 17852734230929 | 17852734232796 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7ff88bbf2580 | 0x7ff888c25cc0 | 0 | 0 | 0 | 17852734265345 | 17852734230929 | 17852734609169 | 17852734610756 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7ff88bbf2400 | 0x7ff888c25d00 | 0 | 0 | 0 | 17852734643356 | 17852734609169 | 17852734977490 | 17852734979187 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7ff88bbf2280 | 0x7ff888c25d40 | 0 | 0 | 0 | 17852735011617 | 17852734977490 | 17852735296850 | 17852735298459 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7ff88bbf2100 | 0x7ff888c25d80 | 0 | 0 | 0 | 17852735330689 | 17852735296850 | 17852735711570 | 17852735757648 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7ff8895c3f80 | 0x7ff888c25dc0 | 0 | 0 | 0 | 17852735776978 | 17852735711570 | 17852736060851 | 17852736062501 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7ff8895c3e00 | 0x7ff888c25e00 | 0 | 0 | 0 | 17852736095700 | 17852736060851 | 17852736435731 | 17852736480820 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7ff8895c3c80 | 0x7ff888c25e40 | 0 | 0 | 0 | 17852736492510 | 17852736435731 | 17852736844051 | 17852736886710 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7ff8895c3b00 | 0x7ff888c25e80 | 0 | 0 | 0 | 17852736898080 | 17852736844051 | 17852737223732 | 17852737225292 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7ff8895c3980 | 0x7ff888c25ec0 | 0 | 0 | 0 | 17852737256371 | 17852737223732 | 17852737690612 | 17852737709180 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7ff8895c3800 | 0x7ff888c25f00 | 0 | 0 | 0 | 17852737731449 | 17852737690612 | 17852738033812 | 17852738035472 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7ff8895c3680 | 0x7ff888c25f40 | 0 | 0 | 0 | 17852738067181 | 17852738033812 | 17852738456693 | 17852738501100 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7ff88bbf2500 | 0x7ff888c25f80 | 0 | 0 | 0 | 17852738514830 | 17852738456693 | 17852738880533 | 17852738924380 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7ff88bbf2380 | 0x7ff888c25fc0 | 0 | 0 | 0 | 17852738935080 | 17852738880533 | 17852739282613 | 17852739284311 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7ff88bbf2200 | 0x7ff888c26000 | 0 | 0 | 0 | 17852739317490 | 17852739282613 | 17852739807414 | 17852739849997 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7ff88bbf2080 | 0x7ff888c26040 | 0 | 0 | 0 | 17852739870166 | 17852739807414 | 17852740189654 | 17852740191278 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7ff8895c3f00 | 0x7ff888c26080 | 0 | 0 | 0 | 17852740223318 | 17852740189654 | 17852740603735 | 17852740646387 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7ff8895c3d80 | 0x7ff888c260c0 | 0 | 0 | 0 | 17852740658167 | 17852740603735 | 17852741053975 | 17852741096566 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7ff8895c3c00 | 0x7ff888c26100 | 0 | 0 | 0 | 17852741108566 | 17852741053975 | 17852741477975 | 17852741520635 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7ff8895c3a80 | 0x7ff888c26140 | 0 | 0 | 0 | 17852741533345 | 17852741477975 | 17852742078136 | 17852742100261 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7ff8895c3900 | 0x7ff888c26180 | 0 | 0 | 0 | 17852742120520 | 17852742078136 | 17852742462616 | 17852742464122 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7ff8895c3780 | 0x7ff888c261c0 | 0 | 0 | 0 | 17852742495881 | 17852742462616 | 17852742899737 | 17852742943690 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7ff8895c3600 | 0x7ff888c26200 | 0 | 0 | 0 | 17852742956040 | 17852742899737 | 17852743360697 | 17852743403809 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7ff88bbf2480 | 0x7ff888c26240 | 0 | 0 | 0 | 17852743415308 | 17852743360697 | 17852743812697 | 17852743854887 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7ff88bbf2300 | 0x7ff888c26280 | 0 | 0 | 0 | 17852743866867 | 17852743812697 | 17852744476858 | 17852744519781 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7ff88bbf2180 | 0x7ff888c262c0 | 0 | 0 | 0 | 17852744539460 | 17852744476858 | 17852744921338 | 17852744964010 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7ff88bbf2000 | 0x7ff888c26300 | 0 | 0 | 0 | 17852744975050 | 17852744921338 | 17852745435899 | 17852745478337 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7ff8895c3e80 | 0x7ff888c26340 | 0 | 0 | 0 | 17852745489317 | 17852745435899 | 17852745954459 | 17852745972385 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7ff8895c3d00 | 0x7ff888c26380 | 0 | 0 | 0 | 17852745987614 | 17852745954459 | 17852746446940 | 17852746492852 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7ff8895c3b80 | 0x7ff888c263c0 | 0 | 0 | 0 | 17852746505581 | 17852746446940 | 17852747219101 | 17852747263173 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7ff8895c3a00 | 0x7ff888c26400 | 0 | 0 | 0 | 17852747283402 | 17852747219101 | 17852747705501 | 17852747747881 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7ff8895c3880 | 0x7ff888c26440 | 0 | 0 | 0 | 17852747758960 | 17852747705501 | 17852748268861 | 17852748310857 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7ff8895c3700 | 0x7ff888c26480 | 0 | 0 | 0 | 17852748322856 | 17852748268861 | 17852748832222 | 17852748874753 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7ff88bbf2580 | 0x7ff888c264c0 | 0 | 0 | 0 | 17852748886262 | 17852748832222 | 17852749394782 | 17852749436889 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7ff88bbf2400 | 0x7ff888c26500 | 0 | 0 | 0 | 17852749448568 | 17852749394782 | 17852750264863 | 17852750309257 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7ff88bbf2280 | 0x7ff888c26540 | 0 | 0 | 0 | 17852750329727 | 17852750264863 | 17852750929824 | 17852750974811 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7ff88bbf2100 | 0x7ff888c26580 | 0 | 0 | 0 | 17852750986220 | 17852750929824 | 17852751608704 | 17852751652374 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7ff8895c3f80 | 0x7ff888c265c0 | 0 | 0 | 0 | 17852751663494 | 17852751608704 | 17852752288225 | 17852752330757 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7ff8895c3e00 | 0x7ff888c26600 | 0 | 0 | 0 | 17852752342157 | 17852752288225 | 17852752950306 | 17852752993191 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7ff8895c3c80 | 0x7ff888c26640 | 0 | 0 | 0 | 17852753004871 | 17852752950014 | 17852754036414 | 17852754087294 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7ff8895c3b00 | 0x7ff888c26680 | 0 | 0 | 0 | 17852754107573 | 17852754036414 | 17852755116734 | 17852755164557 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7ff8895c3980 | 0x7ff888c266c0 | 0 | 0 | 0 | 17852755175877 | 17852755116734 | 17852756216414 | 17852756283989 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7ff8895c3800 | 0x7ff888c26700 | 0 | 0 | 0 | 17852756296649 | 17852756216414 | 17852757347295 | 17852757413671 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7ff8895c3680 | 0x7ff888c26740 | 0 | 0 | 0 | 17852757426311 | 17852757347295 | 17852758455615 | 17852758522824 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7ff88bbf2500 | 0x7ff888c26780 | 0 | 0 | 0 | 17852758535493 | 17852758455615 | 17852760413696 | 17852760480545 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7ff88bbf2380 | 0x7ff888c267c0 | 0 | 0 | 0 | 17852760500774 | 17852760413696 | 17852762329856 | 17852762401667 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7ff88bbf2200 | 0x7ff888c26800 | 0 | 0 | 0 | 17852762414887 | 17852762329856 | 17852764331937 | 17852764406557 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7ff88bbf2080 | 0x7ff888c26840 | 0 | 0 | 0 | 17852764419157 | 17852764331937 | 17852766312417 | 17852766383018 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7ff8895c3f00 | 0x7ff888c26880 | 0 | 0 | 0 | 17852766395418 | 17852766312417 | 17852768250018 | 17852768319691 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 238351 | 238351 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7ff8895c3d80 | 0x7ff888c268c0 | 0 | 0 | 0 | 17852768331360 | 17852768250018 | 17852771863459 | 17852771934051 |