41 KiB
41 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_INSTS_SMEM | SQ_INST_LEVEL_SMEM | SQ_ACCUM_PREV_HIRES | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 236281 | 236281 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f45e2204280 | 3670016 | 2905706 | 325081704 | 17832236843728 | 17803198665120 | 17832381629870 | 17832381744199 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 236281 | 236281 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f45e2223f80 | 512 | 96664 | 10851288 | 17832386900391 | 17832381629870 | 17832387026519 | 17832387030968 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f45e517a380 | 0x7f45e2223fc0 | 65536 | 639926 | 71704992 | 17832387064037 | 17832387026519 | 17832387402360 | 17832387404639 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f45e517a200 | 0x7f45e2224000 | 65536 | 663120 | 74284736 | 17832387441058 | 17832387402360 | 17832387801401 | 17832387803559 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f45e517a080 | 0x7f45e2224040 | 65536 | 655382 | 73467248 | 17832387836288 | 17832387801401 | 17832388189241 | 17832388191179 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f45e2b1bf00 | 0x7f45e2224080 | 65536 | 670706 | 75144576 | 17832388224148 | 17832388189241 | 17832388486682 | 17832388488802 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f45e2b1bd80 | 0x7f45e22240c0 | 65536 | 633498 | 70989896 | 17832388520711 | 17832388486682 | 17832388784443 | 17832388786445 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f45e2b1bc00 | 0x7f45e2224100 | 131072 | 682790 | 76349744 | 17832388829873 | 17832388784443 | 17832389085883 | 17832389087897 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f45e2b1ba80 | 0x7f45e2224140 | 131072 | 702258 | 78734048 | 17832389118956 | 17832389085883 | 17832389470044 | 17832389472077 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f45e2b1b900 | 0x7f45e2224180 | 65536 | 662922 | 74237584 | 17832389504707 | 17832389470044 | 17832389854525 | 17832389856588 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f45e2b1b780 | 0x7f45e22241c0 | 131072 | 702944 | 78662512 | 17832389887017 | 17832389854525 | 17832390165245 | 17832390167590 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f45e2b1b600 | 0x7f45e2224200 | 131072 | 698592 | 78249728 | 17832390198889 | 17832390165245 | 17832390471646 | 17832390473623 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f45e517a480 | 0x7f45e2224240 | 131072 | 699922 | 78366656 | 17832390513972 | 17832390471646 | 17832390771166 | 17832390773245 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f45e517a300 | 0x7f45e2224280 | 131072 | 697276 | 78078456 | 17832390805924 | 17832390771166 | 17832391153567 | 17832391155576 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f45e517a180 | 0x7f45e22242c0 | 65536 | 671700 | 75200016 | 17832391187125 | 17832391153567 | 17832391540928 | 17832391542376 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f45e517a000 | 0x7f45e2224300 | 131072 | 717992 | 80318352 | 17832391573205 | 17832391540928 | 17832391812928 | 17832391814339 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f45e2b1be80 | 0x7f45e2224340 | 131072 | 697818 | 78152560 | 17832391845858 | 17832391812928 | 17832392080929 | 17832392082433 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f45e2b1bd00 | 0x7f45e2224380 | 131072 | 696712 | 78052840 | 17832392120672 | 17832392080929 | 17832392351169 | 17832392352656 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f45e2b1bb80 | 0x7f45e22243c0 | 131072 | 703960 | 78811040 | 17832392384785 | 17832392351169 | 17832392707170 | 17832392708527 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f45e2b1ba00 | 0x7f45e2224400 | 65536 | 627350 | 70207160 | 17832392739226 | 17832392707170 | 17832393068610 | 17832393070078 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f45e2b1b880 | 0x7f45e2224440 | 131072 | 705208 | 78953168 | 17832393101637 | 17832393068610 | 17832393338211 | 17832393339751 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f45e2b1b700 | 0x7f45e2224480 | 131072 | 700642 | 78443288 | 17832393370981 | 17832393338211 | 17832393606371 | 17832393607955 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f45e517a580 | 0x7f45e22244c0 | 131072 | 680574 | 76153088 | 17832393646754 | 17832393606371 | 17832393874692 | 17832393876138 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f45e517a400 | 0x7f45e2224500 | 131072 | 698066 | 78128680 | 17832393908427 | 17832393874692 | 17832394231173 | 17832394232719 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f45e517a280 | 0x7f45e2224540 | 65536 | 676560 | 75754120 | 17832394265568 | 17832394231173 | 17832394590853 | 17832394592260 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f45e517a100 | 0x7f45e2224580 | 131072 | 665052 | 74494800 | 17832394623679 | 17832394590853 | 17832394868134 | 17832394869783 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f45e2b1bf80 | 0x7f45e22245c0 | 131072 | 662312 | 74088520 | 17832394901143 | 17832394868134 | 17832395136934 | 17832395138407 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f45e2b1be00 | 0x7f45e2224600 | 131072 | 693582 | 77663792 | 17832395177386 | 17832395136934 | 17832395409415 | 17832395411120 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f45e2b1bc80 | 0x7f45e2224640 | 131072 | 655674 | 73523104 | 17832395443299 | 17832395409415 | 17832395769415 | 17832395770881 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f45e2b1bb00 | 0x7f45e2224680 | 65536 | 651942 | 73037144 | 17832395802430 | 17832395769415 | 17832396129576 | 17832396130962 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f45e2b1b980 | 0x7f45e22246c0 | 131072 | 697390 | 78190624 | 17832396162821 | 17832396129576 | 17832396399017 | 17832396400575 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f45e2b1b800 | 0x7f45e2224700 | 131072 | 705040 | 79031632 | 17832396431605 | 17832396399017 | 17832396669097 | 17832396670499 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f45e2b1b680 | 0x7f45e2224740 | 131072 | 702546 | 78684376 | 17832396708378 | 17832396669097 | 17832396940938 | 17832396942292 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f45e517a500 | 0x7f45e2224780 | 131072 | 700556 | 78359944 | 17832396974431 | 17832396940938 | 17832397298858 | 17832397300373 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f45e517a380 | 0x7f45e22247c0 | 65536 | 670702 | 75074704 | 17832397332152 | 17832397298858 | 17832397656299 | 17832397657764 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f45e517a200 | 0x7f45e2224800 | 131072 | 702376 | 78607656 | 17832397689003 | 17832397656299 | 17832397926859 | 17832397928307 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f45e517a080 | 0x7f45e2224840 | 131072 | 699218 | 78252712 | 17832397960087 | 17832397926859 | 17832398207180 | 17832398208690 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f45e2b1bf00 | 0x7f45e2224880 | 131072 | 702102 | 78566800 | 17832398247609 | 17832398207180 | 17832398479980 | 17832398481434 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f45e2b1bd80 | 0x7f45e22248c0 | 131072 | 700322 | 78462936 | 17832398512783 | 17832398479980 | 17832398840781 | 17832398842455 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f45e2b1bc00 | 0x7f45e2224900 | 65536 | 678432 | 76016512 | 17832398873754 | 17832398840781 | 17832399198542 | 17832399199956 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f45e2b1ba80 | 0x7f45e2224940 | 131072 | 704048 | 78850440 | 17832399231725 | 17832399198542 | 17832399474062 | 17832399475529 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f45e2b1b900 | 0x7f45e2224980 | 131072 | 704294 | 78762488 | 17832399506378 | 17832399474062 | 17832399744143 | 17832399745442 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f45e2b1b780 | 0x7f45e22249c0 | 131072 | 697140 | 78101344 | 17832399784061 | 17832399744143 | 17832400014383 | 17832400015876 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f45e2b1b600 | 0x7f45e2224a00 | 131072 | 691764 | 77411472 | 17832400047205 | 17832400014383 | 17832400368784 | 17832400370137 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f45e517a480 | 0x7f45e2224a40 | 65536 | 658738 | 73825056 | 17832400402316 | 17832400368784 | 17832400729425 | 17832400730838 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f45e517a300 | 0x7f45e2224a80 | 131072 | 656782 | 73609296 | 17832400762547 | 17832400729425 | 17832401000945 | 17832401002361 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f45e517a180 | 0x7f45e2224ac0 | 131072 | 661124 | 73973224 | 17832401033580 | 17832401000945 | 17832401268146 | 17832401269644 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f45e517a000 | 0x7f45e2224b00 | 131072 | 684772 | 76598464 | 17832401308053 | 17832401268146 | 17832401548306 | 17832401549867 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f45e2b1be80 | 0x7f45e2224b40 | 131072 | 697314 | 78128704 | 17832401581147 | 17832401548306 | 17832401907827 | 17832401909338 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f45e2b1bd00 | 0x7f45e2224b80 | 65536 | 669406 | 75005032 | 17832401940558 | 17832401907827 | 17832402263187 | 17832402264740 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f45e2b1bb80 | 0x7f45e2224bc0 | 131072 | 700766 | 78516768 | 17832402296189 | 17832402263187 | 17832402535028 | 17832402536433 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f45e2b1ba00 | 0x7f45e2224c00 | 131072 | 697972 | 78177560 | 17832402567832 | 17832402535028 | 17832402805428 | 17832402806846 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f45e2b1b880 | 0x7f45e2224c40 | 131072 | 700048 | 78487152 | 17832402845555 | 17832402805428 | 17832403074549 | 17832403076009 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f45e2b1b700 | 0x7f45e2224c80 | 131072 | 688204 | 77079128 | 17832403107349 | 17832403074549 | 17832403429910 | 17832403431351 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f45e517a580 | 0x7f45e2224cc0 | 65536 | 671532 | 75179760 | 17832403463430 | 17832403429910 | 17832403788470 | 17832403789782 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f45e517a400 | 0x7f45e2224d00 | 131072 | 698470 | 78218528 | 17832403821291 | 17832403788470 | 17832404062391 | 17832404063835 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f45e517a280 | 0x7f45e2224d40 | 131072 | 719656 | 80602296 | 17832404096124 | 17832404062391 | 17832404342871 | 17832404344338 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f45e517a100 | 0x7f45e2224d80 | 131072 | 699666 | 78335584 | 17832404383487 | 17832404342871 | 17832404613432 | 17832404614811 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f45e2b1bf80 | 0x7f45e2224dc0 | 131072 | 670210 | 75060888 | 17832404646430 | 17832404613432 | 17832404968153 | 17832404969772 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f45e2b1be00 | 0x7f45e2224e00 | 65536 | 633208 | 71027776 | 17832405001072 | 17832404968153 | 17832405322393 | 17832405323704 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f45e2b1bc80 | 0x7f45e2224e40 | 131072 | 707008 | 79154256 | 17832405354693 | 17832405322393 | 17832405595354 | 17832405596747 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f45e2b1bb00 | 0x7f45e2224e80 | 131072 | 697218 | 78137600 | 17832405628186 | 17832405595354 | 17832405873754 | 17832405875160 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f45e2b1b980 | 0x7f45e2224ec0 | 131072 | 658308 | 73724672 | 17832405914139 | 17832405873754 | 17832406146555 | 17832406148433 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f45e2b1b800 | 0x7f45e2224f00 | 131072 | 701386 | 78527344 | 17832406179992 | 17832406146555 | 17832406507515 | 17832406508934 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f45e2b1b680 | 0x7f45e2224f40 | 65536 | 699622 | 78401520 | 17832406541013 | 17832406507515 | 17832406867356 | 17832406868765 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f45e517a500 | 0x7f45e2224f80 | 131072 | 671958 | 75210128 | 17832406900764 | 17832406867356 | 17832407143517 | 17832407144948 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f45e517a380 | 0x7f45e2224fc0 | 131072 | 672706 | 75381112 | 17832407176878 | 17832407143517 | 17832407432797 | 17832407434251 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f45e517a200 | 0x7f45e2225000 | 131072 | 696606 | 78217128 | 17832407473750 | 17832407432797 | 17832407701278 | 17832407702625 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f45e517a080 | 0x7f45e2225040 | 131072 | 708204 | 79429472 | 17832407734074 | 17832407701278 | 17832408057758 | 17832408059176 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f45e2b1bf00 | 0x7f45e2225080 | 65536 | 661894 | 74152040 | 17832408090825 | 17832408057758 | 17832408412799 | 17832408414247 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f45e2b1bd80 | 0x7f45e22250c0 | 131072 | 702324 | 78672392 | 17832408445166 | 17832408412799 | 17832408681919 | 17832408683310 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f45e2b1bc00 | 0x7f45e2225100 | 131072 | 703198 | 78825248 | 17832408714829 | 17832408681919 | 17832408971200 | 17832408972643 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f45e2b1ba80 | 0x7f45e2225140 | 131072 | 699814 | 78254928 | 17832409022692 | 17832408971200 | 17832409251040 | 17832409253036 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f45e2b1b900 | 0x7f45e2225180 | 131072 | 698260 | 78216224 | 17832409284085 | 17832409251040 | 17832409610241 | 17832409611667 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f45e2b1b780 | 0x7f45e22251c0 | 65536 | 663758 | 74336688 | 17832409642866 | 17832409610241 | 17832409971362 | 17832409972698 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f45e2b1b600 | 0x7f45e2225200 | 131072 | 694200 | 77848640 | 17832410003777 | 17832409971362 | 17832410252802 | 17832410254361 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f45e517a480 | 0x7f45e2225240 | 131072 | 704834 | 78911016 | 17832410286130 | 17832410252802 | 17832410551683 | 17832410553104 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f45e517a300 | 0x7f45e2225280 | 131072 | 696928 | 78035104 | 17832410592273 | 17832410551683 | 17832410821763 | 17832410823297 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f45e517a180 | 0x7f45e22252c0 | 131072 | 696032 | 78057056 | 17832410854646 | 17832410821763 | 17832411183844 | 17832411185188 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f45e517a000 | 0x7f45e2225300 | 65536 | 660730 | 74005000 | 17832411216967 | 17832411183844 | 17832411540005 | 17832411541479 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f45e2b1be80 | 0x7f45e2225340 | 131072 | 701146 | 78404240 | 17832411573128 | 17832411540005 | 17832411810245 | 17832411811662 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f45e2b1bd00 | 0x7f45e2225380 | 131072 | 703188 | 78765024 | 17832411843102 | 17832411810245 | 17832412111046 | 17832412112465 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f45e2b1bb80 | 0x7f45e22253c0 | 131072 | 709754 | 79435024 | 17832412151434 | 17832412111046 | 17832412380646 | 17832412382098 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f45e2b1ba00 | 0x7f45e2225400 | 131072 | 658220 | 73796752 | 17832412413128 | 17832412380646 | 17832412737927 | 17832412739229 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f45e2b1b880 | 0x7f45e2225440 | 65536 | 668226 | 74799216 | 17832412770779 | 17832412737927 | 17832413094088 | 17832413095561 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f45e2b1b700 | 0x7f45e2225480 | 131072 | 699192 | 78287464 | 17832413126350 | 17832413094088 | 17832413365928 | 17832413367344 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f45e517a580 | 0x7f45e22254c0 | 131072 | 664510 | 74391096 | 17832413398753 | 17832413365928 | 17832413681769 | 17832413683106 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f45e517a400 | 0x7f45e2225500 | 131072 | 661860 | 74059304 | 17832413722855 | 17832413681769 | 17832413960329 | 17832413961969 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f45e517a280 | 0x7f45e2225540 | 131072 | 737226 | 82573104 | 17832413994028 | 17832413960329 | 17832414330890 | 17832414332460 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f45e517a100 | 0x7f45e2225580 | 65536 | 649046 | 72669000 | 17832414364689 | 17832414330890 | 17832414688490 | 17832414689831 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f45e2b1bf80 | 0x7f45e22255c0 | 131072 | 700704 | 78428320 | 17832414720670 | 17832414688490 | 17832414958891 | 17832414960154 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f45e2b1be00 | 0x7f45e2225600 | 131072 | 698686 | 78251168 | 17832414991923 | 17832414958891 | 17832415269932 | 17832415271487 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f45e2b1bc80 | 0x7f45e2225640 | 131072 | 710916 | 79668896 | 17832415310446 | 17832415269932 | 17832415545452 | 17832415546950 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f45e2b1bb00 | 0x7f45e2225680 | 131072 | 697310 | 78148408 | 17832415578609 | 17832415545452 | 17832415904493 | 17832415905951 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f45e2b1b980 | 0x7f45e22256c0 | 65536 | 661938 | 74012344 | 17832415937590 | 17832415904493 | 17832416259213 | 17832416260682 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f45e2b1b800 | 0x7f45e2225700 | 131072 | 692126 | 77552648 | 17832416291961 | 17832416259213 | 17832416530734 | 17832416532145 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f45e2b1b680 | 0x7f45e2225740 | 131072 | 698988 | 78259728 | 17832416563474 | 17832416530734 | 17832416860815 | 17832416862117 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f45e517a500 | 0x7f45e2225780 | 131072 | 702876 | 78705712 | 17832416901386 | 17832416860815 | 17832417138575 | 17832417139930 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f45e517a380 | 0x7f45e22257c0 | 131072 | 693560 | 77710752 | 17832417171159 | 17832417138575 | 17832417493776 | 17832417495141 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f45e517a200 | 0x7f45e2225800 | 65536 | 662746 | 74090080 | 17832417526740 | 17832417493776 | 17832417851056 | 17832417852392 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f45e517a080 | 0x7f45e2225840 | 131072 | 661518 | 74056760 | 17832417884092 | 17832417851056 | 17832418128657 | 17832418130245 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f45e2b1bf00 | 0x7f45e2225880 | 131072 | 653794 | 73269240 | 17832418161545 | 17832418128657 | 17832418465617 | 17832418467077 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f45e2b1bd80 | 0x7f45e22258c0 | 131072 | 1023684 | 114753968 | 17832418506686 | 17832418465617 | 17832418740658 | 17832418742170 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f45e2b1bc00 | 0x7f45e2225900 | 131072 | 671298 | 75085032 | 17832418772820 | 17832418740658 | 17832419097139 | 17832419098551 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f45e2b1ba80 | 0x7f45e2225940 | 65536 | 666606 | 74689200 | 17832419130061 | 17832419097139 | 17832419452179 | 17832419453583 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f45e2b1b900 | 0x7f45e2225980 | 131072 | 710322 | 79534256 | 17832419484652 | 17832419452179 | 17832419735860 | 17832419737316 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f45e2b1b780 | 0x7f45e22259c0 | 131072 | 736312 | 82503920 | 17832419768565 | 17832419735860 | 17832420079220 | 17832420080667 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f45e2b1b600 | 0x7f45e2225a00 | 131072 | 1046700 | 117191448 | 17832420119236 | 17832420079220 | 17832420355061 | 17832420356480 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f45e517a480 | 0x7f45e2225a40 | 131072 | 700300 | 78439888 | 17832420388939 | 17832420355061 | 17832420711382 | 17832420712981 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f45e517a300 | 0x7f45e2225a80 | 65536 | 656180 | 73492696 | 17832420744921 | 17832420711382 | 17832421067382 | 17832421068772 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f45e517a180 | 0x7f45e2225ac0 | 131072 | 661490 | 74168088 | 17832421100522 | 17832421067382 | 17832421352503 | 17832421354025 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f45e517a000 | 0x7f45e2225b00 | 131072 | 661534 | 74112968 | 17832421385605 | 17832421352503 | 17832421716504 | 17832421717916 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f45e2b1be80 | 0x7f45e2225b40 | 131072 | 1011892 | 113488728 | 17832421756865 | 17832421716504 | 17832421995064 | 17832421996489 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f45e2b1bd00 | 0x7f45e2225b80 | 131072 | 696698 | 78101400 | 17832422028569 | 17832421995064 | 17832422368025 | 17832422369470 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f45e2b1bb80 | 0x7f45e2225bc0 | 65536 | 656468 | 73595576 | 17832422401369 | 17832422368025 | 17832422728505 | 17832422729841 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f45e2b1ba00 | 0x7f45e2225c00 | 131072 | 669734 | 74996912 | 17832422761010 | 17832422728505 | 17832423026106 | 17832423027444 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f45e2b1b880 | 0x7f45e2225c40 | 131072 | 690138 | 77349608 | 17832423059093 | 17832423026106 | 17832423408987 | 17832423453883 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f45e2b1b700 | 0x7f45e2225c80 | 131072 | 1046912 | 117125320 | 17832423472813 | 17832423408987 | 17832423731227 | 17832423732736 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f45e517a580 | 0x7f45e2225cc0 | 131072 | 690800 | 77384592 | 17832423764206 | 17832423731227 | 17832424090748 | 17832424092097 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f45e517a400 | 0x7f45e2225d00 | 65536 | 668714 | 74860336 | 17832424123747 | 17832424090748 | 17832424452029 | 17832424453478 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f45e517a280 | 0x7f45e2225d40 | 131072 | 706034 | 79144240 | 17832424484928 | 17832424452029 | 17832424772349 | 17832424773870 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f45e517a100 | 0x7f45e2225d80 | 131072 | 714758 | 80019096 | 17832424805450 | 17832424772349 | 17832425182430 | 17832425201200 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f45e2b1bf80 | 0x7f45e2225dc0 | 131072 | 970250 | 108608768 | 17832425223149 | 17832425182430 | 17832425496191 | 17832425497572 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f45e2b1be00 | 0x7f45e2225e00 | 131072 | 692588 | 77495152 | 17832425529002 | 17832425496191 | 17832425861951 | 17832425879323 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f45e2b1bc80 | 0x7f45e2225e40 | 65536 | 661972 | 74144176 | 17832425894783 | 17832425861951 | 17832426234112 | 17832426255404 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f45e2b1bb00 | 0x7f45e2225e80 | 131072 | 651518 | 72914568 | 17832426267983 | 17832426234112 | 17832426585633 | 17832426587085 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f45e2b1b980 | 0x7f45e2225ec0 | 131072 | 671736 | 75207744 | 17832426618065 | 17832426585633 | 17832427044993 | 17832427089873 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f45e2b1b800 | 0x7f45e2225f00 | 131072 | 1062116 | 119035424 | 17832427108662 | 17832427044993 | 17832427406114 | 17832427407555 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f45e2b1b680 | 0x7f45e2225f40 | 131072 | 1014410 | 113677256 | 17832427438934 | 17832427406114 | 17832427792675 | 17832427838994 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f45e517a500 | 0x7f45e2225f80 | 65536 | 657096 | 73695568 | 17832427849524 | 17832427792675 | 17832428213636 | 17832428255404 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f45e517a380 | 0x7f45e2225fc0 | 131072 | 717844 | 80385384 | 17832428266174 | 17832428213636 | 17832428607556 | 17832428608975 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f45e517a200 | 0x7f45e2226000 | 131072 | 673558 | 75636112 | 17832428640334 | 17832428607556 | 17832429119877 | 17832429137962 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f45e517a080 | 0x7f45e2226040 | 131072 | 1041120 | 116628760 | 17832429161011 | 17832429119877 | 17832429472198 | 17832429473604 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f45e2b1bf00 | 0x7f45e2226080 | 131072 | 1067732 | 119659152 | 17832429504273 | 17832429472198 | 17832429880679 | 17832429898823 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f45e2b1bd80 | 0x7f45e22260c0 | 65536 | 658714 | 73729664 | 17832429916513 | 17832429880679 | 17832430292359 | 17832430311503 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f45e2b1bc00 | 0x7f45e2226100 | 131072 | 687526 | 77019512 | 17832430325033 | 17832430292359 | 17832430692680 | 17832430710793 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f45e2b1ba80 | 0x7f45e2226140 | 131072 | 666764 | 74771272 | 17832430725573 | 17832430692680 | 17832431260201 | 17832431301548 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f45e2b1b900 | 0x7f45e2226180 | 131072 | 1033082 | 115803352 | 17832431320018 | 17832431260201 | 17832431648682 | 17832431650150 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f45e2b1b780 | 0x7f45e22261c0 | 131072 | 1018378 | 114085552 | 17832431681109 | 17832431648682 | 17832432079403 | 17832432098638 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f45e2b1b600 | 0x7f45e2226200 | 65536 | 707324 | 79208912 | 17832432112608 | 17832432079403 | 17832432509483 | 17832432526118 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f45e517a480 | 0x7f45e2226240 | 131072 | 686242 | 76860856 | 17832432542687 | 17832432509483 | 17832432925964 | 17832432942478 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f45e517a300 | 0x7f45e2226280 | 131072 | 1038276 | 116262688 | 17832432959327 | 17832432925964 | 17832433558125 | 17832433575932 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f45e517a180 | 0x7f45e22262c0 | 131072 | 1036858 | 116181864 | 17832433599221 | 17832433558125 | 17832433967406 | 17832433983822 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f45e517a000 | 0x7f45e2226300 | 131072 | 987292 | 110725048 | 17832433999841 | 17832433967406 | 17832434454927 | 17832434474699 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f45e2b1be80 | 0x7f45e2226340 | 65536 | 659696 | 73899072 | 17832434488319 | 17832434454927 | 17832434939088 | 17832434981337 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f45e2b1bd00 | 0x7f45e2226380 | 131072 | 1018800 | 114147848 | 17832434992227 | 17832434939088 | 17832435444529 | 17832435485844 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f45e2b1bb80 | 0x7f45e22263c0 | 131072 | 1031960 | 115523272 | 17832435497234 | 17832435444529 | 17832436205010 | 17832436246585 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f45e2b1ba00 | 0x7f45e2226400 | 131072 | 1016270 | 113811808 | 17832436266265 | 17832436205010 | 17832436678611 | 17832436698824 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f45e2b1b880 | 0x7f45e2226440 | 131072 | 1032280 | 115828136 | 17832436711484 | 17832436678611 | 17832437211732 | 17832437228671 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f45e2b1b700 | 0x7f45e2226480 | 65536 | 659624 | 74051544 | 17832437245851 | 17832437211732 | 17832437740053 | 17832437759248 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f45e517a580 | 0x7f45e22264c0 | 131072 | 1005662 | 112551368 | 17832437773327 | 17832437740053 | 17832438273334 | 17832438291035 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f45e517a400 | 0x7f45e2226500 | 131072 | 1031584 | 115645744 | 17832438307094 | 17832438273334 | 17832439116216 | 17832439157763 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f45e517a280 | 0x7f45e2226540 | 131072 | 1061368 | 118941344 | 17832439177303 | 17832439116216 | 17832439767897 | 17832439809577 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f45e517a100 | 0x7f45e2226580 | 131072 | 1036776 | 116154712 | 17832439821457 | 17832439767897 | 17832440436698 | 17832440477671 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f45e2b1bf80 | 0x7f45e22265c0 | 65536 | 660196 | 73909144 | 17832440489810 | 17832440436698 | 17832441100379 | 17832441118625 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f45e2b1be00 | 0x7f45e2226600 | 131072 | 1039478 | 116410952 | 17832441132995 | 17832441100379 | 17832441730941 | 17832441747759 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f45e2b1bc80 | 0x7f45e2226640 | 131072 | 1041146 | 116504080 | 17832441763269 | 17832441730941 | 17832442784223 | 17832442804243 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f45e2b1bb00 | 0x7f45e2226680 | 131072 | 1038534 | 116343664 | 17832442825283 | 17832442784223 | 17832443822464 | 17832443864977 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f45e2b1b980 | 0x7f45e22266c0 | 131072 | 1034026 | 115706080 | 17832443875707 | 17832443822308 | 17832444914309 | 17832444984019 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f45e2b1b800 | 0x7f45e2226700 | 65536 | 654370 | 73416832 | 17832444995709 | 17832444914309 | 17832446040870 | 17832446107291 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f45e2b1b680 | 0x7f45e2226740 | 131072 | 1062574 | 118922864 | 17832446119451 | 17832446040870 | 17832447150632 | 17832447217504 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f45e517a500 | 0x7f45e2226780 | 131072 | 1035830 | 116234136 | 17832447229853 | 17832447150632 | 17832449094475 | 17832449159095 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f45e517a380 | 0x7f45e22267c0 | 131072 | 1037504 | 116109664 | 17832449179225 | 17832449094475 | 17832451003758 | 17832451072248 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f45e517a200 | 0x7f45e2226800 | 131072 | 1028376 | 115273240 | 17832451084307 | 17832451003758 | 17832452981041 | 17832453045679 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f45e517a080 | 0x7f45e2226840 | 65536 | 662726 | 74252864 | 17832453056848 | 17832452981041 | 17832454942484 | 17832455013900 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f45e2b1bf00 | 0x7f45e2226880 | 131072 | 1040770 | 116756344 | 17832455025780 | 17832454942484 | 17832456880247 | 17832456947382 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 236281 | 236281 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f45e2b1bd80 | 0x7f45e22268c0 | 131072 | 1035694 | 116127016 | 17832456958722 | 17832456880247 | 17832460486492 | 17832460564112 |