89 KiB
89 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | SQ_BUSY_CU_CYCLES | SQ_WAVE_CYCLES | GRBM_COUNT | GRBM_GUI_ACTIVE | TA_TA_BUSY_sum | TA_SH_FIFO_BUSY_sum | CPC_CPC_STAT_BUSY | CPC_CPC_STAT_IDLE | TA_SH_FIFO_CMD_BUSY_sum | TA_SH_FIFO_ADDR_BUSY_sum | CPC_CPC_TCIU_BUSY | CPC_CPC_TCIU_IDLE | TA_BUFFER_COALESCED_READ_CYCLES_sum | TA_BUFFER_COALESCED_WRITE_CYCLES_sum | TA_ADDR_STALLED_BY_TC_CYCLES_sum | TA_TOTAL_WAVEFRONTS_sum | TA_ADDR_STALLED_BY_TD_CYCLES_sum | TA_DATA_STALLED_BY_TC_CYCLES_sum | TA_FLAT_WAVEFRONTS_sum | TA_FLAT_READ_WAVEFRONTS_sum | TA_FLAT_WRITE_WAVEFRONTS_sum | TA_FLAT_ATOMIC_WAVEFRONTS_sum | TA_FLAT_COALESCEABLE_WAVEFRONTS_sum | TA_SH_FIFO_DATA_BUSY_sum | TA_SH_FIFO_DATA_SFIFO_BUSY_sum | CPC_CPC_STAT_STALL | CPC_UTCL1_STALL_ON_TRANSLATION | TA_SH_FIFO_DATA_TFIFO_BUSY_sum | TA_SQ_TA_CMD_CYCLES_sum | CPC_CPC_UTCL2IU_BUSY | CPC_CPC_UTCL2IU_IDLE | TA_SP_TA_ADDR_CYCLES_sum | TA_SP_TA_DATA_CYCLES_sum | CPC_CPC_UTCL2IU_STALL | CPC_ME1_BUSY_FOR_PACKET_DECODE | TA_SH_FIFO_ADDR_STARVED_WHILE_BUSY_CYCLES_sum | TA_SH_FIFO_CMD_STARVED_WHILE_BUSY_CYCLES_sum | CPC_ME1_DC0_SPI_BUSY | TA_SH_FIFO_DATA_STARVED_WHILE_BUSY_CYCLES_sum | TA_TA_SH_FIFO_STARVED_sum | TA_BUFFER_WAVEFRONTS_sum | TA_BUFFER_READ_WAVEFRONTS_sum | TA_BUFFER_WRITE_WAVEFRONTS_sum | TA_BUFFER_ATOMIC_WAVEFRONTS_sum | TA_BUFFER_TOTAL_CYCLES_sum | TA_BUFFER_COALESCABLE_WAVEFRONTS_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 165605 | 165605 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7fc640604280 | 3058048 | 2972073 | 524288 | 38600369 | 239545380 | 382255 | 382255 | 30887826.0 | 30512995.0 | 382255 | 0 | 28043621.0 | 27295529.0 | 302 | 379838 | 0.0 | 0.0 | 19868578.0 | 524288.0 | 0.0 | 20898975.0 | 524288.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 29753468.0 | 28054132.0 | 8360 | 900 | 29389392.0 | 2097152.0 | 1234 | 382437 | 2097152.0 | 4194304.0 | 0 | 8953 | 64.0 | 0.0 | 370417 | 0.0 | 1287339.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597143607745 | 16609156749657 | 16609156989655 | 16597287175645 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 165605 | 165605 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7fc640623f80 | 265200 | 158951 | 512 | 1384849 | 1677609 | 33149 | 33149 | 36320.0 | 33364.0 | 33149 | 0 | 31904.0 | 30544.0 | 302 | 33529 | 0.0 | 0.0 | 24422.0 | 4096.0 | 0.0 | 0.0 | 4096.0 | 4096.0 | 0.0 | 0.0 | 4096.0 | 0.0 | 0.0 | 11312 | 1026 | 0.0 | 16384.0 | 1353 | 32187 | 16384.0 | 0.0 | 0 | 29773 | 2.0 | 0.0 | 497 | 0.0 | 1393.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4096.0 | 16597292632314 | 16609161812981 | 16609161826261 | 16597292827068 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7fc65ed2e380 | 0x7fc640623fc0 | 1386256 | 1259569 | 65536 | 16297251 | 81916630 | 173281 | 173281 | 11933110.0 | 11603768.0 | 173281 | 0 | 11686577.0 | 11647908.0 | 351 | 178495 | 0.0 | 0.0 | 11705475.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 19095 | 4135 | 0.0 | 2097152.0 | 4644 | 168667 | 2097152.0 | 0.0 | 0 | 20836 | 209.0 | 0.0 | 154668 | 0.0 | 21793.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597293117657 | 16609161893620 | 16609161985780 | 16597293401058 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7fc65ed2e200 | 0x7fc640624000 | 2475872 | 2367244 | 65536 | 30670635 | 211853711 | 309483 | 309483 | 29293695.0 | 29283994.0 | 309483 | 0 | 29522077.0 | 29521245.0 | 302 | 311336 | 0.0 | 0.0 | 20625723.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20489 | 4523 | 0.0 | 2097152.0 | 2391 | 307548 | 2097152.0 | 0.0 | 0 | 24115 | 0.0 | 0.0 | 283819 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597293695827 | 16609162020339 | 16609162199698 | 16597294157281 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7fc65ed2e080 | 0x7fc640624040 | 2507536 | 2380730 | 65536 | 30823760 | 218693481 | 313441 | 313441 | 28763828.0 | 28740887.0 | 313441 | 0 | 29100191.0 | 29098879.0 | 351 | 315446 | 0.0 | 0.0 | 20718484.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20550 | 4128 | 0.0 | 2097152.0 | 3651 | 308903 | 2097152.0 | 0.0 | 0 | 24263 | 0.0 | 0.0 | 284261 | 0.0 | 1811.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597294447441 | 16609162226098 | 16609162404977 | 16597294817508 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7fc65c5cdf00 | 0x7fc640624080 | 1317752 | 1212559 | 65536 | 15696331 | 80172259 | 164718 | 164718 | 13338084.0 | 13084733.0 | 164718 | 0 | 11765799.0 | 11732992.0 | 351 | 173791 | 0.0 | 0.0 | 10693836.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 32454 | 3760 | 0.0 | 2097152.0 | 4701 | 169937 | 2097152.0 | 0.0 | 0 | 16362 | 154.0 | 0.0 | 154386 | 0.0 | 33648.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597295108208 | 16609162430097 | 16609162523696 | 16597295476065 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7fc65c5cdd80 | 0x7fc6406240c0 | 1321472 | 1214095 | 65536 | 15701105 | 75868803 | 165183 | 165183 | 13356255.0 | 13041175.0 | 165183 | 0 | 12011363.0 | 11963181.0 | 302 | 164647 | 0.0 | 0.0 | 11263325.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9830 | 2681 | 0.0 | 2097152.0 | 2378 | 161997 | 2097152.0 | 0.0 | 0 | 14857 | 85.0 | 0.0 | 149628 | 0.0 | 34219.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597295762495 | 16609162549616 | 16609162641775 | 16597296111223 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7fc65c5cdc00 | 0x7fc640624100 | 1311936 | 1209551 | 65536 | 15648694 | 96793490 | 163991 | 163991 | 13949268.0 | 13815493.0 | 163991 | 0 | 13649449.0 | 13632324.0 | 302 | 162753 | 0.0 | 0.0 | 11811563.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10075 | 2421 | 0.0 | 2097152.0 | 2932 | 160973 | 2097152.0 | 0.0 | 0 | 16816 | 42.0 | 0.0 | 145941 | 0.0 | 16875.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597296410172 | 16609162681135 | 16609162773774 | 16597296768630 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7fc65c5cda80 | 0x7fc640624140 | 2445104 | 2341766 | 65536 | 30355838 | 210341679 | 305637 | 305637 | 28983875.0 | 28973454.0 | 305637 | 0 | 29242984.0 | 29242152.0 | 302 | 308689 | 0.0 | 0.0 | 20795126.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11350 | 2431 | 0.0 | 2097152.0 | 2897 | 307868 | 2097152.0 | 0.0 | 0 | 24788 | 0.0 | 0.0 | 283394 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597297055260 | 16609162806414 | 16609162987053 | 16597297479165 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7fc65c5cd900 | 0x7fc640624180 | 2477960 | 2371269 | 65536 | 30702417 | 221341792 | 309744 | 309744 | 29301470.0 | 29285145.0 | 309744 | 0 | 29477529.0 | 29473636.0 | 302 | 310915 | 0.0 | 0.0 | 20898909.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10900 | 2414 | 0.0 | 2097152.0 | 2402 | 308062 | 2097152.0 | 0.0 | 0 | 24495 | 0.0 | 0.0 | 284776 | 0.0 | 3920.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597297766205 | 16609163029612 | 16609163211051 | 16597298195130 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7fc65c5cd780 | 0x7fc6406241c0 | 1420768 | 1293410 | 65536 | 16649607 | 94851913 | 177595 | 177595 | 12706026.0 | 12573239.0 | 177595 | 0 | 13540882.0 | 13495559.0 | 302 | 165843 | 0.0 | 0.0 | 11504098.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10072 | 2878 | 0.0 | 2097152.0 | 2389 | 162091 | 2097152.0 | 0.0 | 0 | 15666 | 100.0 | 0.0 | 147614 | 0.0 | 31936.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597298482830 | 16609163243691 | 16609163338410 | 16597298756230 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7fc65c5cd600 | 0x7fc640624200 | 1319048 | 1213590 | 65536 | 15695442 | 85801084 | 164880 | 164880 | 13475743.0 | 13292208.0 | 164880 | 0 | 13478906.0 | 13443560.0 | 302 | 164475 | 0.0 | 0.0 | 11461537.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9820 | 2421 | 0.0 | 2097152.0 | 2375 | 161850 | 2097152.0 | 0.0 | 0 | 15394 | 93.0 | 0.0 | 148198 | 0.0 | 19908.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597299045900 | 16609163375530 | 16609163468489 | 16597299417157 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7fc65ed2e480 | 0x7fc640624240 | 1314224 | 1210613 | 65536 | 15667698 | 89315085 | 164277 | 164277 | 13600217.0 | 13481242.0 | 164277 | 0 | 12908347.0 | 12864609.0 | 302 | 164401 | 0.0 | 0.0 | 11726007.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9969 | 2433 | 0.0 | 2097152.0 | 2403 | 160778 | 2097152.0 | 0.0 | 0 | 15628 | 29.0 | 0.0 | 145846 | 0.0 | 17772.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597299713417 | 16609163500649 | 16609163594569 | 16597300058115 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7fc65ed2e300 | 0x7fc640624280 | 2482864 | 2377058 | 65536 | 30770936 | 229184268 | 310357 | 310357 | 29373757.0 | 29363974.0 | 310357 | 0 | 29362913.0 | 29362081.0 | 302 | 309543 | 0.0 | 0.0 | 20858128.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11183 | 2424 | 0.0 | 2097152.0 | 2389 | 306983 | 2097152.0 | 0.0 | 0 | 24712 | 0.0 | 0.0 | 283753 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597300347115 | 16609163627528 | 16609163807847 | 16597300787519 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7fc65ed2e180 | 0x7fc6406242c0 | 2479664 | 2374228 | 65536 | 30750387 | 223897929 | 309957 | 309957 | 29329998.0 | 29312036.0 | 309957 | 0 | 29239406.0 | 29235456.0 | 302 | 308935 | 0.0 | 0.0 | 20681783.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10992 | 2412 | 0.0 | 2097152.0 | 2391 | 302641 | 2097152.0 | 0.0 | 0 | 22559 | 0.0 | 0.0 | 284459 | 0.0 | 3880.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597301075349 | 16609163837607 | 16609164018086 | 16597301559012 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7fc65ed2e000 | 0x7fc640624300 | 1312296 | 1210262 | 65536 | 15663154 | 97599821 | 164036 | 164036 | 13946790.0 | 13860822.0 | 164036 | 0 | 14297613.0 | 14293125.0 | 302 | 163885 | 0.0 | 0.0 | 11787366.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9894 | 2430 | 0.0 | 2097152.0 | 2381 | 162328 | 2097152.0 | 0.0 | 0 | 15659 | 35.0 | 0.0 | 146818 | 0.0 | 43078.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597301847642 | 16609164047205 | 16609164140325 | 16597302245298 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7fc65c5cde80 | 0x7fc640624340 | 1317880 | 1213138 | 65536 | 15698012 | 90553541 | 164734 | 164734 | 13607368.0 | 13427857.0 | 164734 | 0 | 13715123.0 | 13689688.0 | 302 | 163333 | 0.0 | 0.0 | 11252547.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9869 | 2413 | 0.0 | 2097152.0 | 2385 | 161508 | 2097152.0 | 0.0 | 0 | 14930 | 66.0 | 0.0 | 147341 | 0.0 | 31785.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597302533468 | 16609164179044 | 16609164272644 | 16597302884726 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7fc65c5cdd00 | 0x7fc640624380 | 1302336 | 1200566 | 65536 | 15540841 | 98334312 | 162791 | 162791 | 14089967.0 | 14017140.0 | 162791 | 0 | 14244136.0 | 14241699.0 | 302 | 163773 | 0.0 | 0.0 | 12057141.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10089 | 2424 | 0.0 | 2097152.0 | 2393 | 161754 | 2097152.0 | 0.0 | 0 | 15202 | 103.0 | 0.0 | 145904 | 0.0 | 9980.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597303179726 | 16609164305924 | 16609164398723 | 16597303560222 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7fc65c5cdb80 | 0x7fc6406243c0 | 2478624 | 2373308 | 65536 | 30765648 | 229966206 | 309827 | 309827 | 29393396.0 | 29382958.0 | 309827 | 0 | 29417345.0 | 29416513.0 | 302 | 309437 | 0.0 | 0.0 | 20871691.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11098 | 2412 | 0.0 | 2097152.0 | 2395 | 305846 | 2097152.0 | 0.0 | 0 | 24237 | 0.0 | 0.0 | 283660 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597303846152 | 16609164430723 | 16609164611361 | 16597304293077 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7fc65c5cda00 | 0x7fc640624400 | 2442208 | 2334792 | 65536 | 30251658 | 215995144 | 305275 | 305275 | 28954275.0 | 28938212.0 | 305275 | 0 | 28761279.0 | 28757431.0 | 302 | 302467 | 0.0 | 0.0 | 20575531.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10981 | 2966 | 0.0 | 2097152.0 | 2397 | 307140 | 2097152.0 | 0.0 | 0 | 23208 | 0.0 | 0.0 | 282252 | 0.0 | 3928.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597304583057 | 16609164639041 | 16609164820960 | 16597305036241 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7fc65c5cd880 | 0x7fc640624440 | 1307656 | 1206082 | 65536 | 15609844 | 100324849 | 163456 | 163456 | 13858396.0 | 13758103.0 | 163456 | 0 | 13977269.0 | 13966563.0 | 302 | 162583 | 0.0 | 0.0 | 11613432.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10019 | 2418 | 0.0 | 2097152.0 | 2946 | 161003 | 2097152.0 | 0.0 | 0 | 14773 | 78.0 | 0.0 | 146894 | 0.0 | 32283.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597305322251 | 16609164853120 | 16609164946879 | 16597305677238 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7fc65c5cd700 | 0x7fc640624480 | 1310888 | 1206271 | 65536 | 15602370 | 106098645 | 163860 | 163860 | 14186740.0 | 14101504.0 | 163860 | 0 | 13566813.0 | 13483478.0 | 302 | 162447 | 0.0 | 0.0 | 11148887.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9732 | 2414 | 0.0 | 2097152.0 | 2405 | 160470 | 2097152.0 | 0.0 | 0 | 15729 | 41.0 | 0.0 | 145964 | 0.0 | 55715.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597305963748 | 16609164977439 | 16609165070558 | 16597306326815 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7fc65ed2e580 | 0x7fc6406244c0 | 1296624 | 1193317 | 65536 | 15450365 | 92417140 | 162077 | 162077 | 13612674.0 | 13521341.0 | 162077 | 0 | 13881496.0 | 13868223.0 | 302 | 162584 | 0.0 | 0.0 | 11689616.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9986 | 2413 | 0.0 | 2097152.0 | 3704 | 160234 | 2097152.0 | 0.0 | 0 | 16014 | 34.0 | 0.0 | 144843 | 0.0 | 17546.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597306620995 | 16609165104798 | 16609165196957 | 16597306973993 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7fc65ed2e400 | 0x7fc640624500 | 2476936 | 2372381 | 65536 | 30733214 | 226817436 | 309616 | 309616 | 29307411.0 | 29293103.0 | 309616 | 0 | 29365755.0 | 29364923.0 | 302 | 309069 | 0.0 | 0.0 | 20690754.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11250 | 2428 | 0.0 | 2097152.0 | 2396 | 304974 | 2097152.0 | 0.0 | 0 | 24066 | 0.0 | 0.0 | 281417 | 0.0 | 837.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597307259833 | 16609165236477 | 16609165415996 | 16597307704247 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7fc65ed2e280 | 0x7fc640624540 | 2450376 | 2338216 | 65536 | 30261691 | 212211360 | 306296 | 306296 | 28876520.0 | 28857877.0 | 306296 | 0 | 29195254.0 | 29191407.0 | 302 | 307481 | 0.0 | 0.0 | 20863261.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10750 | 2419 | 0.0 | 2097152.0 | 2394 | 301665 | 2097152.0 | 0.0 | 0 | 22981 | 4.0 | 0.0 | 284507 | 0.0 | 3808.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597307990498 | 16609165443676 | 16609165625914 | 16597308432232 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7fc65ed2e100 | 0x7fc640624580 | 1296040 | 1196206 | 65536 | 15491331 | 94703495 | 162004 | 162004 | 13591948.0 | 13431801.0 | 162004 | 0 | 13456898.0 | 13410017.0 | 302 | 162002 | 0.0 | 0.0 | 11737276.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9824 | 2409 | 0.0 | 2097152.0 | 2393 | 159752 | 2097152.0 | 0.0 | 0 | 14867 | 99.0 | 0.0 | 146078 | 0.0 | 36764.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597308718692 | 16609165672474 | 16609165764634 | 16597309083549 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7fc65c5cdf80 | 0x7fc6406245c0 | 1301056 | 1198689 | 65536 | 15486468 | 87990159 | 162631 | 162631 | 13218865.0 | 12739764.0 | 162631 | 0 | 12685881.0 | 12353835.0 | 302 | 162440 | 0.0 | 0.0 | 10808480.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9904 | 2425 | 0.0 | 2097152.0 | 2394 | 158852 | 2097152.0 | 0.0 | 0 | 14916 | 244.0 | 0.0 | 145658 | 0.0 | 263743.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597309370989 | 16609165790713 | 16609165882553 | 16597309739067 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7fc65c5cde00 | 0x7fc640624600 | 1296432 | 1192691 | 65536 | 15439144 | 102843527 | 162053 | 162053 | 14056948.0 | 14024999.0 | 162053 | 0 | 13971859.0 | 13960516.0 | 302 | 162223 | 0.0 | 0.0 | 12040345.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10096 | 2920 | 0.0 | 2097152.0 | 2368 | 160771 | 2097152.0 | 0.0 | 0 | 15527 | 55.0 | 0.0 | 146006 | 0.0 | 16274.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597310033986 | 16609165917112 | 16609166010872 | 16597310354885 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7fc65c5cdc80 | 0x7fc640624640 | 2444248 | 2336412 | 65536 | 30263562 | 220102564 | 305530 | 305530 | 29003954.0 | 28997010.0 | 305530 | 0 | 28956762.0 | 28955930.0 | 302 | 303790 | 0.0 | 0.0 | 20541604.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11245 | 2851 | 0.0 | 2097152.0 | 2399 | 304146 | 2097152.0 | 0.0 | 0 | 24593 | 0.0 | 0.0 | 280240 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597310642685 | 16609166050232 | 16609166230870 | 16597311043861 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7fc65c5cdb00 | 0x7fc640624680 | 2463776 | 2352158 | 65536 | 30460605 | 215900066 | 307971 | 307971 | 29063371.0 | 29047840.0 | 307971 | 0 | 29135431.0 | 29132394.0 | 302 | 307243 | 0.0 | 0.0 | 20600349.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10800 | 2730 | 0.0 | 2097152.0 | 2394 | 304414 | 2097152.0 | 0.0 | 0 | 23327 | 3.0 | 0.0 | 282526 | 0.0 | 3285.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597311330881 | 16609166257590 | 16609166439669 | 16597311732577 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7fc65c5cd980 | 0x7fc6406246c0 | 1309288 | 1204255 | 65536 | 15584578 | 95725251 | 163660 | 163660 | 13685606.0 | 13517821.0 | 163660 | 0 | 13237496.0 | 13154917.0 | 302 | 163363 | 0.0 | 0.0 | 11354624.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10015 | 2890 | 0.0 | 2097152.0 | 2377 | 159790 | 2097152.0 | 0.0 | 0 | 15422 | 108.0 | 0.0 | 145537 | 0.0 | 71460.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597312020807 | 16609166464309 | 16609166557908 | 16597312387194 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7fc65c5cd800 | 0x7fc640624700 | 1308328 | 1204630 | 65536 | 15556955 | 88933544 | 163540 | 163540 | 13026239.0 | 12415034.0 | 163540 | 0 | 12141777.0 | 11738748.0 | 302 | 162465 | 0.0 | 0.0 | 10742080.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9800 | 2934 | 0.0 | 2097152.0 | 2428 | 160560 | 2097152.0 | 0.0 | 0 | 15352 | 271.0 | 0.0 | 145421 | 0.0 | 296301.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597312676034 | 16609166583668 | 16609166677907 | 16597313037821 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7fc65c5cd680 | 0x7fc640624740 | 1306656 | 1202286 | 65536 | 15541745 | 85512878 | 163331 | 163331 | 13199368.0 | 12926770.0 | 163331 | 0 | 13611802.0 | 13554217.0 | 302 | 162195 | 0.0 | 0.0 | 11370622.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9807 | 2419 | 0.0 | 2097152.0 | 2372 | 160241 | 2097152.0 | 0.0 | 0 | 14539 | 183.0 | 0.0 | 147262 | 0.0 | 70462.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597313332801 | 16609166713907 | 16609166807826 | 16597313650620 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7fc65ed2e500 | 0x7fc640624780 | 2440816 | 2334823 | 65536 | 30257248 | 223805730 | 305101 | 305101 | 28995372.0 | 28983606.0 | 305101 | 0 | 29149631.0 | 29148799.0 | 302 | 306576 | 0.0 | 0.0 | 20752866.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11159 | 2420 | 0.0 | 2097152.0 | 3023 | 303107 | 2097152.0 | 0.0 | 0 | 23745 | 0.0 | 0.0 | 279965 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597313938560 | 16609166831986 | 16609167012305 | 16597314404673 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7fc65ed2e380 | 0x7fc6406247c0 | 2440848 | 2337304 | 65536 | 30287840 | 209746695 | 305105 | 305105 | 28909949.0 | 28894960.0 | 305105 | 0 | 29122912.0 | 29119912.0 | 302 | 306733 | 0.0 | 0.0 | 20674334.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10847 | 2923 | 0.0 | 2097152.0 | 2392 | 301421 | 2097152.0 | 0.0 | 0 | 23738 | 0.0 | 0.0 | 282076 | 0.0 | 3021.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597314692723 | 16609167038225 | 16609167219503 | 16597315103629 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7fc65ed2e200 | 0x7fc640624800 | 1304592 | 1197364 | 65536 | 15476358 | 97759636 | 163073 | 163073 | 13972837.0 | 13788918.0 | 163073 | 0 | 13763855.0 | 13686325.0 | 302 | 161544 | 0.0 | 0.0 | 10377501.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9981 | 2438 | 0.0 | 2097152.0 | 2387 | 161563 | 2097152.0 | 0.0 | 0 | 15499 | 133.0 | 0.0 | 146222 | 0.0 | 195653.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597315392119 | 16609167245903 | 16609167339503 | 16597315758526 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7fc65ed2e080 | 0x7fc640624840 | 1307744 | 1200181 | 65536 | 15483645 | 91147787 | 163467 | 163467 | 13461308.0 | 12650060.0 | 163467 | 0 | 12158436.0 | 11530361.0 | 302 | 161787 | 0.0 | 0.0 | 10119713.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10038 | 2420 | 0.0 | 2097152.0 | 2896 | 160851 | 2097152.0 | 0.0 | 0 | 15356 | 479.0 | 0.0 | 145812 | 0.0 | 775163.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597316046136 | 16609167365422 | 16609167459022 | 16597316410093 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7fc65c5cdf00 | 0x7fc640624880 | 1304800 | 1201210 | 65536 | 15529876 | 94934987 | 163099 | 163099 | 13583899.0 | 13354618.0 | 163099 | 0 | 13150835.0 | 13050956.0 | 302 | 162363 | 0.0 | 0.0 | 11699389.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9765 | 2415 | 0.0 | 2097152.0 | 2392 | 160213 | 2097152.0 | 0.0 | 0 | 14337 | 147.0 | 0.0 | 145691 | 0.0 | 63354.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597316706463 | 16609167491342 | 16609167584781 | 16597317016002 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7fc65c5cdd80 | 0x7fc6406248c0 | 2418400 | 2306178 | 65536 | 29880316 | 197238172 | 302299 | 302299 | 28522459.0 | 28510176.0 | 302299 | 0 | 29009428.0 | 29008596.0 | 302 | 306055 | 0.0 | 0.0 | 20730422.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11212 | 2423 | 0.0 | 2097152.0 | 2398 | 301423 | 2097152.0 | 0.0 | 0 | 24021 | 0.0 | 0.0 | 280739 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597317303852 | 16609167609741 | 16609167788940 | 16597317708168 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7fc65c5cdc00 | 0x7fc640624900 | 2429968 | 2318452 | 65536 | 30040235 | 211163426 | 303745 | 303745 | 28794367.0 | 28784344.0 | 303745 | 0 | 28943719.0 | 28940311.0 | 302 | 304133 | 0.0 | 0.0 | 20621592.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10878 | 2920 | 0.0 | 2097152.0 | 2930 | 301934 | 2097152.0 | 0.0 | 0 | 23149 | 3.0 | 0.0 | 279170 | 0.0 | 3309.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597317995978 | 16609167814059 | 16609167994378 | 16597318474281 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7fc65c5cda80 | 0x7fc640624940 | 1297592 | 1195121 | 65536 | 15455615 | 88479540 | 162198 | 162198 | 13352987.0 | 12937696.0 | 162198 | 0 | 12774321.0 | 12580264.0 | 302 | 162120 | 0.0 | 0.0 | 11100222.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9877 | 2414 | 0.0 | 2097152.0 | 2404 | 160175 | 2097152.0 | 0.0 | 0 | 14916 | 129.0 | 0.0 | 146778 | 0.0 | 134029.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597318761411 | 16609168019978 | 16609168114217 | 16597319077260 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7fc65c5cd900 | 0x7fc640624980 | 1320856 | 1213900 | 65536 | 15629666 | 95759299 | 165106 | 165106 | 13027515.0 | 11984488.0 | 165106 | 0 | 11205468.0 | 10449467.0 | 302 | 162261 | 0.0 | 0.0 | 10065688.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9980 | 2420 | 0.0 | 2097152.0 | 2793 | 159383 | 2097152.0 | 0.0 | 0 | 16074 | 507.0 | 0.0 | 146217 | 0.0 | 736853.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597319365890 | 16609168137737 | 16609168232616 | 16597319732377 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7fc65c5cd780 | 0x7fc6406249c0 | 1298504 | 1195678 | 65536 | 15454876 | 94188009 | 162312 | 162312 | 13319353.0 | 12990016.0 | 162312 | 0 | 12907028.0 | 12668709.0 | 302 | 161923 | 0.0 | 0.0 | 11437749.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9934 | 2946 | 0.0 | 2097152.0 | 2385 | 159829 | 2097152.0 | 0.0 | 0 | 14773 | 157.0 | 0.0 | 146262 | 0.0 | 242719.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597320025817 | 16609168264456 | 16609168357736 | 16597320382384 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7fc65c5cd600 | 0x7fc640624a00 | 2434448 | 2328417 | 65536 | 30154008 | 222790382 | 304305 | 304305 | 28776750.0 | 28772252.0 | 304305 | 0 | 28956782.0 | 28955950.0 | 302 | 305072 | 0.0 | 0.0 | 20525813.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11198 | 2414 | 0.0 | 2097152.0 | 2396 | 302959 | 2097152.0 | 0.0 | 0 | 24949 | 0.0 | 0.0 | 280432 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597320669844 | 16609168383655 | 16609168563014 | 16597321117739 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7fc65ed2e480 | 0x7fc640624a40 | 2440880 | 2332944 | 65536 | 30215215 | 212738507 | 305109 | 305109 | 28876485.0 | 28862563.0 | 305109 | 0 | 28634867.0 | 28631063.0 | 302 | 301822 | 0.0 | 0.0 | 20072988.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10917 | 2410 | 0.0 | 2097152.0 | 2673 | 299088 | 2097152.0 | 0.0 | 0 | 22842 | 0.0 | 0.0 | 280972 | 0.0 | 3761.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597321405279 | 16609168589414 | 16609168768933 | 16597321863443 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7fc65ed2e300 | 0x7fc640624a80 | 1290040 | 1183323 | 65536 | 15308378 | 88530434 | 161254 | 161254 | 13138959.0 | 12660886.0 | 161254 | 0 | 12591771.0 | 12249136.0 | 302 | 162106 | 0.0 | 0.0 | 10823619.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9903 | 2689 | 0.0 | 2097152.0 | 2390 | 159557 | 2097152.0 | 0.0 | 0 | 14780 | 215.0 | 0.0 | 145244 | 0.0 | 299934.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597322152692 | 16609168794533 | 16609168887332 | 16597322522860 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7fc65ed2e180 | 0x7fc640624ac0 | 1291656 | 1184751 | 65536 | 15275527 | 94504000 | 161456 | 161456 | 11825058.0 | 9742853.0 | 161456 | 0 | 9868681.0 | 8982045.0 | 302 | 161116 | 0.0 | 0.0 | 7825147.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9808 | 2410 | 0.0 | 2097152.0 | 2395 | 159740 | 2097152.0 | 0.0 | 0 | 17284 | 525.0 | 0.0 | 143597 | 0.0 | 866976.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597322810369 | 16609168914372 | 16609169007171 | 16597323178807 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7fc65ed2e000 | 0x7fc640624b00 | 1285824 | 1182084 | 65536 | 15284229 | 83495285 | 160727 | 160727 | 12987207.0 | 12460040.0 | 160727 | 0 | 12665763.0 | 12407813.0 | 302 | 159904 | 0.0 | 0.0 | 10798213.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9807 | 2416 | 0.0 | 2097152.0 | 2849 | 158777 | 2097152.0 | 0.0 | 0 | 14487 | 275.0 | 0.0 | 144671 | 0.0 | 369604.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597323473126 | 16609169039811 | 16609169131810 | 16597323827254 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7fc65c5cde80 | 0x7fc640624b40 | 2409872 | 2302198 | 65536 | 29822529 | 208021704 | 301233 | 301233 | 28460370.0 | 28450581.0 | 301233 | 0 | 28457721.0 | 28456889.0 | 302 | 299849 | 0.0 | 0.0 | 20509949.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10771 | 2938 | 0.0 | 2097152.0 | 2928 | 300430 | 2097152.0 | 0.0 | 0 | 25116 | 0.0 | 0.0 | 278437 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597324115014 | 16609169158530 | 16609169338849 | 16597324558798 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7fc65c5cdd00 | 0x7fc640624b80 | 2425224 | 2323824 | 65536 | 30105329 | 216250386 | 303152 | 303152 | 28745018.0 | 28730311.0 | 303152 | 0 | 28513787.0 | 28510458.0 | 302 | 301001 | 0.0 | 0.0 | 20264203.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10989 | 2420 | 0.0 | 2097152.0 | 2394 | 298363 | 2097152.0 | 0.0 | 0 | 22566 | 0.0 | 0.0 | 278029 | 0.0 | 3154.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597324846938 | 16609169364609 | 16609169544767 | 16597325296022 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7fc65c5cdb80 | 0x7fc640624bc0 | 1306912 | 1200091 | 65536 | 15478879 | 82622810 | 163363 | 163363 | 12676186.0 | 11857336.0 | 163363 | 0 | 12654120.0 | 12317014.0 | 302 | 162511 | 0.0 | 0.0 | 10226848.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9882 | 2563 | 0.0 | 2097152.0 | 2715 | 161207 | 2097152.0 | 0.0 | 0 | 14801 | 308.0 | 0.0 | 146068 | 0.0 | 405241.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597325582532 | 16609169569247 | 16609169663647 | 16597325943070 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7fc65c5cda00 | 0x7fc640624c00 | 1296568 | 1194790 | 65536 | 15410132 | 101968300 | 162070 | 162070 | 10536353.0 | 8224493.0 | 162070 | 0 | 7934073.0 | 6730989.0 | 302 | 161668 | 0.0 | 0.0 | 5260325.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10103 | 2885 | 0.0 | 2097152.0 | 2372 | 160624 | 2097152.0 | 0.0 | 0 | 16106 | 444.0 | 0.0 | 144864 | 0.0 | 1170778.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597326230730 | 16609169688766 | 16609169784446 | 16597326599477 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7fc65c5cd880 | 0x7fc640624c40 | 1298640 | 1195809 | 65536 | 15458545 | 97325155 | 162329 | 162329 | 13628875.0 | 13223693.0 | 162329 | 0 | 13236639.0 | 13061460.0 | 302 | 160881 | 0.0 | 0.0 | 10461930.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10013 | 2638 | 0.0 | 2097152.0 | 2407 | 159612 | 2097152.0 | 0.0 | 0 | 14128 | 204.0 | 0.0 | 145203 | 0.0 | 107616.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597326892727 | 16609169818526 | 16609169911645 | 16597327262074 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7fc65c5cd700 | 0x7fc640624c80 | 2419336 | 2313588 | 65536 | 29982192 | 216936864 | 302416 | 302416 | 28657866.0 | 28650171.0 | 302416 | 0 | 28859975.0 | 28859142.0 | 302 | 304127 | 0.0 | 0.0 | 20332993.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11036 | 3068 | 0.0 | 2097152.0 | 2393 | 298263 | 2097152.0 | 0.0 | 0 | 23844 | 0.0 | 0.0 | 277408 | 0.0 | 841.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597327549234 | 16609169938205 | 16609170118363 | 16597328001858 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7fc65ed2e580 | 0x7fc640624cc0 | 2440080 | 2331197 | 65536 | 30170679 | 216563650 | 305009 | 305009 | 28759321.0 | 28742066.0 | 305009 | 0 | 28513100.0 | 28509322.0 | 302 | 300702 | 0.0 | 0.0 | 20074681.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10784 | 2409 | 0.0 | 2097152.0 | 2926 | 297309 | 2097152.0 | 0.0 | 0 | 22985 | 2.0 | 0.0 | 279958 | 0.0 | 5147.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597328288908 | 16609170142683 | 16609170323962 | 16597328743022 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7fc65ed2e400 | 0x7fc640624d00 | 1304904 | 1199575 | 65536 | 15462089 | 90379567 | 163112 | 163112 | 13114856.0 | 12501221.0 | 163112 | 0 | 12135932.0 | 11668257.0 | 302 | 163219 | 0.0 | 0.0 | 10336500.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9780 | 2442 | 0.0 | 2097152.0 | 2389 | 159921 | 2097152.0 | 0.0 | 0 | 15481 | 346.0 | 0.0 | 146427 | 0.0 | 456223.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597329030052 | 16609170347962 | 16609170442681 | 16597329392119 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7fc65ed2e280 | 0x7fc640624d40 | 1352640 | 1248337 | 65536 | 16104264 | 97888044 | 169079 | 169079 | 8671528.0 | 6259456.0 | 169079 | 0 | 6240404.0 | 5107178.0 | 302 | 169103 | 0.0 | 0.0 | 3906495.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9988 | 2514 | 0.0 | 2097152.0 | 2397 | 165091 | 2097152.0 | 0.0 | 0 | 16587 | 301.0 | 0.0 | 150729 | 0.0 | 1114775.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597329678230 | 16609170468121 | 16609170567960 | 16597330055576 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7fc65ed2e100 | 0x7fc640624d80 | 1305568 | 1203175 | 65536 | 15540936 | 89512125 | 163195 | 163195 | 13134068.0 | 12551407.0 | 163195 | 0 | 12845362.0 | 12585723.0 | 302 | 161576 | 0.0 | 0.0 | 10729190.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10018 | 2813 | 0.0 | 2097152.0 | 2399 | 159522 | 2097152.0 | 0.0 | 0 | 15031 | 288.0 | 0.0 | 145326 | 0.0 | 347955.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597330349926 | 16609170601240 | 16609170695159 | 16597330713723 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7fc65c5cdf80 | 0x7fc640624dc0 | 2413744 | 2307583 | 65536 | 29883780 | 210530289 | 301717 | 301717 | 28578429.0 | 28571650.0 | 301717 | 0 | 28436706.0 | 28435738.0 | 302 | 298904 | 0.0 | 0.0 | 20327926.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10915 | 2425 | 0.0 | 2097152.0 | 2374 | 299211 | 2097152.0 | 0.0 | 0 | 23766 | 0.0 | 0.0 | 277636 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597331001203 | 16609170723159 | 16609170902358 | 16597331449428 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7fc65c5cde00 | 0x7fc640624e00 | 2391624 | 2282988 | 65536 | 29587828 | 210025252 | 298952 | 298952 | 28359258.0 | 28345050.0 | 298952 | 0 | 28559521.0 | 28556045.0 | 302 | 301340 | 0.0 | 0.0 | 19998927.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10848 | 2412 | 0.0 | 2097152.0 | 2379 | 298289 | 2097152.0 | 0.0 | 0 | 23481 | 2.0 | 0.0 | 278318 | 0.0 | 3638.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597331736547 | 16609170927798 | 16609171106837 | 16597332192162 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7fc65c5cdc80 | 0x7fc640624e40 | 1313208 | 1204470 | 65536 | 15533829 | 89556829 | 164150 | 164150 | 13068894.0 | 12267375.0 | 164150 | 0 | 11995997.0 | 11496824.0 | 302 | 163187 | 0.0 | 0.0 | 10345564.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9927 | 2401 | 0.0 | 2097152.0 | 2397 | 160493 | 2097152.0 | 0.0 | 0 | 15485 | 266.0 | 0.0 | 146041 | 0.0 | 470476.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597332478761 | 16609171130836 | 16609171225716 | 16597332844269 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7fc65c5cdb00 | 0x7fc640624e80 | 1416616 | 1309770 | 65536 | 16889962 | 105408271 | 177076 | 177076 | 8482619.0 | 5964370.0 | 177076 | 0 | 5838529.0 | 4642502.0 | 302 | 175811 | 0.0 | 0.0 | 2855018.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10035 | 2410 | 0.0 | 2097152.0 | 2386 | 173179 | 2097152.0 | 0.0 | 0 | 17749 | 345.0 | 0.0 | 158027 | 0.0 | 1168622.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597333131249 | 16609171249556 | 16609171353075 | 16597333509465 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7fc65c5cd980 | 0x7fc640624ec0 | 1293712 | 1182764 | 65536 | 15277458 | 87454518 | 161713 | 161713 | 12914971.0 | 12240105.0 | 161713 | 0 | 12600135.0 | 12293596.0 | 302 | 161569 | 0.0 | 0.0 | 10516886.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9977 | 2422 | 0.0 | 2097152.0 | 2397 | 158600 | 2097152.0 | 0.0 | 0 | 15680 | 222.0 | 0.0 | 144090 | 0.0 | 324583.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597333802615 | 16609171385235 | 16609171478194 | 16597334163792 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7fc65c5cd800 | 0x7fc640624f00 | 2405728 | 2295463 | 65536 | 29744732 | 209536382 | 300715 | 300715 | 28320319.0 | 28312076.0 | 300715 | 0 | 28550645.0 | 28549571.0 | 302 | 302057 | 0.0 | 0.0 | 20206346.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11137 | 2726 | 0.0 | 2097152.0 | 2389 | 297424 | 2097152.0 | 0.0 | 0 | 23764 | 0.0 | 0.0 | 277558 | 0.0 | 1438.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597334450802 | 16609171502994 | 16609171684113 | 16597334907297 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7fc65c5cd680 | 0x7fc640624f40 | 2400888 | 2291383 | 65536 | 29683031 | 208596855 | 300110 | 300110 | 28331482.0 | 28323480.0 | 300110 | 0 | 28603831.0 | 28602975.0 | 302 | 302641 | 0.0 | 0.0 | 20145811.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10941 | 2914 | 0.0 | 2097152.0 | 2393 | 299328 | 2097152.0 | 0.0 | 0 | 24516 | 0.0 | 0.0 | 277463 | 0.0 | 832.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597335193677 | 16609171709872 | 16609171890191 | 16597335651231 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7fc65ed2e500 | 0x7fc640624f80 | 1296336 | 1190404 | 65536 | 15341489 | 85186506 | 162041 | 162041 | 12701914.0 | 11604573.0 | 162041 | 0 | 12556353.0 | 12000725.0 | 302 | 160878 | 0.0 | 0.0 | 9002244.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10052 | 2420 | 0.0 | 2097152.0 | 3132 | 157971 | 2097152.0 | 0.0 | 0 | 15024 | 544.0 | 0.0 | 144461 | 0.0 | 1186682.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597335937741 | 16609171913231 | 16609172006670 | 16597336317697 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7fc65ed2e380 | 0x7fc640624fc0 | 1470712 | 1359233 | 65536 | 17539407 | 108365488 | 183838 | 183838 | 7754807.0 | 5158544.0 | 183838 | 0 | 4702996.0 | 3347278.0 | 302 | 180934 | 0.0 | 0.0 | 1759278.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10012 | 2413 | 0.0 | 2097152.0 | 2392 | 180054 | 2097152.0 | 0.0 | 0 | 17309 | 351.0 | 0.0 | 164090 | 0.0 | 1342833.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597336606197 | 16609172032590 | 16609172139309 | 16597336990244 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7fc65ed2e200 | 0x7fc640625000 | 1296496 | 1192268 | 65536 | 15387290 | 91108484 | 162061 | 162061 | 12870745.0 | 12259923.0 | 162061 | 0 | 12641837.0 | 12355048.0 | 302 | 160802 | 0.0 | 0.0 | 10558090.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9996 | 2797 | 0.0 | 2097152.0 | 2394 | 159656 | 2097152.0 | 0.0 | 0 | 15520 | 241.0 | 0.0 | 143641 | 0.0 | 296095.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597337295503 | 16609172172429 | 16609172266349 | 16597337655141 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7fc65ed2e080 | 0x7fc640625040 | 2402472 | 2293067 | 65536 | 29701845 | 211291951 | 300308 | 300308 | 28291251.0 | 28273331.0 | 300308 | 0 | 28229962.0 | 28228962.0 | 302 | 298483 | 0.0 | 0.0 | 19904386.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11100 | 2418 | 0.0 | 2097152.0 | 2367 | 299280 | 2097152.0 | 0.0 | 0 | 21830 | 0.0 | 0.0 | 274716 | 0.0 | 1032.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597337943131 | 16609172290988 | 16609172470187 | 16597338406055 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7fc65c5cdf00 | 0x7fc640625080 | 2431744 | 2313865 | 65536 | 29969030 | 215978253 | 303967 | 303967 | 28609876.0 | 28600366.0 | 303967 | 0 | 28231641.0 | 28230809.0 | 302 | 299119 | 0.0 | 0.0 | 20404868.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10982 | 2412 | 0.0 | 2097152.0 | 3119 | 300362 | 2097152.0 | 0.0 | 0 | 22249 | 1.0 | 0.0 | 277019 | 0.0 | 889.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597338693634 | 16609172496427 | 16609172675946 | 16597339154058 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7fc65c5cdd80 | 0x7fc6406250c0 | 1299016 | 1196087 | 65536 | 15423831 | 89284901 | 162376 | 162376 | 12576555.0 | 11127815.0 | 162376 | 0 | 11324773.0 | 10599889.0 | 302 | 162288 | 0.0 | 0.0 | 9737677.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9982 | 2903 | 0.0 | 2097152.0 | 2381 | 159534 | 2097152.0 | 0.0 | 0 | 15900 | 546.0 | 0.0 | 145862 | 0.0 | 727058.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597339440678 | 16609172702346 | 16609172797385 | 16597339817525 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7fc65c5cdc00 | 0x7fc640625100 | 1545184 | 1431202 | 65536 | 18459579 | 106506832 | 193147 | 193147 | 6606615.0 | 4401285.0 | 193147 | 0 | 4164384.0 | 3010750.0 | 302 | 191722 | 0.0 | 0.0 | 1514469.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10290 | 2416 | 0.0 | 2097152.0 | 2349 | 188510 | 2097152.0 | 0.0 | 0 | 17916 | 264.0 | 0.0 | 171757 | 0.0 | 1134336.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597340104515 | 16609172823305 | 16609172938344 | 16597340495521 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7fc65c5cda80 | 0x7fc640625140 | 1288536 | 1188301 | 65536 | 15347407 | 103419507 | 161066 | 161066 | 13654477.0 | 13272294.0 | 161066 | 0 | 12418706.0 | 12051988.0 | 302 | 160825 | 0.0 | 0.0 | 11260790.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9815 | 2656 | 0.0 | 2097152.0 | 2391 | 158742 | 2097152.0 | 0.0 | 0 | 15195 | 237.0 | 0.0 | 144086 | 0.0 | 246965.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597340788761 | 16609172984744 | 16609173078823 | 16597341150838 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7fc65c5cd900 | 0x7fc640625180 | 2429560 | 2323684 | 65536 | 30083624 | 220132366 | 303694 | 303694 | 28648578.0 | 28638096.0 | 303694 | 0 | 28183757.0 | 28182573.0 | 302 | 298338 | 0.0 | 0.0 | 20033783.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10623 | 3012 | 0.0 | 2097152.0 | 2390 | 299788 | 2097152.0 | 0.0 | 0 | 24488 | 0.0 | 0.0 | 277121 | 0.0 | 840.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597341436848 | 16609173104263 | 16609173284582 | 16597341886973 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7fc65c5cd780 | 0x7fc6406251c0 | 2411720 | 2305126 | 65536 | 29836408 | 213650007 | 301464 | 301464 | 28483398.0 | 28471008.0 | 301464 | 0 | 28131939.0 | 28131075.0 | 302 | 297467 | 0.0 | 0.0 | 19912156.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10836 | 2674 | 0.0 | 2097152.0 | 2376 | 296167 | 2097152.0 | 0.0 | 0 | 17529 | 0.0 | 0.0 | 276457 | 0.0 | 1005.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597342176173 | 16609173310821 | 16609173489860 | 16597342645146 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7fc65c5cd600 | 0x7fc640625200 | 1303648 | 1192300 | 65536 | 15366312 | 95752564 | 162955 | 162955 | 12731114.0 | 11294259.0 | 162955 | 0 | 11139966.0 | 10384192.0 | 302 | 161025 | 0.0 | 0.0 | 8608533.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10033 | 2430 | 0.0 | 2097152.0 | 2369 | 160624 | 2097152.0 | 0.0 | 0 | 15551 | 550.0 | 0.0 | 145048 | 0.0 | 938035.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597342932536 | 16609173515300 | 16609173610499 | 16597343298283 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7fc65ed2e480 | 0x7fc640625240 | 1613472 | 1504410 | 65536 | 19388801 | 130122243 | 201683 | 201683 | 6731468.0 | 4473442.0 | 201683 | 0 | 3992936.0 | 2864581.0 | 302 | 200780 | 0.0 | 0.0 | 1486938.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10313 | 2414 | 0.0 | 2097152.0 | 2394 | 198965 | 2097152.0 | 0.0 | 0 | 17658 | 319.0 | 0.0 | 182244 | 0.0 | 1140934.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597343585253 | 16609173635459 | 16609173757378 | 16597343990029 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7fc65ed2e300 | 0x7fc640625280 | 1289544 | 1184561 | 65536 | 15296305 | 88103765 | 161192 | 161192 | 12799314.0 | 12007497.0 | 161192 | 0 | 11693197.0 | 11206680.0 | 302 | 161477 | 0.0 | 0.0 | 10097054.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9854 | 2430 | 0.0 | 2097152.0 | 2389 | 158329 | 2097152.0 | 0.0 | 0 | 15352 | 312.0 | 0.0 | 144421 | 0.0 | 259596.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597344283279 | 16609173789858 | 16609173884737 | 16597344650246 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7fc65ed2e180 | 0x7fc6406252c0 | 2427256 | 2319595 | 65536 | 30024773 | 221649170 | 303406 | 303406 | 28566329.0 | 28555234.0 | 303406 | 0 | 28447609.0 | 28446749.0 | 302 | 300935 | 0.0 | 0.0 | 20018971.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11215 | 2847 | 0.0 | 2097152.0 | 2387 | 298968 | 2097152.0 | 0.0 | 0 | 24118 | 0.0 | 0.0 | 276301 | 0.0 | 861.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597344936716 | 16609173909537 | 16609174088896 | 16597345396030 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7fc65ed2e000 | 0x7fc640625300 | 2385232 | 2283471 | 65536 | 29590630 | 205937246 | 298153 | 298153 | 28235814.0 | 28223253.0 | 298153 | 0 | 28427016.0 | 28425283.0 | 302 | 300498 | 0.0 | 0.0 | 19893433.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10638 | 2965 | 0.0 | 2097152.0 | 2856 | 295231 | 2097152.0 | 0.0 | 0 | 22032 | 0.0 | 0.0 | 275630 | 0.0 | 3016.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597345681640 | 16609174114336 | 16609174293855 | 16597346134755 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7fc65c5cde80 | 0x7fc640625340 | 1296824 | 1193256 | 65536 | 15387481 | 98435578 | 162102 | 162102 | 12268476.0 | 10463815.0 | 162102 | 0 | 10310624.0 | 9413616.0 | 302 | 161499 | 0.0 | 0.0 | 8579615.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10012 | 3006 | 0.0 | 2097152.0 | 2406 | 159433 | 2097152.0 | 0.0 | 0 | 15875 | 544.0 | 0.0 | 145120 | 0.0 | 868531.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597346421514 | 16609174319454 | 16609174414174 | 16597346800071 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7fc65c5cdd00 | 0x7fc640625380 | 1695440 | 1584394 | 65536 | 20424872 | 126370296 | 211929 | 211929 | 6272079.0 | 4155234.0 | 211929 | 0 | 3875878.0 | 2694400.0 | 302 | 211167 | 0.0 | 0.0 | 933627.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10500 | 2408 | 0.0 | 2097152.0 | 2397 | 208495 | 2097152.0 | 0.0 | 0 | 18163 | 277.0 | 0.0 | 189371 | 0.0 | 1169682.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597347086631 | 16609174441694 | 16609174568093 | 16597347491627 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7fc65c5cdb80 | 0x7fc6406253c0 | 1293936 | 1190586 | 65536 | 15364392 | 92793192 | 161741 | 161741 | 12940095.0 | 12130786.0 | 161741 | 0 | 10758072.0 | 10025773.0 | 302 | 160608 | 0.0 | 0.0 | 9467480.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9908 | 2416 | 0.0 | 2097152.0 | 2406 | 157691 | 2097152.0 | 0.0 | 0 | 15076 | 333.0 | 0.0 | 145193 | 0.0 | 447324.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597347784467 | 16609174602012 | 16609174696572 | 16597348149704 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7fc65c5cda00 | 0x7fc640625400 | 2403424 | 2297203 | 65536 | 29741949 | 207944727 | 300427 | 300427 | 28404173.0 | 28392527.0 | 300427 | 0 | 28239748.0 | 28238868.0 | 302 | 298432 | 0.0 | 0.0 | 19828703.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10951 | 3018 | 0.0 | 2097152.0 | 2394 | 297687 | 2097152.0 | 0.0 | 0 | 24006 | 0.0 | 0.0 | 273550 | 0.0 | 856.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597348437144 | 16609174721692 | 16609174901690 | 16597348897418 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7fc65c5cd880 | 0x7fc640625440 | 2386880 | 2284320 | 65536 | 29575527 | 211410437 | 298359 | 298359 | 28192129.0 | 28175736.0 | 298359 | 0 | 28207052.0 | 28204458.0 | 302 | 298380 | 0.0 | 0.0 | 19958305.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10934 | 2415 | 0.0 | 2097152.0 | 2377 | 295886 | 2097152.0 | 0.0 | 0 | 22991 | 1.0 | 0.0 | 276103 | 0.0 | 2429.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597349184208 | 16609174926970 | 16609175108089 | 16597349637482 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7fc65c5cd700 | 0x7fc640625480 | 1294192 | 1191430 | 65536 | 15373116 | 96934484 | 161773 | 161773 | 11741611.0 | 9577500.0 | 161773 | 0 | 9199616.0 | 8180539.0 | 302 | 159840 | 0.0 | 0.0 | 7076521.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10051 | 2427 | 0.0 | 2097152.0 | 3128 | 159527 | 2097152.0 | 0.0 | 0 | 16646 | 503.0 | 0.0 | 143440 | 0.0 | 1021733.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597349923182 | 16609175133369 | 16609175226968 | 16597350296809 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7fc65ed2e580 | 0x7fc6406254c0 | 1755760 | 1640125 | 65536 | 21184916 | 129603426 | 219469 | 219469 | 5881012.0 | 3895864.0 | 219469 | 0 | 3694750.0 | 2546102.0 | 302 | 218542 | 0.0 | 0.0 | 801126.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10536 | 2429 | 0.0 | 2097152.0 | 2394 | 217041 | 2097152.0 | 0.0 | 0 | 18854 | 300.0 | 0.0 | 198109 | 0.0 | 1164244.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597350584349 | 16609175251608 | 16609175382807 | 16597350993754 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7fc65ed2e400 | 0x7fc640625500 | 1286536 | 1182943 | 65536 | 15244658 | 91186933 | 160816 | 160816 | 12709039.0 | 11814329.0 | 160816 | 0 | 11508960.0 | 10913113.0 | 302 | 160775 | 0.0 | 0.0 | 9543282.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9849 | 2418 | 0.0 | 2097152.0 | 3172 | 156940 | 2097152.0 | 0.0 | 0 | 14811 | 416.0 | 0.0 | 143482 | 0.0 | 544283.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597351286534 | 16609175415447 | 16609175508886 | 16597351653022 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7fc65ed2e280 | 0x7fc640625540 | 2380576 | 2278960 | 65536 | 29517655 | 213020255 | 297571 | 297571 | 28140068.0 | 28132431.0 | 297571 | 0 | 28131987.0 | 28131075.0 | 302 | 298259 | 0.0 | 0.0 | 19977974.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10831 | 2930 | 0.0 | 2097152.0 | 2393 | 294454 | 2097152.0 | 0.0 | 0 | 23277 | 0.0 | 0.0 | 274826 | 0.0 | 888.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597351940801 | 16609175535606 | 16609175714165 | 16597352391316 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7fc65ed2e100 | 0x7fc640625580 | 2386952 | 2286758 | 65536 | 29583800 | 215165098 | 298368 | 298368 | 28190363.0 | 28177799.0 | 298368 | 0 | 28115689.0 | 28113608.0 | 302 | 296796 | 0.0 | 0.0 | 19796209.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11086 | 2407 | 0.0 | 2097152.0 | 2392 | 295177 | 2097152.0 | 0.0 | 0 | 22757 | 2.0 | 0.0 | 275629 | 0.0 | 2025.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597352679026 | 16609175739605 | 16609175920243 | 16597353134330 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7fc65c5cdf80 | 0x7fc6406255c0 | 1294560 | 1193813 | 65536 | 15394057 | 100241623 | 161819 | 161819 | 10967600.0 | 8484547.0 | 161819 | 0 | 8376944.0 | 7207055.0 | 302 | 162569 | 0.0 | 0.0 | 6273717.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9967 | 2426 | 0.0 | 2097152.0 | 3135 | 159866 | 2097152.0 | 0.0 | 0 | 15969 | 575.0 | 0.0 | 145134 | 0.0 | 1152689.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597353421230 | 16609175946003 | 16609176042163 | 16597353786017 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7fc65c5cde00 | 0x7fc640625600 | 1832080 | 1722169 | 65536 | 22255014 | 152468490 | 229009 | 229009 | 5676919.0 | 3790892.0 | 229009 | 0 | 3676229.0 | 2580886.0 | 302 | 227925 | 0.0 | 0.0 | 818255.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10660 | 2992 | 0.0 | 2097152.0 | 3001 | 225587 | 2097152.0 | 0.0 | 0 | 19447 | 290.0 | 0.0 | 207966 | 0.0 | 1116574.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597354074337 | 16609176066002 | 16609176204561 | 16597354503752 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7fc65c5cdc80 | 0x7fc640625640 | 1289920 | 1183476 | 65536 | 15275145 | 90539291 | 161239 | 161239 | 12602838.0 | 11546682.0 | 161239 | 0 | 11890386.0 | 11391442.0 | 302 | 160873 | 0.0 | 0.0 | 10544033.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10124 | 2413 | 0.0 | 2097152.0 | 2388 | 158811 | 2097152.0 | 0.0 | 0 | 16161 | 383.0 | 0.0 | 144089 | 0.0 | 604721.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597354796462 | 16609176237201 | 16609176331441 | 16597355162409 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7fc65c5cdb00 | 0x7fc640625680 | 2396736 | 2286915 | 65536 | 29619728 | 202728262 | 299591 | 299591 | 28152808.0 | 28141309.0 | 299591 | 0 | 28120285.0 | 28119345.0 | 302 | 297695 | 0.0 | 0.0 | 19876320.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10620 | 2996 | 0.0 | 2097152.0 | 2873 | 295400 | 2097152.0 | 0.0 | 0 | 22165 | 0.0 | 0.0 | 274698 | 0.0 | 1040.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597355449779 | 16609176356880 | 16609176538959 | 16597355913493 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7fc65c5cd980 | 0x7fc6406256c0 | 2397792 | 2293257 | 65536 | 29708091 | 213490353 | 299723 | 299723 | 28288937.0 | 28275238.0 | 299723 | 0 | 28188472.0 | 28185387.0 | 302 | 298287 | 0.0 | 0.0 | 19934515.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10771 | 2364 | 0.0 | 2097152.0 | 2397 | 294963 | 2097152.0 | 0.0 | 0 | 21889 | 0.0 | 0.0 | 275456 | 0.0 | 2992.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597356199733 | 16609176564559 | 16609176744078 | 16597356650777 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7fc65c5cd800 | 0x7fc640625700 | 1310648 | 1208554 | 65536 | 15604857 | 100903753 | 163830 | 163830 | 10285773.0 | 7747764.0 | 163830 | 0 | 7430403.0 | 6121787.0 | 302 | 163511 | 0.0 | 0.0 | 4527583.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9999 | 2423 | 0.0 | 2097152.0 | 2412 | 160969 | 2097152.0 | 0.0 | 0 | 15962 | 417.0 | 0.0 | 146531 | 0.0 | 1327870.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597356937427 | 16609176768398 | 16609176866317 | 16597357301884 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7fc65c5cd680 | 0x7fc640625740 | 1924608 | 1813022 | 65536 | 23432425 | 160560120 | 240575 | 240575 | 5894442.0 | 3900064.0 | 240575 | 0 | 3592662.0 | 2503242.0 | 302 | 238523 | 0.0 | 0.0 | 685240.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10603 | 2419 | 0.0 | 2097152.0 | 2396 | 236376 | 2097152.0 | 0.0 | 0 | 20054 | 351.0 | 0.0 | 216406 | 0.0 | 1099868.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597357587834 | 16609176891917 | 16609177037356 | 16597358003070 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7fc65ed2e500 | 0x7fc640625780 | 1297896 | 1191202 | 65536 | 15330061 | 95912793 | 162236 | 162236 | 12219363.0 | 10735545.0 | 162236 | 0 | 10578590.0 | 9825742.0 | 302 | 160976 | 0.0 | 0.0 | 8739414.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 9881 | 2902 | 0.0 | 2097152.0 | 2404 | 157855 | 2097152.0 | 0.0 | 0 | 15550 | 488.0 | 0.0 | 143183 | 0.0 | 659466.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597358296759 | 16609177070795 | 16609177166155 | 16597358664676 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7fc65ed2e380 | 0x7fc6406257c0 | 2387568 | 2280285 | 65536 | 29504517 | 185367116 | 298445 | 298445 | 28086965.0 | 28059779.0 | 298445 | 0 | 28137462.0 | 28130685.0 | 302 | 298601 | 0.0 | 0.0 | 19664356.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10580 | 2407 | 0.0 | 2097152.0 | 2392 | 294747 | 2097152.0 | 0.0 | 0 | 22719 | 4.0 | 0.0 | 275592 | 0.0 | 7362.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597358952186 | 16609177191115 | 16609177373353 | 16597359405470 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7fc65ed2e200 | 0x7fc640625800 | 2396320 | 2286241 | 65536 | 29601942 | 201753192 | 299539 | 299539 | 28188319.0 | 28172411.0 | 299539 | 0 | 28036603.0 | 28031399.0 | 302 | 297735 | 0.0 | 0.0 | 19668212.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10632 | 2411 | 0.0 | 2097152.0 | 2392 | 296042 | 2097152.0 | 0.0 | 0 | 22433 | 0.0 | 0.0 | 274784 | 0.0 | 7115.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597359693050 | 16609177397993 | 16609177581192 | 16597360153294 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7fc65ed2e080 | 0x7fc640625840 | 1372560 | 1266284 | 65536 | 16341598 | 103801512 | 171569 | 171569 | 9394852.0 | 6840280.0 | 171569 | 0 | 6254018.0 | 5049245.0 | 302 | 170611 | 0.0 | 0.0 | 4205185.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10135 | 2495 | 0.0 | 2097152.0 | 2398 | 167281 | 2097152.0 | 0.0 | 0 | 16091 | 323.0 | 0.0 | 152176 | 0.0 | 1251243.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597360439604 | 16609177606472 | 16609177706471 | 16597360813501 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7fc65c5cdf00 | 0x7fc640625880 | 2077152 | 1966282 | 65536 | 25403452 | 169897676 | 259643 | 259643 | 4574681.0 | 3285493.0 | 259643 | 0 | 3235443.0 | 2547792.0 | 302 | 259293 | 0.0 | 0.0 | 553465.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10686 | 2410 | 0.0 | 2097152.0 | 2388 | 257396 | 2097152.0 | 0.0 | 0 | 20867 | 450.0 | 0.0 | 235268 | 0.0 | 701830.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597361100651 | 16609177732711 | 16609177888390 | 16597361537746 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7fc65c5cdd80 | 0x7fc6406258c0 | 1303200 | 1195155 | 65536 | 15387050 | 97361701 | 162899 | 162899 | 11392594.0 | 9440570.0 | 162899 | 0 | 8879679.0 | 7795483.0 | 302 | 160228 | 0.0 | 0.0 | 6888916.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10136 | 2848 | 0.0 | 2097152.0 | 2397 | 159948 | 2097152.0 | 0.0 | 0 | 15892 | 455.0 | 0.0 | 143792 | 0.0 | 1078145.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597361831225 | 16609177920870 | 16609178015749 | 16597362192673 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7fc65c5cdc00 | 0x7fc640625900 | 2368136 | 2261474 | 65536 | 29267540 | 195658253 | 296016 | 296016 | 27870889.0 | 27840759.0 | 296016 | 0 | 27877859.0 | 27869979.0 | 302 | 295810 | 0.0 | 0.0 | 19535297.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10655 | 3098 | 0.0 | 2097152.0 | 2350 | 293810 | 2097152.0 | 0.0 | 0 | 22482 | 2.0 | 0.0 | 274590 | 0.0 | 9274.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597362479423 | 16609178042149 | 16609178221827 | 16597362941067 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7fc65c5cda80 | 0x7fc640625940 | 2387672 | 2276087 | 65536 | 29440424 | 198915891 | 298458 | 298458 | 28013551.0 | 28001777.0 | 298458 | 0 | 27878284.0 | 27875306.0 | 302 | 296443 | 0.0 | 0.0 | 19617022.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10905 | 2951 | 0.0 | 2097152.0 | 2392 | 292994 | 2097152.0 | 0.0 | 0 | 21861 | 0.0 | 0.0 | 273504 | 0.0 | 2654.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597363227807 | 16609178246947 | 16609178430146 | 16597363682741 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7fc65c5cd900 | 0x7fc640625980 | 1429264 | 1321097 | 65536 | 17053134 | 105751765 | 178657 | 178657 | 8276086.0 | 5421977.0 | 178657 | 0 | 4973396.0 | 3569445.0 | 302 | 178725 | 0.0 | 0.0 | 1936471.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10206 | 2947 | 0.0 | 2097152.0 | 2398 | 177373 | 2097152.0 | 0.0 | 0 | 16342 | 203.0 | 0.0 | 159222 | 0.0 | 1390973.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597363969141 | 16609178453986 | 16609178561505 | 16597364347068 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7fc65c5cd780 | 0x7fc6406259c0 | 2255568 | 2141631 | 65536 | 27678346 | 186991269 | 281945 | 281945 | 4639021.0 | 3339550.0 | 281945 | 0 | 3168717.0 | 2479096.0 | 302 | 279990 | 0.0 | 0.0 | 549223.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10875 | 2432 | 0.0 | 2097152.0 | 2407 | 277222 | 2097152.0 | 0.0 | 0 | 21983 | 520.0 | 0.0 | 255544 | 0.0 | 687555.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597364633538 | 16609178586785 | 16609178757344 | 16597365073142 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7fc65c5cd600 | 0x7fc640625a00 | 1299888 | 1193268 | 65536 | 15384472 | 97975405 | 162485 | 162485 | 10061722.0 | 7713627.0 | 162485 | 0 | 7851337.0 | 6585207.0 | 302 | 160936 | 0.0 | 0.0 | 5280029.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10076 | 2406 | 0.0 | 2097152.0 | 2395 | 159178 | 2097152.0 | 0.0 | 0 | 16294 | 313.0 | 0.0 | 143871 | 0.0 | 1291913.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597365366372 | 16609178790144 | 16609178886463 | 16597365726870 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7fc65ed2e480 | 0x7fc640625a40 | 2372144 | 2261009 | 65536 | 29235203 | 198044200 | 296517 | 296517 | 27637469.0 | 27619870.0 | 296517 | 0 | 27631749.0 | 27626594.0 | 302 | 293902 | 0.0 | 0.0 | 19360575.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10788 | 2600 | 0.0 | 2097152.0 | 2377 | 294122 | 2097152.0 | 0.0 | 0 | 22236 | 2.0 | 0.0 | 274561 | 0.0 | 3715.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597366015330 | 16609178911583 | 16609179093661 | 16597366428615 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7fc65ed2e300 | 0x7fc640625a80 | 2375304 | 2266151 | 65536 | 29325099 | 191132422 | 296912 | 296912 | 27803920.0 | 27757259.0 | 296912 | 0 | 24310509.0 | 24144548.0 | 402 | 307754 | 0.0 | 0.0 | 19517280.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10666 | 2412 | 0.0 | 2097152.0 | 2391 | 296146 | 2097152.0 | 0.0 | 0 | 22437 | 1.0 | 0.0 | 273431 | 0.0 | 2800.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597366714665 | 16609179118621 | 16609179301820 | 16597367169299 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7fc65ed2e180 | 0x7fc640625ac0 | 1500296 | 1386419 | 65536 | 17866973 | 104587261 | 187536 | 187536 | 7998291.0 | 4935335.0 | 187536 | 0 | 4575952.0 | 3048003.0 | 302 | 186169 | 0.0 | 0.0 | 1386558.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10237 | 2413 | 0.0 | 2097152.0 | 2387 | 184207 | 2097152.0 | 0.0 | 0 | 17047 | 163.0 | 0.0 | 166520 | 0.0 | 1534553.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597367456049 | 16609179326620 | 16609179436539 | 16597367847655 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7fc65ed2e000 | 0x7fc640625b00 | 2397632 | 2286359 | 65536 | 29544693 | 203216328 | 299703 | 299703 | 4558664.0 | 3293250.0 | 299703 | 0 | 3437044.0 | 2736427.0 | 402 | 332328 | 0.0 | 0.0 | 543555.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10979 | 2418 | 0.0 | 2097152.0 | 2390 | 297473 | 2097152.0 | 0.0 | 0 | 23456 | 406.0 | 0.0 | 274548 | 0.0 | 691752.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597368133715 | 16609179460699 | 16609179641658 | 16597368594499 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7fc65c5cde80 | 0x7fc640625b40 | 1367240 | 1263863 | 65536 | 16305871 | 105197986 | 170904 | 170904 | 8537252.0 | 5867609.0 | 170904 | 0 | 5638449.0 | 4441785.0 | 302 | 172322 | 0.0 | 0.0 | 3418444.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10024 | 2414 | 0.0 | 2097152.0 | 2387 | 169481 | 2097152.0 | 0.0 | 0 | 16405 | 304.0 | 0.0 | 152361 | 0.0 | 1247440.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597368887799 | 16609179674297 | 16609179775257 | 16597369262316 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7fc65c5cdd00 | 0x7fc640625b80 | 2396984 | 2274235 | 65536 | 29394152 | 177752916 | 299622 | 299622 | 26805584.0 | 26457392.0 | 299622 | 0 | 22550417.0 | 22154566.0 | 402 | 320887 | 0.0 | 0.0 | 16727596.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10689 | 2742 | 0.0 | 2097152.0 | 2657 | 295357 | 2097152.0 | 0.0 | 0 | 21918 | 113.0 | 0.0 | 278124 | 0.0 | 159411.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597369548936 | 16609179799897 | 16609179986295 | 16597370005340 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7fc65c5cdb80 | 0x7fc640625bc0 | 2377912 | 2258359 | 65536 | 29138771 | 180268821 | 297238 | 297238 | 27540632.0 | 27465212.0 | 297238 | 0 | 25272327.0 | 25099404.0 | 351 | 305712 | 0.0 | 0.0 | 19165047.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10566 | 2414 | 0.0 | 2097152.0 | 2335 | 292603 | 2097152.0 | 0.0 | 0 | 22691 | 69.0 | 0.0 | 277867 | 0.0 | 55816.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597370292850 | 16609180010455 | 16609180195894 | 16597370707465 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7fc65c5cda00 | 0x7fc640625c00 | 1650728 | 1537644 | 65536 | 19840128 | 129221445 | 206340 | 206340 | 7256720.0 | 4605042.0 | 206340 | 0 | 4332877.0 | 2978495.0 | 302 | 205824 | 0.0 | 0.0 | 1051276.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10457 | 2418 | 0.0 | 2097152.0 | 2388 | 203037 | 2097152.0 | 0.0 | 0 | 17947 | 219.0 | 0.0 | 186532 | 0.0 | 1358646.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597370993985 | 16609180220374 | 16609180343253 | 16597371338143 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7fc65c5cd880 | 0x7fc640625c40 | 2732864 | 2616561 | 65536 | 33773513 | 229751510 | 341607 | 341607 | 4569285.0 | 3244425.0 | 341607 | 0 | 3095631.0 | 2439238.0 | 302 | 339543 | 0.0 | 0.0 | 526784.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11254 | 2416 | 0.0 | 2097152.0 | 2395 | 336902 | 2097152.0 | 0.0 | 0 | 25269 | 290.0 | 0.0 | 312174 | 0.0 | 661882.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597371625693 | 16609180369173 | 16609180576211 | 16597372101637 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7fc65c5cd700 | 0x7fc640625c80 | 1474296 | 1363807 | 65536 | 17600001 | 112178423 | 184286 | 184286 | 7684666.0 | 4821374.0 | 184286 | 0 | 4505345.0 | 3110730.0 | 302 | 183843 | 0.0 | 0.0 | 1571354.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10166 | 2680 | 0.0 | 2097152.0 | 2657 | 181256 | 2097152.0 | 0.0 | 0 | 17253 | 178.0 | 0.0 | 164496 | 0.0 | 1401906.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597372357148 | 16609180626931 | 16609180737010 | 16597372703336 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7fc65ed2e580 | 0x7fc640625cc0 | 2385528 | 2242619 | 65536 | 28972693 | 150474357 | 298190 | 298190 | 24961918.0 | 24066074.0 | 298190 | 0 | 24096754.0 | 23759588.0 | 302 | 298755 | 0.0 | 0.0 | 17424074.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10746 | 2414 | 0.0 | 2097152.0 | 2391 | 296002 | 2097152.0 | 0.0 | 0 | 22391 | 98.0 | 0.0 | 273803 | 0.0 | 446896.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597372991576 | 16609180762930 | 16609180945009 | 16597373403151 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7fc65ed2e400 | 0x7fc640625d00 | 2388152 | 2265361 | 65536 | 29258371 | 169271602 | 298518 | 298518 | 25934060.0 | 25298522.0 | 298518 | 0 | 24556531.0 | 24228974.0 | 302 | 297973 | 0.0 | 0.0 | 16089196.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10633 | 2415 | 0.0 | 2097152.0 | 2367 | 295080 | 2097152.0 | 0.0 | 0 | 21641 | 218.0 | 0.0 | 276245 | 0.0 | 274424.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597373691051 | 16609180971089 | 16609181158127 | 16597374150975 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7fc65ed2e280 | 0x7fc640625d40 | 1804696 | 1694944 | 65536 | 21892546 | 146391889 | 225586 | 225586 | 6710505.0 | 4107757.0 | 225586 | 0 | 3800653.0 | 2398646.0 | 302 | 224053 | 0.0 | 0.0 | 491142.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10464 | 2903 | 0.0 | 2097152.0 | 2400 | 222723 | 2097152.0 | 0.0 | 0 | 19070 | 152.0 | 0.0 | 203941 | 0.0 | 1387021.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597374438675 | 16609181183727 | 16609181317966 | 16597374860860 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7fc65ed2e100 | 0x7fc640625d80 | 3050880 | 2934374 | 65536 | 37918128 | 265423644 | 381359 | 381359 | 4500176.0 | 3200971.0 | 381359 | 0 | 3070403.0 | 2423893.0 | 302 | 380759 | 0.0 | 0.0 | 472445.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11760 | 2414 | 0.0 | 2097152.0 | 2391 | 376821 | 2097152.0 | 0.0 | 0 | 27276 | 370.0 | 0.0 | 351807 | 0.0 | 649643.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597375147940 | 16609181343886 | 16609181576044 | 16597375644723 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7fc65c5cdf80 | 0x7fc640625dc0 | 1698208 | 1586203 | 65536 | 20450701 | 120250192 | 212275 | 212275 | 6705447.0 | 4333976.0 | 212275 | 0 | 4025071.0 | 2473231.0 | 302 | 210358 | 0.0 | 0.0 | 543470.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10412 | 2427 | 0.0 | 2097152.0 | 2405 | 207921 | 2097152.0 | 0.0 | 0 | 18226 | 141.0 | 0.0 | 190591 | 0.0 | 1530924.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597375900824 | 16609181621324 | 16609181747883 | 16597376253842 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7fc65c5cde00 | 0x7fc640625e00 | 2463424 | 2337754 | 65536 | 30089461 | 194231207 | 307927 | 307927 | 22021636.0 | 21198529.0 | 307927 | 0 | 20404006.0 | 19749097.0 | 302 | 307657 | 0.0 | 0.0 | 12476964.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11241 | 2763 | 0.0 | 2097152.0 | 2391 | 301712 | 2097152.0 | 0.0 | 0 | 22520 | 175.0 | 0.0 | 283450 | 0.0 | 729291.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597376541022 | 16609181773003 | 16609181965002 | 16597376977406 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7fc65c5cdc80 | 0x7fc640625e40 | 2488184 | 2358451 | 65536 | 30352696 | 182799287 | 311022 | 311022 | 21951513.0 | 20359280.0 | 311022 | 0 | 19548761.0 | 18793052.0 | 302 | 309787 | 0.0 | 0.0 | 11193020.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10791 | 2979 | 0.0 | 2097152.0 | 2391 | 306303 | 2097152.0 | 0.0 | 0 | 22206 | 290.0 | 0.0 | 288149 | 0.0 | 907245.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597377245487 | 16609181992521 | 16609182188040 | 16597377731920 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7fc65c5cdb00 | 0x7fc640625e80 | 2133120 | 2015869 | 65536 | 26022935 | 178812857 | 266639 | 266639 | 5097070.0 | 3369569.0 | 266639 | 0 | 3149342.0 | 2300473.0 | 302 | 265747 | 0.0 | 0.0 | 528822.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10654 | 2427 | 0.0 | 2097152.0 | 2367 | 263017 | 2097152.0 | 0.0 | 0 | 21555 | 161.0 | 0.0 | 242329 | 0.0 | 856823.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597377999470 | 16609182213160 | 16609182373319 | 16597378444965 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7fc65c5cd980 | 0x7fc640625ec0 | 3704624 | 3579771 | 65536 | 46309448 | 339919399 | 463077 | 463077 | 4728008.0 | 3224827.0 | 463077 | 0 | 3084931.0 | 2440584.0 | 302 | 461067 | 0.0 | 0.0 | 657587.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 12260 | 2417 | 0.0 | 2097152.0 | 2391 | 458528 | 2097152.0 | 0.0 | 0 | 30927 | 240.0 | 0.0 | 427662 | 0.0 | 638477.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597378732745 | 16609182398599 | 16609182682437 | 16597379266096 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7fc65c5cd800 | 0x7fc640625f00 | 1951288 | 1842007 | 65536 | 23741815 | 155873957 | 243910 | 243910 | 7297845.0 | 4516831.0 | 243910 | 0 | 4144795.0 | 2336331.0 | 302 | 242374 | 0.0 | 0.0 | 420297.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10665 | 2422 | 0.0 | 2097152.0 | 2380 | 241926 | 2097152.0 | 0.0 | 0 | 20113 | 76.0 | 0.0 | 220727 | 0.0 | 1787844.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597379541107 | 16609182756516 | 16609182905635 | 16597379946003 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7fc65c5cd680 | 0x7fc640625f40 | 2677384 | 2563677 | 65536 | 33005536 | 210357226 | 334672 | 334672 | 19196623.0 | 18047375.0 | 334672 | 0 | 17239508.0 | 16303955.0 | 302 | 334490 | 0.0 | 0.0 | 9585624.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11179 | 2408 | 0.0 | 2097152.0 | 2394 | 331943 | 2097152.0 | 0.0 | 0 | 24219 | 101.0 | 0.0 | 308955 | 0.0 | 1030706.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597380232513 | 16609182932035 | 16609183140194 | 16597380744135 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7fc65ed2e500 | 0x7fc640625f80 | 2668216 | 2534361 | 65536 | 32687161 | 200561589 | 333526 | 333526 | 19611161.0 | 16911817.0 | 333526 | 0 | 14610812.0 | 13314585.0 | 302 | 328571 | 0.0 | 0.0 | 8120848.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11348 | 2423 | 0.0 | 2097152.0 | 2392 | 329037 | 2097152.0 | 0.0 | 0 | 22844 | 355.0 | 0.0 | 306125 | 0.0 | 1366089.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597381012295 | 16609183202913 | 16609183410592 | 16597381524858 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7fc65ed2e380 | 0x7fc640625fc0 | 2464120 | 2353368 | 65536 | 30276348 | 187907759 | 308014 | 308014 | 5047537.0 | 3327172.0 | 308014 | 0 | 3179139.0 | 2302320.0 | 302 | 306557 | 0.0 | 0.0 | 537155.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11136 | 2959 | 0.0 | 2097152.0 | 3000 | 303710 | 2097152.0 | 0.0 | 0 | 23148 | 110.0 | 0.0 | 281152 | 0.0 | 880432.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597381792298 | 16609183472991 | 16609183658750 | 16597382260982 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7fc65ed2e200 | 0x7fc640626000 | 4333728 | 4227394 | 65536 | 54716747 | 405999663 | 541715 | 541715 | 4573932.0 | 3194373.0 | 541715 | 0 | 3058417.0 | 2461421.0 | 302 | 542001 | 0.0 | 0.0 | 617376.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 12797 | 2428 | 0.0 | 2097152.0 | 2401 | 539837 | 2097152.0 | 0.0 | 0 | 35263 | 270.0 | 0.0 | 504702 | 0.0 | 592623.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597382546702 | 16609183683870 | 16609184018747 | 16597383179890 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7fc65ed2e080 | 0x7fc640626040 | 2176848 | 2070558 | 65536 | 26753792 | 189908430 | 272105 | 272105 | 6790420.0 | 4421967.0 | 272105 | 0 | 4035954.0 | 2366659.0 | 302 | 272083 | 0.0 | 0.0 | 484031.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 10974 | 2807 | 0.0 | 2097152.0 | 3040 | 270159 | 2097152.0 | 0.0 | 0 | 21606 | 171.0 | 0.0 | 247939 | 0.0 | 1670109.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597383453010 | 16609184085947 | 16609184252826 | 16597383888125 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7fc65c5cdf00 | 0x7fc640626080 | 2921200 | 2800853 | 65536 | 36071147 | 242196427 | 365149 | 365149 | 16738971.0 | 15289561.0 | 365149 | 0 | 14373354.0 | 13180298.0 | 302 | 363095 | 0.0 | 0.0 | 7235690.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11581 | 2680 | 0.0 | 2097152.0 | 2396 | 365605 | 2097152.0 | 0.0 | 0 | 27589 | 132.0 | 0.0 | 336972 | 0.0 | 1146712.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597384175705 | 16609184278106 | 16609184506744 | 16597384697257 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7fc65c5cdd80 | 0x7fc6406260c0 | 2930160 | 2819301 | 65536 | 36275190 | 251643378 | 366269 | 366269 | 19249030.0 | 15925055.0 | 366269 | 0 | 13848185.0 | 12276074.0 | 302 | 367319 | 0.0 | 0.0 | 6616245.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11907 | 2365 | 0.0 | 2097152.0 | 3139 | 364124 | 2097152.0 | 0.0 | 0 | 27491 | 218.0 | 0.0 | 333395 | 0.0 | 1621550.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597384965137 | 16609184566104 | 16609184795222 | 16597385490869 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7fc65c5cdc00 | 0x7fc640626100 | 2774432 | 2664356 | 65536 | 34446929 | 244057420 | 346803 | 346803 | 5158516.0 | 3385356.0 | 346803 | 0 | 3167665.0 | 2275332.0 | 302 | 347173 | 0.0 | 0.0 | 445273.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11278 | 3027 | 0.0 | 2097152.0 | 2403 | 343911 | 2097152.0 | 0.0 | 0 | 25276 | 78.0 | 0.0 | 319845 | 0.0 | 925488.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597385757229 | 16609184854102 | 16609185065300 | 16597386267392 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7fc65c5cda80 | 0x7fc640626140 | 5000680 | 4878878 | 65536 | 63125256 | 466417327 | 625084 | 625084 | 4570446.0 | 3193723.0 | 625084 | 0 | 3035458.0 | 2437322.0 | 302 | 622936 | 0.0 | 0.0 | 635569.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 13252 | 2924 | 0.0 | 2097152.0 | 2393 | 620930 | 2097152.0 | 0.0 | 0 | 39589 | 275.0 | 0.0 | 582090 | 0.0 | 595425.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597386534992 | 16609185101940 | 16609185487857 | 16597387221868 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7fc65c5cd900 | 0x7fc640626180 | 2438448 | 2321474 | 65536 | 29940000 | 196047882 | 304805 | 304805 | 6876525.0 | 4445977.0 | 304805 | 0 | 4073648.0 | 2368156.0 | 302 | 302813 | 0.0 | 0.0 | 518719.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11137 | 2409 | 0.0 | 2097152.0 | 2391 | 300072 | 2097152.0 | 0.0 | 0 | 23078 | 161.0 | 0.0 | 278329 | 0.0 | 1693847.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597387496379 | 16609185529617 | 16609185717456 | 16597387953552 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7fc65c5cd780 | 0x7fc6406261c0 | 3179776 | 3071266 | 65536 | 39591210 | 275704594 | 397471 | 397471 | 16195469.0 | 14623168.0 | 397471 | 0 | 13112623.0 | 11761884.0 | 302 | 396183 | 0.0 | 0.0 | 5637718.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11571 | 2421 | 0.0 | 2097152.0 | 3022 | 396162 | 2097152.0 | 0.0 | 0 | 27069 | 117.0 | 0.0 | 370182 | 0.0 | 1375701.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597388241492 | 16609185743376 | 16609185992814 | 16597388786133 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7fc65c5cd600 | 0x7fc640626200 | 3130992 | 3020482 | 65536 | 39089975 | 282527868 | 391373 | 391373 | 15506762.0 | 11893008.0 | 391373 | 0 | 9487074.0 | 7589259.0 | 302 | 391455 | 0.0 | 0.0 | 2278962.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 11751 | 2974 | 0.0 | 2097152.0 | 2389 | 388134 | 2097152.0 | 0.0 | 0 | 27383 | 289.0 | 0.0 | 361335 | 0.0 | 1831017.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597389054514 | 16609186029134 | 16609186277132 | 16597389604205 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7fc65ed2e480 | 0x7fc640626240 | 3108144 | 2995743 | 65536 | 38705280 | 275978111 | 388517 | 388517 | 5168880.0 | 3416271.0 | 388517 | 0 | 3215938.0 | 2298335.0 | 302 | 387137 | 0.0 | 0.0 | 445821.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11782 | 2417 | 0.0 | 2097152.0 | 2403 | 385819 | 2097152.0 | 0.0 | 0 | 27474 | 106.0 | 0.0 | 359044 | 0.0 | 902582.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597389871935 | 16609186341771 | 16609186579050 | 16597390356558 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7fc65ed2e300 | 0x7fc640626280 | 5750480 | 5630636 | 65536 | 72912690 | 538924829 | 718809 | 718809 | 15215013.0 | 6852992.0 | 718809 | 0 | 5947727.0 | 2137931.0 | 302 | 718675 | 0.0 | 0.0 | 51070.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 13943 | 2416 | 0.0 | 2097152.0 | 2398 | 715820 | 2097152.0 | 0.0 | 0 | 44658 | 0.0 | 0.0 | 672443 | 0.0 | 3814307.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597390621749 | 16609186643529 | 16609187097926 | 16597391375663 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7fc65ed2e180 | 0x7fc6406262c0 | 2912040 | 2803794 | 65536 | 36207092 | 249739879 | 364004 | 364004 | 7142861.0 | 4482705.0 | 364004 | 0 | 4112358.0 | 2356314.0 | 302 | 362712 | 0.0 | 0.0 | 421490.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 11426 | 2424 | 0.0 | 2097152.0 | 2690 | 361689 | 2097152.0 | 0.0 | 0 | 26382 | 167.0 | 0.0 | 334606 | 0.0 | 1754836.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597391651193 | 16609187165766 | 16609187391364 | 16597392163335 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7fc65ed2e000 | 0x7fc640626300 | 3744512 | 3640562 | 65536 | 47114397 | 352053141 | 468063 | 468063 | 14394453.0 | 11686114.0 | 468063 | 0 | 9344760.0 | 7718075.0 | 302 | 467743 | 0.0 | 0.0 | 1821956.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12096 | 2430 | 0.0 | 2097152.0 | 3011 | 467038 | 2097152.0 | 0.0 | 0 | 31276 | 386.0 | 0.0 | 435684 | 0.0 | 1660881.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597392431786 | 16609187451684 | 16609187749442 | 16597393030735 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7fc65c5cde80 | 0x7fc640626340 | 3778728 | 3669109 | 65536 | 47431526 | 345583579 | 472340 | 472340 | 14141849.0 | 12200442.0 | 472340 | 0 | 10008329.0 | 8778738.0 | 302 | 469699 | 0.0 | 0.0 | 2676129.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12153 | 2913 | 0.0 | 2097152.0 | 2393 | 468909 | 2097152.0 | 0.0 | 0 | 31555 | 236.0 | 0.0 | 438128 | 0.0 | 1244998.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597393299496 | 16609187810241 | 16609188108479 | 16597393896205 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7fc65c5cdd00 | 0x7fc640626380 | 3869592 | 3752062 | 65536 | 48536423 | 350183206 | 483698 | 483698 | 14718182.0 | 6818878.0 | 483698 | 0 | 5934273.0 | 2145188.0 | 302 | 482520 | 0.0 | 0.0 | 78200.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 12399 | 2588 | 0.0 | 2097152.0 | 2396 | 481168 | 2097152.0 | 0.0 | 0 | 32999 | 4.0 | 0.0 | 448273 | 0.0 | 3793849.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597394168835 | 16609188173119 | 16609188470077 | 16597394770324 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7fc65c5cdb80 | 0x7fc6406263c0 | 7040584 | 6930382 | 65536 | 89719549 | 674357811 | 880072 | 880072 | 14865675.0 | 6721531.0 | 880072 | 0 | 5823404.0 | 2140962.0 | 302 | 878390 | 0.0 | 0.0 | 75978.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 14963 | 3024 | 0.0 | 2097152.0 | 2387 | 878390 | 2097152.0 | 0.0 | 0 | 52182 | 2.0 | 0.0 | 825056 | 0.0 | 3696484.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597395038025 | 16609188536156 | 16609189093272 | 16597395892535 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7fc65c5cda00 | 0x7fc640626400 | 3418096 | 3304677 | 65536 | 42738010 | 306791583 | 427261 | 427261 | 6094064.0 | 3805503.0 | 427261 | 0 | 3523570.0 | 2308599.0 | 302 | 426091 | 0.0 | 0.0 | 321527.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 12066 | 2412 | 0.0 | 2097152.0 | 2406 | 424615 | 2097152.0 | 0.0 | 0 | 29326 | 93.0 | 0.0 | 395578 | 0.0 | 1259763.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597396165826 | 16609189164152 | 16609189428310 | 16597396731016 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7fc65c5cd880 | 0x7fc640626440 | 4394264 | 4291854 | 65536 | 55502628 | 412214760 | 549282 | 549282 | 14159303.0 | 11756629.0 | 549282 | 0 | 10790398.0 | 9299707.0 | 302 | 553183 | 0.0 | 0.0 | 2367939.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12765 | 2406 | 0.0 | 2097152.0 | 2400 | 545839 | 2097152.0 | 0.0 | 0 | 34991 | 116.0 | 0.0 | 511831 | 0.0 | 1336475.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597396998046 | 16609189467190 | 16609189817907 | 16597397638304 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7fc65c5cd700 | 0x7fc640626480 | 4406928 | 4295671 | 65536 | 55590774 | 407561102 | 550865 | 550865 | 13891242.0 | 11483767.0 | 550865 | 0 | 10069818.0 | 8793443.0 | 302 | 550763 | 0.0 | 0.0 | 2293229.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12635 | 2414 | 0.0 | 2097152.0 | 2391 | 549930 | 2097152.0 | 0.0 | 0 | 35375 | 166.0 | 0.0 | 512873 | 0.0 | 1200868.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597397908105 | 16609189855987 | 16609190206865 | 16597398558752 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7fc65ed2e580 | 0x7fc6406264c0 | 4507312 | 4390819 | 65536 | 56827727 | 420102325 | 563413 | 563413 | 15053153.0 | 6860829.0 | 563413 | 0 | 5964710.0 | 2144413.0 | 302 | 563136 | 0.0 | 0.0 | 72697.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 12959 | 2916 | 0.0 | 2097152.0 | 2394 | 560687 | 2097152.0 | 0.0 | 0 | 36838 | 4.0 | 0.0 | 524402 | 0.0 | 3821889.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597398827002 | 16609190276784 | 16609190624462 | 16597399471590 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7fc65ed2e400 | 0x7fc640626500 | 8348656 | 8229718 | 65536 | 106519343 | 802471459 | 1043581 | 1043581 | 14844439.0 | 6691484.0 | 1043581 | 0 | 5794075.0 | 2144842.0 | 302 | 1040681 | 0.0 | 0.0 | 66788.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 16146 | 2426 | 0.0 | 2097152.0 | 2402 | 1039414 | 2097152.0 | 0.0 | 0 | 60240 | 0.0 | 0.0 | 978630 | 0.0 | 3639293.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597399738201 | 16609190687181 | 16609191349417 | 16597400687558 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7fc65ed2e280 | 0x7fc640626540 | 5778616 | 5654667 | 65536 | 73243761 | 540219317 | 722326 | 722326 | 15134161.0 | 6849285.0 | 722326 | 0 | 5950390.0 | 2151112.0 | 302 | 720449 | 0.0 | 0.0 | 60023.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 13847 | 2776 | 0.0 | 2097152.0 | 3020 | 719090 | 2097152.0 | 0.0 | 0 | 43904 | 0.0 | 0.0 | 675299 | 0.0 | 3803605.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597400962928 | 16609191426216 | 16609191873253 | 16597401700142 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7fc65ed2e100 | 0x7fc640626580 | 5783560 | 5671091 | 65536 | 73432437 | 550434158 | 722944 | 722944 | 20889185.0 | 13078373.0 | 722944 | 0 | 6859586.0 | 2989569.0 | 302 | 721920 | 0.0 | 0.0 | 833200.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14176 | 2427 | 0.0 | 2097152.0 | 2405 | 721070 | 2097152.0 | 0.0 | 0 | 44420 | 105.0 | 0.0 | 675716 | 0.0 | 3863921.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597401967893 | 16609191938533 | 16609192403170 | 16597402732156 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7fc65c5cdf80 | 0x7fc6406265c0 | 5771560 | 5661761 | 65536 | 73264703 | 556538550 | 721444 | 721444 | 20784765.0 | 13006801.0 | 721444 | 0 | 6827503.0 | 2960202.0 | 302 | 721253 | 0.0 | 0.0 | 744215.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13729 | 2408 | 0.0 | 2097152.0 | 2398 | 717887 | 2097152.0 | 0.0 | 0 | 44354 | 122.0 | 0.0 | 674593 | 0.0 | 3855978.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597403000417 | 16609192467649 | 16609192930206 | 16597403722742 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7fc65c5cde00 | 0x7fc640626600 | 5802232 | 5691169 | 65536 | 73703913 | 560930660 | 725278 | 725278 | 15161934.0 | 6860554.0 | 725278 | 0 | 5946380.0 | 2143394.0 | 302 | 726922 | 0.0 | 0.0 | 65862.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 14167 | 2421 | 0.0 | 2097152.0 | 3148 | 725147 | 2097152.0 | 0.0 | 0 | 44396 | 0.0 | 0.0 | 678932 | 0.0 | 3803990.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597403988272 | 16609192995485 | 16609193447962 | 16597404757815 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7fc65c5cdc80 | 0x7fc640626640 | 10943784 | 10818120 | 65536 | 140081201 | 1067012494 | 1367972 | 1367972 | 15023844.0 | 6738366.0 | 1367972 | 0 | 5825806.0 | 2135280.0 | 302 | 1367298 | 0.0 | 0.0 | 58348.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 18256 | 2408 | 0.0 | 2097152.0 | 2999 | 1364037 | 2097152.0 | 0.0 | 0 | 76475 | 0.0 | 0.0 | 1288678 | 0.0 | 3708229.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597405024356 | 16609193513082 | 16609194383636 | 16597406162296 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7fc65c5cdb00 | 0x7fc640626680 | 10975368 | 10848062 | 65536 | 140589606 | 1080408297 | 1371920 | 1371920 | 15290457.0 | 6856411.0 | 1371920 | 0 | 5882552.0 | 2143234.0 | 302 | 1370138 | 0.0 | 0.0 | 61122.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 18322 | 2413 | 0.0 | 2097152.0 | 2387 | 1367943 | 2097152.0 | 0.0 | 0 | 77414 | 0.0 | 0.0 | 1291986 | 0.0 | 3769714.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597406435457 | 16609194460435 | 16609195313549 | 16597407600476 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7fc65c5cd980 | 0x7fc6406266c0 | 10984952 | 10862656 | 65536 | 140715844 | 1072456319 | 1373118 | 1373118 | 21362976.0 | 13161663.0 | 1373118 | 0 | 6763053.0 | 2853254.0 | 302 | 1374184 | 0.0 | 0.0 | 725560.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 18301 | 2983 | 0.0 | 2097152.0 | 3182 | 1370231 | 2097152.0 | 0.0 | 0 | 77632 | 110.0 | 0.0 | 1293681 | 0.0 | 3900563.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597407866947 | 16609195377869 | 16609196264423 | 16597409075205 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7fc65c5cd800 | 0x7fc640626700 | 10979608 | 10855215 | 65536 | 140570835 | 1071180251 | 1372450 | 1372450 | 21278655.0 | 13115963.0 | 1372450 | 0 | 6864167.0 | 2976929.0 | 302 | 1371090 | 0.0 | 0.0 | 656017.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 18310 | 2414 | 0.0 | 2097152.0 | 2343 | 1370756 | 2097152.0 | 0.0 | 0 | 76719 | 86.0 | 0.0 | 1292235 | 0.0 | 3878315.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597409341035 | 16609196340262 | 16609197219296 | 16597410492535 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7fc65c5cd680 | 0x7fc640626740 | 11016520 | 10879874 | 65536 | 140996548 | 1080484982 | 1377064 | 1377064 | 15189958.0 | 6813274.0 | 1377064 | 0 | 5937211.0 | 2152262.0 | 302 | 1375989 | 0.0 | 0.0 | 65995.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 18320 | 2704 | 0.0 | 2097152.0 | 2372 | 1373411 | 2097152.0 | 0.0 | 0 | 77476 | 0.0 | 0.0 | 1296963 | 0.0 | 3762700.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597410759616 | 16609197292416 | 16609198152730 | 16597411940934 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7fc65ed2e500 | 0x7fc640626780 | 21298520 | 21181618 | 65536 | 274521388 | 2123993658 | 2662314 | 2662314 | 14791691.0 | 6626793.0 | 2662314 | 0 | 5724376.0 | 2149224.0 | 302 | 2661669 | 0.0 | 0.0 | 89745.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 27069 | 2420 | 0.0 | 2097152.0 | 2393 | 2659846 | 2097152.0 | 0.0 | 0 | 143249 | 2.0 | 0.0 | 2518518 | 0.0 | 3593390.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597412202475 | 16609198221689 | 16609199920718 | 16597414280062 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7fc65ed2e380 | 0x7fc6406267c0 | 21359112 | 21225032 | 65536 | 275066135 | 2128871773 | 2669888 | 2669888 | 15873503.0 | 6940638.0 | 2669888 | 0 | 5966180.0 | 2131777.0 | 302 | 2669197 | 0.0 | 0.0 | 79938.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 27026 | 2428 | 0.0 | 2097152.0 | 2393 | 2665128 | 2097152.0 | 0.0 | 0 | 141990 | 2.0 | 0.0 | 2525479 | 0.0 | 3847781.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597414500005 | 16609200026157 | 16609201692225 | 16597416516714 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7fc65ed2e200 | 0x7fc640626800 | 21370520 | 21239103 | 65536 | 275256501 | 2130625114 | 2671314 | 2671314 | 22105579.0 | 13340157.0 | 2671314 | 0 | 6733858.0 | 2788340.0 | 302 | 2670619 | 0.0 | 0.0 | 716112.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 27275 | 2944 | 0.0 | 2097152.0 | 2398 | 2667376 | 2097152.0 | 0.0 | 0 | 142814 | 64.0 | 0.0 | 2524635 | 0.0 | 3946708.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597416741686 | 16609201786945 | 16609203511573 | 16597418787145 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7fc65ed2e080 | 0x7fc640626840 | 21354968 | 21228405 | 65536 | 275106366 | 2126826983 | 2669370 | 2669370 | 22115922.0 | 13314581.0 | 2669370 | 0 | 6740696.0 | 2794573.0 | 302 | 2669421 | 0.0 | 0.0 | 766826.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 27199 | 2992 | 0.0 | 2097152.0 | 2403 | 2664376 | 2097152.0 | 0.0 | 0 | 142433 | 86.0 | 0.0 | 2525526 | 0.0 | 3937367.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16597419007547 | 16609203600372 | 16609205317160 | 16597421053416 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7fc65c5cdf00 | 0x7fc640626880 | 21392536 | 21269246 | 65536 | 275593501 | 2127373053 | 2674066 | 2674066 | 15801747.0 | 6923684.0 | 2674066 | 0 | 6013051.0 | 2158959.0 | 302 | 2673807 | 0.0 | 0.0 | 57773.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 27116 | 2419 | 0.0 | 2097152.0 | 2386 | 2671749 | 2097152.0 | 0.0 | 0 | 142210 | 0.0 | 0.0 | 2526867 | 0.0 | 3841101.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597421297797 | 16609205406600 | 16609207080188 | 16597423295327 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 165605 | 165605 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7fc65c5cdd80 | 0x7fc6406268c0 | 42065176 | 41935914 | 65536 | 543485013 | 4224091095 | 5258146 | 5258146 | 14811260.0 | 6654349.0 | 5258146 | 0 | 5743467.0 | 2155328.0 | 302 | 5255477 | 0.0 | 0.0 | 62065.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 524288.0 | 0.0 | 0.0 | 524288.0 | 0.0 | 0.0 | 44880 | 2882 | 0.0 | 2097152.0 | 2397 | 5253187 | 2097152.0 | 0.0 | 0 | 273132 | 2.0 | 0.0 | 4981010 | 0.0 | 3580778.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 16597423514630 | 16609207171068 | 16609210528804 | 16597427204631 |