90 KiB
90 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_BUSY_CU_CYCLES | SQ_WAVES | SQ_WAVE_CYCLES | GRBM_COUNT | GRBM_GUI_ACTIVE | TCP_GATE_EN1_sum | TCP_GATE_EN2_sum | TCP_TD_TCP_STALL_CYCLES_sum | TCP_TCR_TCP_STALL_CYCLES_sum | TCP_READ_TAGCONFLICT_STALL_CYCLES_sum | TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum | TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum | TCP_TA_TCP_STATE_READ_sum | TCP_VOLATILE_sum | TCP_TOTAL_ACCESSES_sum | TCP_TOTAL_READ_sum | TCP_TOTAL_WRITE_sum | TCP_TOTAL_ATOMIC_WITH_RET_sum | TCP_TOTAL_ATOMIC_WITHOUT_RET_sum | TCP_TOTAL_WRITEBACK_INVALIDATES_sum | TCP_TOTAL_CACHE_ACCESSES_sum | TCP_UTCL1_TRANSLATION_MISS_sum | TCP_UTCL1_TRANSLATION_HIT_sum | TCP_UTCL1_PERMISSION_MISS_sum | TCP_UTCL1_REQUEST_sum | TCP_TCP_LATENCY_sum | TCP_TCC_READ_REQ_LATENCY_sum | TCP_TCC_WRITE_REQ_LATENCY_sum | TCP_TCC_READ_REQ_sum | TCP_TCC_WRITE_REQ_sum | TCP_TCC_ATOMIC_WITH_RET_REQ_sum | TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum | TCP_TCC_NC_READ_REQ_sum | TCP_TCC_NC_WRITE_REQ_sum | TCP_TCC_NC_ATOMIC_REQ_sum | TCP_TCC_UC_READ_REQ_sum | TCP_TCC_UC_WRITE_REQ_sum | TCP_TCC_UC_ATOMIC_REQ_sum | TCP_TCC_CC_READ_REQ_sum | TCP_TCC_CC_WRITE_REQ_sum | TCP_TCC_CC_ATOMIC_REQ_sum | TCP_TCC_RW_READ_REQ_sum | TCP_TCC_RW_WRITE_REQ_sum | TCP_TCC_RW_ATOMIC_REQ_sum | TCP_PENDING_STALL_CYCLES_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 151449 | 151449 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7f022a604280 | 3076512 | 2989574 | 38842013 | 524288 | 242121889 | 384563 | 384563 | 39106410.0 | 38083768.0 | 3.0 | 4067115.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 0.0 | 33554432.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13284.0 | 8242484.0 | 0.0 | 8388608.0 | 313264439.0 | 0.0 | 1463068751.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 19128712.0 | 16219564975519 | 16227562900659 | 16227563139060 | 16219710251567 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 151449 | 151449 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7f022a623f80 | 265008 | 169056 | 1410267 | 512 | 1756157 | 33125 | 33125 | 2433904.0 | 174869.0 | 208.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4096.0 | 65536.0 | 65536.0 | 65536.0 | 0.0 | 0.0 | 0.0 | 104.0 | 16384.0 | 104.0 | 15240.0 | 0.0 | 16384.0 | 3868580.0 | 5430964.0 | 0.0 | 8192.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8192.0 | 0.0 | 0.0 | 124860.0 | 16219715974101 | 16227567936047 | 16227567949487 | 16219716213625 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7f022d5aa380 | 0x7f022a623fc0 | 1390672 | 1257890 | 16244343 | 65536 | 95138833 | 173833 | 173833 | 16588733.0 | 13862093.0 | 23826.0 | 680816.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6560.0 | 2028965.0 | 0.0 | 2097152.0 | 894852738.0 | 2174316602.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11602704.0 | 16219716751753 | 16227568019567 | 16227568112208 | 16219717067306 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7f022d5aa200 | 0x7f022a624000 | 2499432 | 2374679 | 30755675 | 65536 | 212914879 | 312428 | 312428 | 31107159.0 | 28741345.0 | 0.0 | 572006.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12853.0 | 8248294.0 | 0.0 | 8388608.0 | 854492054.0 | 3923958668.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20553314.0 | 16219717610354 | 16227568146928 | 16227568325489 | 16219718027305 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7f022d5aa080 | 0x7f022a624040 | 2497328 | 2392794 | 30982219 | 65536 | 226784148 | 312165 | 312165 | 31342537.0 | 29793298.0 | 0.0 | 641236.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12902.0 | 8247484.0 | 0.0 | 8388608.0 | 850784226.0 | 3853370502.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20646198.0 | 16219718564363 | 16227568350929 | 16227568530610 | 16219719066582 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7f022af09f00 | 0x7f022a624080 | 1395672 | 1274405 | 16452085 | 65536 | 79934736 | 174458 | 174458 | 16804975.0 | 13765491.0 | 24080.0 | 422181.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6612.0 | 2027824.0 | 0.0 | 2097152.0 | 994245048.0 | 2214385136.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11607319.0 | 16219719601950 | 16227568555250 | 16227568649011 | 16219719931123 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7f022af09d80 | 0x7f022a6240c0 | 1333088 | 1226747 | 15857629 | 65536 | 82083564 | 166635 | 166635 | 16183757.0 | 14566834.0 | 23494.0 | 973484.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6593.0 | 2028813.0 | 0.0 | 2097152.0 | 983783794.0 | 2310020724.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12337788.0 | 16219720467601 | 16227568674291 | 16227568766291 | 16219720909051 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7f022af09c00 | 0x7f022a624100 | 1431048 | 1328751 | 17217788 | 65536 | 115782054 | 178880 | 178880 | 17509757.0 | 14249171.0 | 25266.0 | 327616.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6606.0 | 2031204.0 | 0.0 | 2097152.0 | 1004688991.0 | 2215423101.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11846944.0 | 16219721455469 | 16227568806292 | 16227568898772 | 16219721795641 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7f022af09a80 | 0x7f022a624140 | 2477096 | 2371831 | 30735250 | 65536 | 227866406 | 309636 | 309636 | 31070005.0 | 29507529.0 | 0.0 | 910440.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12809.0 | 8248606.0 | 0.0 | 8388608.0 | 825473681.0 | 3946828205.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20650851.0 | 16219722331500 | 16227568930772 | 16227569110773 | 16219722860388 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7f022af09900 | 0x7f022a624180 | 2468032 | 2345045 | 30388864 | 65536 | 213029389 | 308503 | 308503 | 30721839.0 | 28564368.0 | 0.0 | 404286.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12618.0 | 8250932.0 | 0.0 | 8388608.0 | 847029950.0 | 3986558870.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20983551.0 | 16219723394486 | 16227569139573 | 16227569321014 | 16219723791618 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7f022af09780 | 0x7f022a6241c0 | 1318560 | 1215826 | 15738282 | 65536 | 90484316 | 164819 | 164819 | 16041953.0 | 14400932.0 | 24885.0 | 561732.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6628.0 | 2049765.0 | 0.0 | 2097152.0 | 1011554299.0 | 2365821621.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11392083.0 | 16219724327956 | 16227569349655 | 16227569443095 | 16219724780966 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7f022af09600 | 0x7f022a624200 | 1469688 | 1368233 | 17708983 | 65536 | 114319146 | 183710 | 183710 | 18023205.0 | 14525386.0 | 24026.0 | 816644.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6615.0 | 2051730.0 | 0.0 | 2097152.0 | 978769267.0 | 2303738539.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12258186.0 | 16219725315504 | 16227569483255 | 16227569576376 | 16219725643457 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7f022d5aa480 | 0x7f022a624240 | 1309624 | 1207786 | 15637183 | 65536 | 106067050 | 163702 | 163702 | 15937303.0 | 14409292.0 | 24436.0 | 551561.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6648.0 | 2029425.0 | 0.0 | 2097152.0 | 999115972.0 | 2280322338.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11479658.0 | 16219726189695 | 16227569620856 | 16227569714457 | 16219726639415 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7f022d5aa300 | 0x7f022a624280 | 2482528 | 2375471 | 30798546 | 65536 | 225244456 | 310315 | 310315 | 31117195.0 | 29558235.0 | 0.0 | 998336.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12870.0 | 8247855.0 | 0.0 | 8388608.0 | 875980685.0 | 3813849219.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 21148176.0 | 16219727175193 | 16227569754297 | 16227569933178 | 16219727682662 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7f022d5aa180 | 0x7f022a6242c0 | 2476376 | 2370338 | 30705715 | 65536 | 220541857 | 309546 | 309546 | 31050479.0 | 29482297.0 | 0.0 | 1002719.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12562.0 | 8251222.0 | 0.0 | 8388608.0 | 855156177.0 | 3959307381.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20077740.0 | 16219728218010 | 16227569971578 | 16227570151099 | 16219728697009 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7f022d5aa000 | 0x7f022a624300 | 1322456 | 1215137 | 15723453 | 65536 | 84578733 | 165306 | 165306 | 16033035.0 | 14461100.0 | 24317.0 | 578277.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6615.0 | 2054320.0 | 0.0 | 2097152.0 | 987775405.0 | 2355093222.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13127425.0 | 16219729233027 | 16227570179259 | 16227570273660 | 16219729617169 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7f022af09e80 | 0x7f022a624340 | 1309352 | 1207130 | 15633719 | 65536 | 93133777 | 163668 | 163668 | 15928827.0 | 14392001.0 | 24210.0 | 948052.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6574.0 | 2031545.0 | 0.0 | 2097152.0 | 1002515839.0 | 2322675189.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12229625.0 | 16219730152807 | 16227570311580 | 16227570405021 | 16219730543418 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7f022af09d00 | 0x7f022a624380 | 1309840 | 1204612 | 15577651 | 65536 | 93522828 | 163729 | 163729 | 15896145.0 | 14285915.0 | 24405.0 | 767208.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6564.0 | 2029713.0 | 0.0 | 2097152.0 | 982095459.0 | 2316806402.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12178509.0 | 16219731086046 | 16227570438141 | 16227570531421 | 16219731489457 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7f022af09b80 | 0x7f022a6243c0 | 2475776 | 2374375 | 30787141 | 65536 | 228786661 | 309471 | 309471 | 31103181.0 | 29539627.0 | 0.0 | 1002057.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13000.0 | 8246973.0 | 0.0 | 8388608.0 | 868946855.0 | 3811280101.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 21025000.0 | 16219732025446 | 16227570563262 | 16227570741823 | 16219732495585 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7f022af09a00 | 0x7f022a624400 | 2459368 | 2354063 | 30480708 | 65536 | 219050734 | 307420 | 307420 | 30839047.0 | 29328167.0 | 0.0 | 956809.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12605.0 | 8251073.0 | 0.0 | 8388608.0 | 879923556.0 | 3768310018.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20839794.0 | 16219733030434 | 16227570773983 | 16227570954624 | 16219733501953 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7f022af09880 | 0x7f022a624440 | 1306200 | 1205334 | 15598656 | 65536 | 104508244 | 163274 | 163274 | 15905453.0 | 14356968.0 | 24565.0 | 895349.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6576.0 | 2030919.0 | 0.0 | 2097152.0 | 984336727.0 | 2238119997.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12299914.0 | 16219734037241 | 16227570983744 | 16227571077345 | 16219734418793 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7f022af09700 | 0x7f022a624480 | 1312176 | 1208238 | 15626051 | 65536 | 95374143 | 164021 | 164021 | 15943257.0 | 14311337.0 | 24289.0 | 843243.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6595.0 | 2029585.0 | 0.0 | 2097152.0 | 967331056.0 | 2325847979.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12203795.0 | 16219734954771 | 16227571107425 | 16227571200865 | 16219735324493 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7f022d5aa580 | 0x7f022a6244c0 | 1309320 | 1204304 | 15596572 | 65536 | 102105141 | 163664 | 163664 | 15892050.0 | 14418891.0 | 23608.0 | 822941.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6588.0 | 2031537.0 | 0.0 | 2097152.0 | 973140143.0 | 2212813454.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12256527.0 | 16219735867201 | 16227571235585 | 16227571327746 | 16219736255022 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7f022d5aa400 | 0x7f022a624500 | 2464296 | 2356943 | 30538297 | 65536 | 223840483 | 308036 | 308036 | 30876409.0 | 29309993.0 | 0.0 | 960404.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12964.0 | 8247316.0 | 0.0 | 8388608.0 | 859123508.0 | 3759240854.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20612663.0 | 16219736791410 | 16227571374466 | 16227571553987 | 16219737258190 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7f022d5aa280 | 0x7f022a624540 | 2445960 | 2340092 | 30302085 | 65536 | 218211604 | 305744 | 305744 | 30657541.0 | 29018539.0 | 0.0 | 871605.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12500.0 | 8252156.0 | 0.0 | 8388608.0 | 858131332.0 | 3858441631.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20893663.0 | 16219737793388 | 16227571580707 | 16227571762308 | 16219738290517 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7f022d5aa100 | 0x7f022a624580 | 1299736 | 1195886 | 15470360 | 65536 | 95998542 | 162466 | 162466 | 15782707.0 | 14328440.0 | 24867.0 | 750403.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6579.0 | 2031211.0 | 0.0 | 2097152.0 | 981897361.0 | 2361801752.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12215142.0 | 16219738825885 | 16227571788229 | 16227571879909 | 16219739203177 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7f022af09f80 | 0x7f022a6245c0 | 1295848 | 1190256 | 15387236 | 65536 | 88887399 | 161980 | 161980 | 15709517.0 | 14233296.0 | 24395.0 | 737599.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6577.0 | 2030299.0 | 0.0 | 2097152.0 | 967374776.0 | 2200958427.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12236236.0 | 16219739737975 | 16227571906469 | 16227571998470 | 16219740118977 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7f022af09e00 | 0x7f022a624600 | 1309984 | 1194981 | 15451431 | 65536 | 88461159 | 163747 | 163747 | 15771059.0 | 14239389.0 | 24642.0 | 619284.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6549.0 | 2031181.0 | 0.0 | 2097152.0 | 987993748.0 | 2190604012.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12172597.0 | 16219740661415 | 16227572032870 | 16227572125831 | 16219741037266 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7f022af09c80 | 0x7f022a624640 | 2464816 | 2352130 | 30451732 | 65536 | 224128032 | 308101 | 308101 | 30813853.0 | 29313398.0 | 0.0 | 950130.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12954.0 | 8247532.0 | 0.0 | 8388608.0 | 861565952.0 | 3883785747.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20801209.0 | 16219741571864 | 16227572152071 | 16227572332072 | 16219742030714 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7f022af09b00 | 0x7f022a624680 | 2461040 | 2353515 | 30468770 | 65536 | 222810286 | 307629 | 307629 | 30831793.0 | 29281718.0 | 0.0 | 859965.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12740.0 | 8249616.0 | 0.0 | 8388608.0 | 864131064.0 | 3929041828.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20802873.0 | 16219742567252 | 16227572357352 | 16227572536393 | 16219743037772 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7f022af09980 | 0x7f022a6246c0 | 1311424 | 1203299 | 15571527 | 65536 | 94415307 | 163927 | 163927 | 15878933.0 | 14331553.0 | 25015.0 | 633741.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6532.0 | 2031390.0 | 0.0 | 2097152.0 | 993698799.0 | 2371012542.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12123084.0 | 16219743572970 | 16227572563273 | 16227572656874 | 16219743959442 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7f022af09800 | 0x7f022a624700 | 1319528 | 1209920 | 15605494 | 65536 | 88142706 | 164940 | 164940 | 15964967.0 | 14370138.0 | 23510.0 | 845440.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6527.0 | 2029706.0 | 0.0 | 2097152.0 | 939606021.0 | 2214493828.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12171489.0 | 16219744494780 | 16227572681834 | 16227572775434 | 16219744883721 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7f022af09680 | 0x7f022a624740 | 1307264 | 1202702 | 15563142 | 65536 | 99299879 | 163407 | 163407 | 15871367.0 | 14256907.0 | 24128.0 | 950974.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6541.0 | 2031476.0 | 0.0 | 2097152.0 | 975310312.0 | 2274414300.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12176774.0 | 16219745425769 | 16227572809354 | 16227572902315 | 16219745810661 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7f022d5aa500 | 0x7f022a624780 | 2439240 | 2337398 | 30273421 | 65536 | 226859499 | 304904 | 304904 | 30622207.0 | 29126087.0 | 0.0 | 456659.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12852.0 | 8248431.0 | 0.0 | 8388608.0 | 858822988.0 | 3816408485.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20616744.0 | 16219746347219 | 16227572926475 | 16227573105676 | 16219746818369 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7f022d5aa380 | 0x7f022a6247c0 | 2470352 | 2362938 | 30621066 | 65536 | 223882632 | 308793 | 308793 | 30954331.0 | 29384532.0 | 0.0 | 903614.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12887.0 | 8248325.0 | 0.0 | 8388608.0 | 861091734.0 | 3887824660.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20643359.0 | 16219747354267 | 16227573132556 | 16227573313197 | 16219747813877 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7f022d5aa200 | 0x7f022a624800 | 1311088 | 1209110 | 15638101 | 65536 | 95268253 | 163885 | 163885 | 15954619.0 | 14327993.0 | 24847.0 | 671677.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6593.0 | 2029227.0 | 0.0 | 2097152.0 | 958652993.0 | 2318105086.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12036825.0 | 16219748348935 | 16227573338958 | 16227573432398 | 16219748747716 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7f022d5aa080 | 0x7f022a624840 | 1298192 | 1195841 | 15434838 | 65536 | 89795885 | 162273 | 162273 | 15782161.0 | 14199814.0 | 24947.0 | 529713.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6577.0 | 2055912.0 | 0.0 | 2097152.0 | 906927897.0 | 2158742201.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12181887.0 | 16219749282544 | 16227573457678 | 16227573551919 | 16219749654726 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7f022af09f00 | 0x7f022a624880 | 1311592 | 1207133 | 15616629 | 65536 | 106014949 | 163948 | 163948 | 15928931.0 | 14311051.0 | 22655.0 | 1085245.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6585.0 | 2030156.0 | 0.0 | 2097152.0 | 988291070.0 | 2236204364.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12049512.0 | 16219750198574 | 16227573585039 | 16227573677839 | 16219750587955 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7f022af09d80 | 0x7f022a6248c0 | 2453664 | 2348433 | 30433799 | 65536 | 223561488 | 306707 | 306707 | 30765779.0 | 29221696.0 | 0.0 | 955032.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12835.0 | 8248492.0 | 0.0 | 8388608.0 | 866108711.0 | 3766349436.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20775908.0 | 16219751122583 | 16227573702800 | 16227573882001 | 16219751596203 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7f022af09c00 | 0x7f022a624900 | 2445544 | 2334470 | 30227063 | 65536 | 221383257 | 305692 | 305692 | 30584247.0 | 29112372.0 | 0.0 | 841994.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12868.0 | 8248559.0 | 0.0 | 8388608.0 | 866721897.0 | 3815007948.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20688479.0 | 16219752132251 | 16227573908881 | 16227574088722 | 16219752601090 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7f022af09a80 | 0x7f022a624940 | 1304096 | 1200713 | 15527437 | 65536 | 95969857 | 163011 | 163011 | 15845393.0 | 14236919.0 | 25197.0 | 536579.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6540.0 | 2031346.0 | 0.0 | 2097152.0 | 975175277.0 | 2355118236.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12124156.0 | 16219753136159 | 16227574116562 | 16227574209683 | 16219753509510 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7f022af09900 | 0x7f022a624980 | 1311072 | 1205748 | 15553796 | 65536 | 94857841 | 163883 | 163883 | 15911043.0 | 14180696.0 | 24656.0 | 647722.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6528.0 | 2039994.0 | 0.0 | 2097152.0 | 913820381.0 | 2186871184.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12135155.0 | 16219754044578 | 16227574234323 | 16227574328563 | 16219754438870 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7f022af09780 | 0x7f022a6249c0 | 1298696 | 1197431 | 15492579 | 65536 | 94740404 | 162336 | 162336 | 15802701.0 | 14283324.0 | 24505.0 | 696834.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6577.0 | 2039878.0 | 0.0 | 2097152.0 | 949445577.0 | 2270478807.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12129176.0 | 16219754982268 | 16227574361203 | 16227574454964 | 16219755357780 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7f022af09600 | 0x7f022a624a00 | 2448048 | 2334370 | 30228757 | 65536 | 224485117 | 306005 | 306005 | 30582999.0 | 28980189.0 | 0.0 | 909024.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12848.0 | 8248474.0 | 0.0 | 8388608.0 | 860180550.0 | 3834278799.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20652987.0 | 16219755893918 | 16227574481524 | 16227574660245 | 16219756350208 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7f022d5aa480 | 0x7f022a624a40 | 2432960 | 2326981 | 30131928 | 65536 | 215469089 | 304119 | 304119 | 30486825.0 | 28939745.0 | 0.0 | 872080.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12645.0 | 8250828.0 | 0.0 | 8388608.0 | 838707203.0 | 3955193369.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20312206.0 | 16219756885186 | 16227574686165 | 16227574865206 | 16219757342356 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7f022d5aa300 | 0x7f022a624a80 | 1286848 | 1183577 | 15300193 | 65536 | 93861420 | 160855 | 160855 | 15622755.0 | 14149041.0 | 24779.0 | 622851.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6553.0 | 2029900.0 | 0.0 | 2097152.0 | 958692124.0 | 2149141255.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12162566.0 | 16219757878014 | 16227574890006 | 16227574982167 | 16219758256776 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7f022d5aa180 | 0x7f022a624ac0 | 1295024 | 1189849 | 15355502 | 65536 | 98253597 | 161877 | 161877 | 15704369.0 | 14126873.0 | 24557.0 | 310527.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6558.0 | 2038901.0 | 0.0 | 2097152.0 | 880149001.0 | 2076149406.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11947699.0 | 16219758792574 | 16227575009687 | 16227575102968 | 16219759162436 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7f022d5aa000 | 0x7f022a624b00 | 1286720 | 1182738 | 15290160 | 65536 | 90082204 | 160839 | 160839 | 15611835.0 | 14142719.0 | 23678.0 | 833033.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6573.0 | 2040365.0 | 0.0 | 2097152.0 | 965794496.0 | 2324855464.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12057246.0 | 16219759705544 | 16227575137528 | 16227575229528 | 16219760111875 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7f022af09e80 | 0x7f022a624b40 | 2424440 | 2320895 | 30075416 | 65536 | 223337535 | 303054 | 303054 | 30407915.0 | 28851208.0 | 0.0 | 799050.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12826.0 | 8248641.0 | 0.0 | 8388608.0 | 862331490.0 | 3789100824.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20231972.0 | 16219760647623 | 16227575255929 | 16227575433850 | 16219761110702 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7f022af09d00 | 0x7f022a624b80 | 2434400 | 2323534 | 30085806 | 65536 | 209179832 | 304299 | 304299 | 30442209.0 | 28879337.0 | 0.0 | 867859.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12690.0 | 8250459.0 | 0.0 | 8388608.0 | 859808284.0 | 3778500683.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20240943.0 | 16219761645701 | 16227575459610 | 16227575638331 | 16219762099820 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7f022af09b80 | 0x7f022a624bc0 | 1297288 | 1192761 | 15404671 | 65536 | 85346691 | 162160 | 162160 | 15742147.0 | 14154608.0 | 25105.0 | 461337.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6546.0 | 2029877.0 | 0.0 | 2097152.0 | 971222586.0 | 2238987538.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12112097.0 | 16219762634809 | 16227575663451 | 16227575757371 | 16219763013200 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7f022af09a00 | 0x7f022a624c00 | 1307696 | 1202664 | 15510892 | 65536 | 99327678 | 163461 | 163461 | 15870795.0 | 14184483.0 | 23832.0 | 203416.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6632.0 | 2056712.0 | 0.0 | 2097152.0 | 747123847.0 | 1918330928.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11960418.0 | 16219763548309 | 16227575783932 | 16227575879132 | 16219763919430 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7f022af09880 | 0x7f022a624c40 | 1296296 | 1189186 | 15366738 | 65536 | 81772320 | 162036 | 162036 | 15695503.0 | 14139377.0 | 24724.0 | 627761.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6563.0 | 2040150.0 | 0.0 | 2097152.0 | 958762749.0 | 2235061754.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12005722.0 | 16219764462938 | 16227575911292 | 16227576006013 | 16219764838770 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7f022af09700 | 0x7f022a624c80 | 2409648 | 2308209 | 29907398 | 65536 | 220389135 | 301205 | 301205 | 30242815.0 | 28759086.0 | 0.0 | 726652.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12645.0 | 8250976.0 | 0.0 | 8388608.0 | 861539121.0 | 3917854615.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20374341.0 | 16219765374098 | 16227576031613 | 16227576211294 | 16219765840328 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7f022d5aa580 | 0x7f022a624cc0 | 2408168 | 2304648 | 29840040 | 65536 | 213912001 | 301020 | 301020 | 30196613.0 | 28543447.0 | 0.0 | 729950.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12525.0 | 8252227.0 | 0.0 | 8388608.0 | 863511517.0 | 3847176018.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20263919.0 | 16219766376676 | 16227576237054 | 16227576417375 | 16219766845406 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7f022d5aa400 | 0x7f022a624d00 | 1307152 | 1195623 | 15409126 | 65536 | 90450942 | 163393 | 163393 | 15779483.0 | 14143889.0 | 23989.0 | 613524.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6556.0 | 2029114.0 | 0.0 | 2097152.0 | 960514284.0 | 2235439976.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12069092.0 | 16219767381584 | 16227576441855 | 16227576536736 | 16219767746016 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7f022d5aa280 | 0x7f022a624d40 | 1353128 | 1250748 | 16120577 | 65536 | 105183981 | 169140 | 169140 | 16495991.0 | 14534404.0 | 23336.0 | 137863.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6595.0 | 2039153.0 | 0.0 | 2097152.0 | 618563159.0 | 1616404296.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12077538.0 | 16219768280874 | 16227576563616 | 16227576663137 | 16219768653606 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7f022d5aa100 | 0x7f022a624d80 | 1298696 | 1195930 | 15454670 | 65536 | 97882200 | 162336 | 162336 | 15783279.0 | 14214457.0 | 23675.0 | 783833.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6558.0 | 2039835.0 | 0.0 | 2097152.0 | 925940726.0 | 2171519463.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11979198.0 | 16219769195864 | 16227576696097 | 16227576789217 | 16219769610035 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7f022af09f80 | 0x7f022a624dc0 | 2417880 | 2314150 | 29958545 | 65536 | 221545965 | 302234 | 302234 | 30320217.0 | 28778389.0 | 0.0 | 673379.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12695.0 | 8250280.0 | 0.0 | 8388608.0 | 858986522.0 | 3848639526.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20399392.0 | 16219770146443 | 16227576817857 | 16227576996098 | 16219770624152 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7f022af09e00 | 0x7f022a624e00 | 2426616 | 2319778 | 30049119 | 65536 | 214389236 | 303326 | 303326 | 30393251.0 | 28924482.0 | 0.0 | 807506.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12600.0 | 8251338.0 | 0.0 | 8388608.0 | 858337177.0 | 3949592840.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20201435.0 | 16219771158890 | 16227577021699 | 16227577203140 | 16219771625190 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7f022af09c80 | 0x7f022a624e40 | 1303272 | 1194323 | 15401440 | 65536 | 92244675 | 162908 | 162908 | 15762245.0 | 14129232.0 | 24883.0 | 473910.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6568.0 | 2028511.0 | 0.0 | 2097152.0 | 931089039.0 | 2228193978.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12075551.0 | 16219772160788 | 16227577227940 | 16227577322660 | 16219772539380 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7f022af09b00 | 0x7f022a624e80 | 1412104 | 1305934 | 16828354 | 65536 | 106235427 | 176512 | 176512 | 17213383.0 | 14801046.0 | 22256.0 | 25242.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6592.0 | 2038049.0 | 0.0 | 2097152.0 | 551601396.0 | 1440944113.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12439709.0 | 16219773074328 | 16227577346981 | 16227577450981 | 16219773458739 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7f022af09980 | 0x7f022a624ec0 | 1299296 | 1193241 | 15414346 | 65536 | 90033040 | 162411 | 162411 | 15748231.0 | 14175165.0 | 25042.0 | 556509.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6588.0 | 2039320.0 | 0.0 | 2097152.0 | 972212096.0 | 2262054696.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11952722.0 | 16219774001647 | 16227577483941 | 16227577576582 | 16219774385959 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7f022af09800 | 0x7f022a624f00 | 2426328 | 2321039 | 30061302 | 65536 | 223251618 | 303290 | 303290 | 30409709.0 | 28781812.0 | 0.0 | 860217.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12893.0 | 8248498.0 | 0.0 | 8388608.0 | 865044281.0 | 3702500514.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20371320.0 | 16219774921447 | 16227577603622 | 16227577784583 | 16219775376577 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7f022af09680 | 0x7f022a624f40 | 2409856 | 2301491 | 29812819 | 65536 | 213985665 | 301231 | 301231 | 30155429.0 | 28587607.0 | 0.0 | 727672.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12770.0 | 8249594.0 | 0.0 | 8388608.0 | 853820426.0 | 3857684297.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20331146.0 | 16219775912055 | 16227577811303 | 16227577992104 | 16219776374425 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7f022d5aa500 | 0x7f022a624f80 | 1291784 | 1184271 | 15272426 | 65536 | 90809261 | 161472 | 161472 | 15631621.0 | 14109839.0 | 25203.0 | 562857.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6581.0 | 2054437.0 | 0.0 | 2097152.0 | 882331580.0 | 2228907050.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12018981.0 | 16219776909213 | 16227578017544 | 16227578111465 | 16219777272215 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7f022d5aa380 | 0x7f022a624fc0 | 1468256 | 1356085 | 17470314 | 65536 | 106630660 | 183531 | 183531 | 17865307.0 | 15211524.0 | 21000.0 | 31741.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6627.0 | 2038740.0 | 0.0 | 2097152.0 | 453914947.0 | 1232673301.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12612789.0 | 16219777807793 | 16227578136905 | 16227578244106 | 16219778190725 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7f022d5aa200 | 0x7f022a625000 | 1300600 | 1188466 | 15346128 | 65536 | 88154589 | 162574 | 162574 | 15686169.0 | 14103254.0 | 24296.0 | 771340.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6603.0 | 2039724.0 | 0.0 | 2097152.0 | 921400274.0 | 2175937398.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11930099.0 | 16219778744933 | 16227578276106 | 16227578370346 | 16219779225442 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7f022d5aa080 | 0x7f022a625040 | 2390192 | 2287023 | 29607908 | 65536 | 214043049 | 298773 | 298773 | 29967657.0 | 28341357.0 | 0.0 | 595137.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12828.0 | 8249268.0 | 0.0 | 8388608.0 | 857372074.0 | 3940281788.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20004403.0 | 16219779759260 | 16227578395467 | 16227578574668 | 16219780303538 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7f022af09f00 | 0x7f022a625080 | 2415048 | 2311865 | 29908115 | 65536 | 219311939 | 301880 | 301880 | 30290421.0 | 28682376.0 | 0.0 | 704809.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12604.0 | 8251349.0 | 0.0 | 8388608.0 | 860055341.0 | 3886937349.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20053039.0 | 16219780838286 | 16227578599788 | 16227578779789 | 16219781366535 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7f022af09d80 | 0x7f022a6250c0 | 1307144 | 1202273 | 15481136 | 65536 | 96154063 | 163392 | 163392 | 15865621.0 | 14205129.0 | 23404.0 | 859588.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6554.0 | 2039305.0 | 0.0 | 2097152.0 | 894786817.0 | 2088151272.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11988116.0 | 16219781898993 | 16227578805549 | 16227578900589 | 16219782376542 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7f022af09c00 | 0x7f022a625100 | 1530120 | 1425532 | 18382018 | 65536 | 124997564 | 191264 | 191264 | 18768105.0 | 15066495.0 | 20602.0 | 12787.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6605.0 | 2040657.0 | 0.0 | 2097152.0 | 405847945.0 | 1086293038.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12374115.0 | 16219782912040 | 16227578926350 | 16227579041870 | 16219783322941 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7f022af09a80 | 0x7f022a625140 | 1289264 | 1186542 | 15336373 | 65536 | 88780879 | 161157 | 161157 | 15661261.0 | 14051505.0 | 24897.0 | 596696.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6573.0 | 2040575.0 | 0.0 | 2097152.0 | 903089125.0 | 2117114075.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11966132.0 | 16219783864579 | 16227579089230 | 16227579183471 | 16219784230881 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7f022af09900 | 0x7f022a625180 | 2420136 | 2319058 | 30035880 | 65536 | 222076235 | 302516 | 302516 | 30383917.0 | 28735757.0 | 0.0 | 675692.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12862.0 | 8248861.0 | 0.0 | 8388608.0 | 857197498.0 | 3725512611.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20252958.0 | 16219784766479 | 16227579209231 | 16227579388752 | 16219785217529 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7f022af09780 | 0x7f022a6251c0 | 2420416 | 2314504 | 29949769 | 65536 | 219167310 | 302551 | 302551 | 30324793.0 | 28690256.0 | 0.0 | 756125.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12786.0 | 8249494.0 | 0.0 | 8388608.0 | 852356339.0 | 3864633940.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20182737.0 | 16219785752327 | 16227579414832 | 16227579593073 | 16219786244866 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7f022af09600 | 0x7f022a625200 | 1311960 | 1205148 | 15544083 | 65536 | 93993860 | 163994 | 163994 | 15903009.0 | 14190021.0 | 24195.0 | 573527.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6591.0 | 2038116.0 | 0.0 | 2097152.0 | 900279295.0 | 2099581988.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11913336.0 | 16219786780595 | 16227579619474 | 16227579714034 | 16219787177456 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7f022d5aa480 | 0x7f022a625240 | 1620288 | 1505610 | 19371870 | 65536 | 130472736 | 202535 | 202535 | 19809275.0 | 15677290.0 | 19926.0 | 3036.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6605.0 | 2041353.0 | 0.0 | 2097152.0 | 406093592.0 | 1066208965.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12722717.0 | 16219787713134 | 16227579739954 | 16227579860595 | 16219788121305 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7f022d5aa300 | 0x7f022a625280 | 1296104 | 1190922 | 15373326 | 65536 | 90488235 | 162012 | 162012 | 15718045.0 | 14102390.0 | 24187.0 | 701320.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6533.0 | 2040284.0 | 0.0 | 2097152.0 | 930763493.0 | 2195117432.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11988426.0 | 16219788662633 | 16227579893555 | 16227579987796 | 16219789115013 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7f022d5aa180 | 0x7f022a6252c0 | 2423368 | 2317145 | 30016076 | 65536 | 222351562 | 302920 | 302920 | 30359191.0 | 28643546.0 | 0.0 | 1153339.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12767.0 | 8250074.0 | 0.0 | 8388608.0 | 852424396.0 | 3963589340.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20169334.0 | 16219789650412 | 16227580012276 | 16227580192597 | 16219790189910 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7f022d5aa000 | 0x7f022a625300 | 2412536 | 2302099 | 29820390 | 65536 | 215579752 | 301566 | 301566 | 30163541.0 | 28600348.0 | 0.0 | 815167.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12686.0 | 8250283.0 | 0.0 | 8388608.0 | 852741481.0 | 3903009546.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20098939.0 | 16219790726108 | 16227580217877 | 16227580399478 | 16219791202937 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7f022af09e80 | 0x7f022a625340 | 1307160 | 1196073 | 15405106 | 65536 | 95319073 | 163394 | 163394 | 15785125.0 | 14099371.0 | 24327.0 | 548725.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6556.0 | 2039410.0 | 0.0 | 2097152.0 | 874046192.0 | 2054254792.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12025575.0 | 16219791737615 | 16227580424438 | 16227580519959 | 16219792104177 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7f022af09d00 | 0x7f022a625380 | 1685128 | 1579919 | 20366943 | 65536 | 134068811 | 210640 | 210640 | 20775253.0 | 16052343.0 | 19505.0 | 1533.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6638.0 | 2040941.0 | 0.0 | 2097152.0 | 341220755.0 | 961141474.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12783405.0 | 16219792637535 | 16227580546199 | 16227580672280 | 16219793127374 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7f022af09b80 | 0x7f022a6253c0 | 1294240 | 1191391 | 15371231 | 65536 | 86613190 | 161779 | 161779 | 15724103.0 | 14102579.0 | 24725.0 | 593318.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6553.0 | 2040741.0 | 0.0 | 2097152.0 | 914268468.0 | 2295688039.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11961263.0 | 16219793670632 | 16227580704280 | 16227580798520 | 16219794066974 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7f022af09a00 | 0x7f022a625400 | 2387592 | 2280836 | 29544441 | 65536 | 215266991 | 298448 | 298448 | 29886979.0 | 28375499.0 | 0.0 | 774008.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12623.0 | 8250921.0 | 0.0 | 8388608.0 | 846767258.0 | 3985418536.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19892485.0 | 16219794602162 | 16227580824280 | 16227581004441 | 16219795151530 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7f022af09880 | 0x7f022a625440 | 2380248 | 2274563 | 29466635 | 65536 | 207954043 | 297530 | 297530 | 29805651.0 | 28234637.0 | 0.0 | 1079919.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12899.0 | 8248124.0 | 0.0 | 8388608.0 | 836221181.0 | 3907628955.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20032883.0 | 16219795687228 | 16227581030202 | 16227581211483 | 16219796160748 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7f022af09700 | 0x7f022a625480 | 1296880 | 1192753 | 15383880 | 65536 | 98986884 | 162109 | 162109 | 15741913.0 | 14131820.0 | 24695.0 | 320199.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6580.0 | 2038676.0 | 0.0 | 2097152.0 | 846276884.0 | 2032979124.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12005310.0 | 16219796694716 | 16227581234363 | 16227581328443 | 16219797064578 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7f022d5aa580 | 0x7f022a6254c0 | 1753496 | 1641233 | 21186471 | 65536 | 138513074 | 219186 | 219186 | 21572205.0 | 16206550.0 | 19310.0 | 863.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6650.0 | 2040920.0 | 0.0 | 2097152.0 | 330571494.0 | 956523667.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12964537.0 | 16219797598056 | 16227581352443 | 16227581482844 | 16219798086345 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7f022d5aa400 | 0x7f022a625500 | 1281848 | 1178394 | 15215963 | 65536 | 88986921 | 160230 | 160230 | 15555207.0 | 14037554.0 | 24111.0 | 587058.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6581.0 | 2039152.0 | 0.0 | 2097152.0 | 928400600.0 | 2230697208.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11878655.0 | 16219798629133 | 16227581515484 | 16227581608925 | 16219799078403 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7f022d5aa280 | 0x7f022a625540 | 2408288 | 2300604 | 29775042 | 65536 | 216300437 | 301035 | 301035 | 30144171.0 | 28522451.0 | 0.0 | 958174.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12746.0 | 8249584.0 | 0.0 | 8388608.0 | 842417997.0 | 3922977125.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19824251.0 | 16219799613331 | 16227581636285 | 16227581816606 | 16219800151739 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7f022d5aa100 | 0x7f022a625580 | 2383168 | 2276718 | 29486887 | 65536 | 206308080 | 297895 | 297895 | 29833627.0 | 28262562.0 | 0.0 | 1003021.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12661.0 | 8250524.0 | 0.0 | 8388608.0 | 841571821.0 | 3871572306.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19850784.0 | 16219800687178 | 16227581842046 | 16227582020607 | 16219801171247 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7f022af09f80 | 0x7f022a6255c0 | 1295816 | 1192617 | 15376434 | 65536 | 103735230 | 161976 | 161976 | 15740171.0 | 13985348.0 | 24461.0 | 181166.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6618.0 | 2056129.0 | 0.0 | 2097152.0 | 799613757.0 | 1974236783.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11970622.0 | 16219801707835 | 16227582044767 | 16227582139648 | 16219802090606 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7f022af09e00 | 0x7f022a625600 | 1841464 | 1728722 | 22322772 | 65536 | 140974629 | 230182 | 230182 | 22709757.0 | 16109697.0 | 19429.0 | 1095.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6635.0 | 2041038.0 | 0.0 | 2097152.0 | 296456265.0 | 885327925.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12497550.0 | 16219802619585 | 16227582164288 | 16227582303969 | 16219803122584 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7f022af09c80 | 0x7f022a625640 | 1302904 | 1186375 | 15304407 | 65536 | 92171408 | 162862 | 162862 | 15659077.0 | 14048515.0 | 24869.0 | 516583.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6529.0 | 2042365.0 | 0.0 | 2097152.0 | 915956710.0 | 2232559636.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11889613.0 | 16219803664042 | 16227582336289 | 16227582430210 | 16219804108922 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7f022af09b00 | 0x7f022a625680 | 2396048 | 2288713 | 29615360 | 65536 | 214976124 | 299505 | 299505 | 29989601.0 | 28361454.0 | 0.0 | 740978.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12646.0 | 8250707.0 | 0.0 | 8388608.0 | 842002963.0 | 3883247178.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19853018.0 | 16219804642200 | 16227582454050 | 16227582633411 | 16219805176288 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7f022af09980 | 0x7f022a6256c0 | 2378568 | 2271501 | 29434421 | 65536 | 206212484 | 297320 | 297320 | 29765689.0 | 28184698.0 | 0.0 | 779860.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12792.0 | 8249394.0 | 0.0 | 8388608.0 | 841013645.0 | 3867741619.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19867880.0 | 16219805712836 | 16227582657731 | 16227582838052 | 16219806164106 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7f022af09800 | 0x7f022a625700 | 1311760 | 1208346 | 15610075 | 65536 | 102033600 | 163969 | 163969 | 15944713.0 | 14296360.0 | 24149.0 | 205012.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6584.0 | 2059916.0 | 0.0 | 2097152.0 | 690270346.0 | 1851929382.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12134233.0 | 16219806699664 | 16227582862372 | 16227582959493 | 16219807099396 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7f022af09680 | 0x7f022a625740 | 1924616 | 1811263 | 23410306 | 65536 | 149658230 | 240576 | 240576 | 23782673.0 | 16110181.0 | 19562.0 | 1735.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6648.0 | 2041011.0 | 0.0 | 2097152.0 | 286590133.0 | 866632224.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12744783.0 | 16219807632844 | 16227582986533 | 16227583131494 | 16219808145423 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7f022d5aa500 | 0x7f022a625780 | 1294096 | 1184398 | 15260579 | 65536 | 94516241 | 161761 | 161761 | 15633415.0 | 13943258.0 | 24205.0 | 532879.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6577.0 | 2038600.0 | 0.0 | 2097152.0 | 892287951.0 | 2163436623.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11854066.0 | 16219808688471 | 16227583164774 | 16227583259654 | 16219809083542 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7f022d5aa380 | 0x7f022a6257c0 | 2381712 | 2275981 | 29463558 | 65536 | 205434170 | 297713 | 297713 | 29824007.0 | 28229141.0 | 0.0 | 752891.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12694.0 | 8250365.0 | 0.0 | 8388608.0 | 828221905.0 | 3967057109.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19730091.0 | 16219809618320 | 16227583284935 | 16227583465736 | 16219810159288 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7f022d5aa200 | 0x7f022a625800 | 2385656 | 2274566 | 29447309 | 65536 | 207691186 | 298206 | 298206 | 29805547.0 | 28211537.0 | 0.0 | 754133.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12798.0 | 8248927.0 | 0.0 | 8388608.0 | 833783454.0 | 3908117120.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19784758.0 | 16219810695216 | 16227583491016 | 16227583671817 | 16219811176386 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7f022d5aa080 | 0x7f022a625840 | 1376368 | 1268074 | 16368294 | 65536 | 101685571 | 172045 | 172045 | 16721229.0 | 14923259.0 | 22510.0 | 121672.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6611.0 | 2039055.0 | 0.0 | 2097152.0 | 617334945.0 | 1614362380.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12668077.0 | 16219811712494 | 16227583698057 | 16227583797738 | 16219812091165 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7f022af09f00 | 0x7f022a625880 | 2072752 | 1960284 | 25337287 | 65536 | 170651257 | 259093 | 259093 | 25719725.0 | 16039375.0 | 20430.0 | 848.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6663.0 | 2039779.0 | 0.0 | 2097152.0 | 285592163.0 | 861971418.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12164681.0 | 16219812626513 | 16227583823658 | 16227583979019 | 16219813145352 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7f022af09d80 | 0x7f022a6258c0 | 1302016 | 1188632 | 15302770 | 65536 | 98613031 | 162751 | 162751 | 15688405.0 | 14020635.0 | 23920.0 | 302483.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6579.0 | 2039559.0 | 0.0 | 2097152.0 | 825563204.0 | 2032120735.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11849932.0 | 16219813688270 | 16227584011499 | 16227584106379 | 16219814124130 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7f022af09c00 | 0x7f022a625900 | 2361968 | 2254982 | 29159415 | 65536 | 210324959 | 295245 | 295245 | 29550851.0 | 27909635.0 | 0.0 | 731219.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12520.0 | 8252057.0 | 0.0 | 8388608.0 | 834581061.0 | 3880645666.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19562519.0 | 16219814625239 | 16227584131979 | 16227584311340 | 16219815192497 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7f022af09a80 | 0x7f022a625940 | 2374120 | 2271593 | 29403171 | 65536 | 214862882 | 296764 | 296764 | 29767041.0 | 28085929.0 | 0.0 | 881623.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12744.0 | 8249579.0 | 0.0 | 8388608.0 | 833190342.0 | 3941580227.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19648762.0 | 16219815728155 | 16227584337901 | 16227584518862 | 16219816210644 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7f022af09900 | 0x7f022a625980 | 1429272 | 1323293 | 17073625 | 65536 | 115395428 | 178658 | 178658 | 17438985.0 | 15502393.0 | 20388.0 | 86383.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6593.0 | 2038801.0 | 0.0 | 2097152.0 | 500920151.0 | 1306042559.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13165532.0 | 16219816745662 | 16227584543662 | 16227584649582 | 16219817132694 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7f022af09780 | 0x7f022a6259c0 | 2238480 | 2131260 | 27573526 | 65536 | 198900398 | 279809 | 279809 | 27942439.0 | 15928520.0 | 19826.0 | 540.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6658.0 | 2039571.0 | 0.0 | 2097152.0 | 278987645.0 | 848934530.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12199316.0 | 16219817668552 | 16227584675823 | 16227584846064 | 16219818117382 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7f022af09600 | 0x7f022a625a00 | 1296712 | 1186112 | 15307081 | 65536 | 99785588 | 162088 | 162088 | 15655619.0 | 14008832.0 | 24358.0 | 129949.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6602.0 | 2040512.0 | 0.0 | 2097152.0 | 702582409.0 | 1893385961.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 11840023.0 | 16219818659700 | 16227584879824 | 16227584975184 | 16219819116070 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7f022d5aa480 | 0x7f022a625a40 | 2369944 | 2266390 | 29302275 | 65536 | 209811898 | 296242 | 296242 | 29699285.0 | 27905071.0 | 0.0 | 629919.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12553.0 | 8251338.0 | 0.0 | 8388608.0 | 822273036.0 | 3861566467.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19532204.0 | 16219819651268 | 16227585001904 | 16227585182705 | 16219820199896 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7f022d5aa300 | 0x7f022a625a80 | 2376520 | 2267169 | 29330150 | 65536 | 212566239 | 297064 | 297064 | 29709347.0 | 28034635.0 | 0.0 | 596830.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12895.0 | 8247930.0 | 0.0 | 8388608.0 | 830030512.0 | 3827283834.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19420499.0 | 16219820735524 | 16227585207506 | 16227585390387 | 16219821211924 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7f022d5aa180 | 0x7f022a625ac0 | 1513920 | 1399868 | 18042829 | 65536 | 110881607 | 189239 | 189239 | 18434551.0 | 16498181.0 | 16214.0 | 40740.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6608.0 | 2039842.0 | 0.0 | 2097152.0 | 382265530.0 | 1074569572.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 14026935.0 | 16219821746942 | 16227585416787 | 16227585526547 | 16219822171882 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7f022d5aa000 | 0x7f022a625b00 | 2398880 | 2289132 | 29612126 | 65536 | 211974240 | 299859 | 299859 | 29994645.0 | 16041525.0 | 20507.0 | 1699.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6675.0 | 2039795.0 | 0.0 | 2097152.0 | 281864938.0 | 843075827.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12341998.0 | 16219822707971 | 16227585551508 | 16227585732789 | 16219823263198 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7f022af09e80 | 0x7f022a625b40 | 1376352 | 1272081 | 16392142 | 65536 | 102967707 | 172043 | 172043 | 16773307.0 | 14818207.0 | 21820.0 | 62082.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6600.0 | 2038354.0 | 0.0 | 2097152.0 | 565025646.0 | 1495169250.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12494518.0 | 16219823806306 | 16227585764949 | 16227585865909 | 16219824206697 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7f022af09d00 | 0x7f022a625b80 | 2373624 | 2255282 | 29135983 | 65536 | 179398739 | 296702 | 296702 | 29554803.0 | 27826065.0 | 0.0 | 477487.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12058.0 | 8257000.0 | 0.0 | 8388608.0 | 822707405.0 | 3895083721.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19470343.0 | 16219824741666 | 16227585891190 | 16227586075671 | 16219825199525 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7f022af09b80 | 0x7f022a625bc0 | 2382848 | 2259173 | 29174727 | 65536 | 182309009 | 297855 | 297855 | 29605451.0 | 27847377.0 | 0.0 | 725119.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12342.0 | 8253820.0 | 0.0 | 8388608.0 | 802974972.0 | 3975344206.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19479280.0 | 16219825734663 | 16227586107671 | 16227586289912 | 16219826203133 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7f022af09a00 | 0x7f022a625c00 | 1653648 | 1535786 | 19803771 | 65536 | 127878933 | 206705 | 206705 | 20201407.0 | 18082545.0 | 16394.0 | 771.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6621.0 | 2038368.0 | 0.0 | 2097152.0 | 374130242.0 | 1087739349.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 15691401.0 | 16219826738251 | 16227586314712 | 16227586436633 | 16219827174272 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7f022af09880 | 0x7f022a625c40 | 2723584 | 2610224 | 33749865 | 65536 | 241539368 | 340447 | 340447 | 34169101.0 | 16125139.0 | 20466.0 | 470.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6721.0 | 2040646.0 | 0.0 | 2097152.0 | 281576314.0 | 839809061.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12599226.0 | 16219827708730 | 16227586463193 | 16227586669754 | 16219828225628 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7f022af09700 | 0x7f022a625c80 | 1479032 | 1370606 | 17669090 | 65536 | 110489050 | 184878 | 184878 | 18054041.0 | 15824405.0 | 19579.0 | 20626.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6611.0 | 2036975.0 | 0.0 | 2097152.0 | 381221320.0 | 1049199941.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13304598.0 | 16219828730877 | 16227586717914 | 16227586829275 | 16219829125818 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7f022d5aa580 | 0x7f022a625cc0 | 2378072 | 2256557 | 29122730 | 65536 | 187097584 | 297258 | 297258 | 29571469.0 | 27708084.0 | 0.0 | 745182.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12289.0 | 8254448.0 | 0.0 | 8388608.0 | 815020417.0 | 3782217130.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19157106.0 | 16219829661336 | 16227586855355 | 16227587038076 | 16219830119846 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7f022d5aa400 | 0x7f022a625d00 | 2380832 | 2262208 | 29201325 | 65536 | 187213336 | 297603 | 297603 | 29644815.0 | 27774643.0 | 0.0 | 699148.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12185.0 | 8255452.0 | 0.0 | 8388608.0 | 808884780.0 | 3888404480.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19251290.0 | 16219830655544 | 16227587062556 | 16227587247357 | 16219831121024 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7f022d5aa280 | 0x7f022a625d40 | 1803312 | 1691637 | 21831808 | 65536 | 149743754 | 225413 | 225413 | 22227587.0 | 19592942.0 | 16033.0 | 2073.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 312.0 | 2097152.0 | 6626.0 | 2031494.0 | 0.0 | 2097152.0 | 273281337.0 | 855795257.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 16243998.0 | 16219831656082 | 16227587273757 | 16227587407838 | 16219832066033 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7f022d5aa100 | 0x7f022a625d80 | 3039280 | 2929589 | 37903573 | 65536 | 277309623 | 379909 | 379909 | 38320781.0 | 16234358.0 | 19715.0 | 2267.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6704.0 | 2040269.0 | 0.0 | 2097152.0 | 275834307.0 | 833101307.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 12617186.0 | 16219832600412 | 16227587434718 | 16227587667040 | 16219833162139 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7f022af09f80 | 0x7f022a625dc0 | 1692712 | 1584018 | 20459528 | 65536 | 139198598 | 211588 | 211588 | 20828501.0 | 17993573.0 | 17964.0 | 1280.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6649.0 | 2050388.0 | 0.0 | 2097152.0 | 305586407.0 | 909552176.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 14773197.0 | 16219833663078 | 16227587714400 | 16227587840961 | 16219834070029 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7f022af09e00 | 0x7f022a625e00 | 2475384 | 2350344 | 30267492 | 65536 | 185782705 | 309422 | 309422 | 30790739.0 | 28088658.0 | 0.0 | 274320.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12530.0 | 8251296.0 | 0.0 | 8388608.0 | 671589942.0 | 3282607082.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19429239.0 | 16219834605407 | 16227587866721 | 16227588056162 | 16219835162345 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7f022af09c80 | 0x7f022a625e40 | 2492304 | 2350468 | 30248053 | 65536 | 185093671 | 311537 | 311537 | 30792143.0 | 28574087.0 | 0.0 | 342407.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12598.0 | 8250588.0 | 0.0 | 8388608.0 | 635100783.0 | 3169307743.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19975147.0 | 16219835697263 | 16227588082242 | 16227588274083 | 16219836269311 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7f022af09b00 | 0x7f022a625e80 | 2128792 | 2018073 | 26077106 | 65536 | 181637351 | 266098 | 266098 | 26471125.0 | 20592292.0 | 19540.0 | 2304.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6629.0 | 2026502.0 | 0.0 | 2097152.0 | 273723171.0 | 832191068.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 16462443.0 | 16219836734560 | 16227588299203 | 16227588459364 | 16219837169531 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7f022af09980 | 0x7f022a625ec0 | 3696152 | 3582117 | 46356667 | 65536 | 339952669 | 462018 | 462018 | 46803775.0 | 16991707.0 | 19800.0 | 1465.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6699.0 | 2040297.0 | 0.0 | 2097152.0 | 273970859.0 | 830440245.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13239934.0 | 16219837704159 | 16227588485444 | 16227588769126 | 16219838337515 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7f022af09800 | 0x7f022a625f00 | 1954168 | 1840290 | 23782915 | 65536 | 163261609 | 244270 | 244270 | 24159985.0 | 21957269.0 | 13589.0 | 2136.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6626.0 | 2046877.0 | 0.0 | 2097152.0 | 264938725.0 | 840726964.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 19458012.0 | 16219838816524 | 16227588815526 | 16227588965127 | 16219839254785 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7f022af09680 | 0x7f022a625f40 | 2671712 | 2553256 | 32907068 | 65536 | 206575040 | 333963 | 333963 | 33428673.0 | 28880019.0 | 0.0 | 134000.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 312.0 | 8388608.0 | 12733.0 | 8248873.0 | 0.0 | 8388608.0 | 593852460.0 | 2988233438.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 19835324.0 | 16219839788803 | 16227588990567 | 16227589198408 | 16219840334861 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7f022d5aa500 | 0x7f022a625f80 | 2672776 | 2537300 | 32725725 | 65536 | 208414803 | 334096 | 334096 | 33221063.0 | 30691318.0 | 0.0 | 111520.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12871.0 | 8247362.0 | 0.0 | 8388608.0 | 514751356.0 | 2762116087.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 21745286.0 | 16219840806520 | 16227589239849 | 16227589448490 | 16219841362378 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7f022d5aa380 | 0x7f022a625fc0 | 2462544 | 2344819 | 30290561 | 65536 | 216559932 | 307817 | 307817 | 30718771.0 | 21464175.0 | 18577.0 | 7595.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6636.0 | 2026842.0 | 0.0 | 2097152.0 | 273519955.0 | 831291496.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 16517203.0 | 16219841835118 | 16227589485290 | 16227589670731 | 16219842328707 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7f022d5aa200 | 0x7f022a626000 | 4341056 | 4227350 | 54737726 | 65536 | 411368568 | 542631 | 542631 | 55191713.0 | 17325839.0 | 19954.0 | 965.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6721.0 | 2040411.0 | 0.0 | 2097152.0 | 279573371.0 | 834417420.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13271479.0 | 16219842863065 | 16227589696971 | 16227590031853 | 16219843574429 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7f022d5aa080 | 0x7f022a626040 | 2177320 | 2066737 | 26744962 | 65536 | 194635879 | 272164 | 272164 | 27103679.0 | 19491514.0 | 16862.0 | 2470.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6622.0 | 2048254.0 | 0.0 | 2097152.0 | 271022605.0 | 837825037.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 15361401.0 | 16219844056199 | 16227590075054 | 16227590243534 | 16219844499989 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7f022af09f00 | 0x7f022a626080 | 2942016 | 2816991 | 36239660 | 65536 | 237560624 | 367751 | 367751 | 36857241.0 | 29757768.0 | 0.0 | 75208.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12864.0 | 8247366.0 | 0.0 | 8388608.0 | 492402124.0 | 2529841229.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20303382.0 | 16219845034847 | 16227590270095 | 16227590497936 | 16219845611034 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7f022af09d80 | 0x7f022a6260c0 | 3023000 | 2899454 | 37375570 | 65536 | 254634310 | 377874 | 377874 | 37928987.0 | 33227352.0 | 0.0 | 94951.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12875.0 | 8247287.0 | 0.0 | 8388608.0 | 446390129.0 | 2339605746.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 24390657.0 | 16219846091663 | 16227590532496 | 16227590761777 | 16219846519594 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7f022af09c00 | 0x7f022a626100 | 2943920 | 2782004 | 35816221 | 65536 | 188870106 | 367989 | 367989 | 36402215.0 | 21227256.0 | 20706.0 | 27809.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6642.0 | 2026415.0 | 0.0 | 2097152.0 | 269400773.0 | 821764953.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 17133462.0 | 16219847064632 | 16227590799378 | 16227591010739 | 16219847531032 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7f022af09a80 | 0x7f022a626140 | 5133272 | 4973821 | 64085020 | 65536 | 393600290 | 641658 | 641658 | 64896031.0 | 19888208.0 | 17894.0 | 9856.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6713.0 | 2040557.0 | 0.0 | 2097152.0 | 283969437.0 | 839500578.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 13372715.0 | 16219848065850 | 16227591048339 | 16227591433621 | 16219848809193 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7f022af09900 | 0x7f022a626180 | 2513848 | 2379428 | 30738755 | 65536 | 208530009 | 314230 | 314230 | 31168675.0 | 21269464.0 | 15620.0 | 1068.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6631.0 | 2046603.0 | 0.0 | 2097152.0 | 271776895.0 | 833512025.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 16267379.0 | 16219849258653 | 16227591475862 | 16227591662903 | 16219849691634 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7f022af09780 | 0x7f022a6261c0 | 3239008 | 3101777 | 40000429 | 65536 | 270127125 | 404875 | 404875 | 40559407.0 | 30817688.0 | 0.0 | 22819.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 12882.0 | 8247088.0 | 0.0 | 8388608.0 | 443804390.0 | 2276081643.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 21197808.0 | 16219850228162 | 16227591688503 | 16227591939704 | 16219850716251 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7f022af09600 | 0x7f022a626200 | 3214216 | 3070848 | 39669834 | 65536 | 267238922 | 401776 | 401776 | 40157421.0 | 34976550.0 | 0.0 | 30118.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13179.0 | 8243652.0 | 0.0 | 8388608.0 | 286997515.0 | 1736664034.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 23798994.0 | 16219851254819 | 16227591974104 | 16227592220986 | 16219851735139 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7f022d5aa480 | 0x7f022a626240 | 3102336 | 2988478 | 38645189 | 65536 | 283972881 | 387791 | 387791 | 39086325.0 | 22090625.0 | 17605.0 | 2707.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6642.0 | 2027237.0 | 0.0 | 2097152.0 | 275270907.0 | 826129812.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 16982263.0 | 16219852270857 | 16227592259066 | 16227592495867 | 16219852904593 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7f022d5aa300 | 0x7f022a626280 | 5749464 | 5634927 | 72924096 | 65536 | 548217758 | 718682 | 718682 | 73490227.0 | 68537275.0 | 857.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 7030.0 | 2065643.0 | 0.0 | 2097152.0 | 228670470.0 | 794441573.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 63300950.0 | 16219853374762 | 16227592530108 | 16227592983550 | 16219854239883 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7f022d5aa180 | 0x7f022a6262c0 | 3014920 | 2875025 | 37014447 | 65536 | 225872060 | 376864 | 376864 | 37611605.0 | 24095224.0 | 14030.0 | 11274.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6645.0 | 2045051.0 | 0.0 | 2097152.0 | 266189850.0 | 822562785.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 18220229.0 | 16219854688083 | 16227593026270 | 16227593251872 | 16219855151813 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7f022d5aa000 | 0x7f022a626300 | 3878024 | 3742871 | 48287138 | 65536 | 318420702 | 484752 | 484752 | 48894513.0 | 35826624.0 | 0.0 | 1115.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13078.0 | 8244761.0 | 0.0 | 8388608.0 | 289332855.0 | 1691499382.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 21770169.0 | 16219855688341 | 16227593289152 | 16227593587714 | 16219856289678 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7f022af09e80 | 0x7f022a626340 | 3934936 | 3795964 | 48951397 | 65536 | 309964862 | 491866 | 491866 | 49583721.0 | 30845155.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13005.0 | 8245594.0 | 0.0 | 8388608.0 | 339998104.0 | 1944398579.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20975187.0 | 16219856761288 | 16227593625154 | 16227593924036 | 16219857380174 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7f022af09d00 | 0x7f022a626380 | 4006288 | 3890703 | 50330667 | 65536 | 367782654 | 500785 | 500785 | 50815419.0 | 47091396.0 | 1628.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 7610.0 | 2063898.0 | 0.0 | 2097152.0 | 232173547.0 | 798636548.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 44379083.0 | 16219857829694 | 16227593964516 | 16227594261158 | 16219858442720 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7f022af09b80 | 0x7f022a6263c0 | 7205048 | 7087577 | 91769022 | 65536 | 696924552 | 900630 | 900630 | 92374807.0 | 76360550.0 | 924.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6805.0 | 2064295.0 | 0.0 | 2097152.0 | 231254245.0 | 798457139.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 68051101.0 | 16219858893110 | 16227594301478 | 16227594860521 | 16219859776411 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7f022af09a00 | 0x7f022a626400 | 3523400 | 3381613 | 43642280 | 65536 | 280098421 | 440424 | 440424 | 44198107.0 | 27545754.0 | 15788.0 | 903.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6646.0 | 2028468.0 | 0.0 | 2097152.0 | 255703461.0 | 808191674.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 22261989.0 | 16219860235120 | 16227594931241 | 16227595195083 | 16219860784428 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7f022af09880 | 0x7f022a626440 | 4541256 | 4428970 | 57299126 | 65536 | 418012502 | 567656 | 567656 | 57813007.0 | 30059845.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12997.0 | 8245698.0 | 0.0 | 8388608.0 | 297335799.0 | 1731071241.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 20958792.0 | 16219861259388 | 16227595257803 | 16227595608205 | 16219861884104 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7f022af09700 | 0x7f022a626480 | 4411136 | 4304921 | 55709369 | 65536 | 412700726 | 551391 | 551391 | 56200227.0 | 34972209.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 12952.0 | 8246248.0 | 0.0 | 8388608.0 | 327839873.0 | 1889998441.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 23245274.0 | 16219862383463 | 16227595681326 | 16227596031248 | 16219863129007 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7f022d5aa580 | 0x7f022a6264c0 | 4517616 | 4393986 | 56850626 | 65536 | 419341777 | 564701 | 564701 | 57357929.0 | 55054737.0 | 1120.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 7533.0 | 2064255.0 | 0.0 | 2097152.0 | 231198422.0 | 800210974.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 51823166.0 | 16219863622516 | 16227596093328 | 16227596440530 | 16219864315610 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7f022d5aa400 | 0x7f022a626500 | 8342088 | 8223828 | 106494605 | 65536 | 814351179 | 1042760 | 1042760 | 107145927.0 | 82434498.0 | 850.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 6821.0 | 2063450.0 | 0.0 | 2097152.0 | 230498603.0 | 799820451.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 73686879.0 | 16219864805270 | 16227596480850 | 16227597143254 | 16219865809648 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7f022d5aa280 | 0x7f022a626540 | 5775184 | 5657731 | 73261472 | 65536 | 541967163 | 721897 | 721897 | 73786523.0 | 68226236.0 | 916.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 7418.0 | 2063945.0 | 0.0 | 2097152.0 | 231236438.0 | 801511684.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 62609866.0 | 16219866289497 | 16227597191094 | 16227597638457 | 16219867086909 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7f022d5aa100 | 0x7f022a626580 | 5792648 | 5671703 | 73441113 | 65536 | 551938826 | 724080 | 724080 | 73968081.0 | 68731664.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 208.0 | 8388608.0 | 13119.0 | 8244299.0 | 0.0 | 8388608.0 | 234039716.0 | 1574827706.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 57752338.0 | 16219867557129 | 16227597677017 | 16227598141660 | 16219868367811 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7f022af09f80 | 0x7f022a6265c0 | 5775696 | 5663164 | 73298896 | 65536 | 549982447 | 721961 | 721961 | 73857165.0 | 68901581.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13095.0 | 8244563.0 | 0.0 | 8388608.0 | 235975217.0 | 1585751903.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 57750376.0 | 16219868838680 | 16227598208540 | 16227598669343 | 16219869647282 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7f022af09e00 | 0x7f022a626600 | 5816440 | 5701944 | 73797539 | 65536 | 558103438 | 727054 | 727054 | 74361383.0 | 68630119.0 | 907.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 208.0 | 2097152.0 | 7539.0 | 2063485.0 | 0.0 | 2097152.0 | 231071865.0 | 800648334.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 63097051.0 | 16219870118932 | 16227598708223 | 16227599160226 | 16219870961583 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7f022af09c80 | 0x7f022a626640 | 10936776 | 10817470 | 140183312 | 65536 | 1074867605 | 1367096 | 1367096 | 140863429.0 | 95503701.0 | 932.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6814.0 | 2064736.0 | 0.0 | 2097152.0 | 234245316.0 | 808126574.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 82989187.0 | 16219871432983 | 16227599224546 | 16227600093351 | 16219872660276 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7f022af09b00 | 0x7f022a626680 | 10965688 | 10846272 | 140558986 | 65536 | 1084029225 | 1370710 | 1370710 | 141237673.0 | 91480724.0 | 975.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 7421.0 | 2063269.0 | 0.0 | 2097152.0 | 236928084.0 | 817722907.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 81212259.0 | 16219873133176 | 16227600166631 | 16227601021196 | 16219874338549 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7f022af09980 | 0x7f022a6266c0 | 10973304 | 10855886 | 140697395 | 65536 | 1079839627 | 1371662 | 1371662 | 141362707.0 | 95168903.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13274.0 | 8242594.0 | 0.0 | 8388608.0 | 234291149.0 | 1575991244.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 78402344.0 | 16219874808629 | 16227601086797 | 16227601971122 | 16219876033002 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7f022af09800 | 0x7f022a626700 | 10976480 | 10848485 | 140597110 | 65536 | 1080427544 | 1372059 | 1372059 | 141266533.0 | 96014378.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13220.0 | 8243188.0 | 0.0 | 8388608.0 | 235545859.0 | 1581107717.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 78393460.0 | 16219876504451 | 16227602010162 | 16227602890807 | 16219877723334 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7f022af09680 | 0x7f022a626740 | 10997352 | 10887950 | 141056826 | 65536 | 1086542735 | 1374668 | 1374668 | 141779487.0 | 93748346.0 | 847.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 7526.0 | 2063175.0 | 0.0 | 2097152.0 | 236828553.0 | 816738942.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 81376342.0 | 16219878196694 | 16227602954807 | 16227603814972 | 16219879401607 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7f022d5aa500 | 0x7f022a626780 | 21325056 | 21184689 | 274563460 | 65536 | 2124401407 | 2665631 | 2665631 | 275637055.0 | 111219877.0 | 578.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6777.0 | 2064990.0 | 0.0 | 2097152.0 | 240153833.0 | 828982023.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 93342953.0 | 16219879870956 | 16227603877693 | 16227605577222 | 16219881921821 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7f022d5aa380 | 0x7f022a6267c0 | 21345752 | 21225520 | 275113353 | 65536 | 2129395414 | 2668218 | 2668218 | 276167845.0 | 127697277.0 | 631.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 7878.0 | 2061871.0 | 0.0 | 2097152.0 | 238795484.0 | 833342642.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 110759528.0 | 16219882395461 | 16227605676103 | 16227607341073 | 16219884406846 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7f022d5aa200 | 0x7f022a626800 | 21384272 | 21241424 | 275221999 | 65536 | 2127132891 | 2673033 | 2673033 | 276374701.0 | 131113682.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13270.0 | 8242638.0 | 0.0 | 8388608.0 | 234503902.0 | 1594138317.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 109026919.0 | 16219884879746 | 16227607429393 | 16227609154203 | 16219886961880 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7f022d5aa080 | 0x7f022a626840 | 21345632 | 21233630 | 275140206 | 65536 | 2119676412 | 2668203 | 2668203 | 276273353.0 | 136020978.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 33554432.0 | 33554432.0 | 33554432.0 | 0.0 | 0.0 | 0.0 | 104.0 | 8388608.0 | 13215.0 | 8243243.0 | 0.0 | 8388608.0 | 235332429.0 | 1600140738.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194304.0 | 0.0 | 0.0 | 111360128.0 | 16219887429640 | 16227609251644 | 16227610968613 | 16219889490044 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7f022af09f00 | 0x7f022a626880 | 21392736 | 21266298 | 275642117 | 65536 | 2136172590 | 2674091 | 2674091 | 276698089.0 | 126882908.0 | 593.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 7880.0 | 2061927.0 | 0.0 | 2097152.0 | 239117756.0 | 834344180.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 108076478.0 | 16219889959233 | 16227611065894 | 16227612740144 | 16219892013528 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 151449 | 151449 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7f022af09d80 | 0x7f022a6268c0 | 42046064 | 41925787 | 543401451 | 65536 | 4225396593 | 5255757 | 5255757 | 545271329.0 | 110224321.0 | 509.0 | 0.0 | 0.0 | 0.0 | 0.0 | 524288.0 | 8388608.0 | 8388608.0 | 8388608.0 | 0.0 | 0.0 | 0.0 | 104.0 | 2097152.0 | 6751.0 | 2065864.0 | 0.0 | 2097152.0 | 241277972.0 | 837879749.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097152.0 | 0.0 | 0.0 | 98808014.0 | 16219892473018 | 16227612828144 | 16227616185763 | 16219896186936 |