84 KiB
84 KiB
| 1 | Index | KernelName | gpu-id | queue-id | queue-index | pid | tid | grd | wgr | lds | scr | vgpr | sgpr | fbar | sig | obj | SQ_CYCLES | SQ_BUSY_CYCLES | SQ_WAVES | GRBM_COUNT | GRBM_GUI_ACTIVE | TCC_CYCLE_sum | TCC_BUSY_sum | TCC_PROBE_sum | TCC_PROBE_ALL_sum | TCC_NC_REQ_sum | TCC_UC_REQ_sum | TCC_CC_REQ_sum | TCC_RW_REQ_sum | TCC_EA_ATOMIC_LEVEL_sum | TCC_REQ_sum | TCC_STREAMING_REQ_sum | TCC_HIT_sum | TCC_MISS_sum | TCC_READ_sum | TCC_WRITE_sum | TCC_ATOMIC_sum | TCC_WRITEBACK_sum | TCC_EA_WRREQ_sum | TCC_EA_WRREQ_64B_sum | TCC_EA_WR_UNCACHED_32B_sum | TCC_EA_WRREQ_STALL_sum | TCC_EA_WRREQ_IO_CREDIT_STALL_sum | TCC_EA_WRREQ_GMI_CREDIT_STALL_sum | TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum | TCC_TOO_MANY_EA_WRREQS_STALL_sum | TCC_EA_ATOMIC_sum | TCC_EA_RDREQ_sum | TCC_EA_RDREQ_32B_sum | TCC_EA_RD_UNCACHED_32B_sum | TCC_EA_RDREQ_IO_CREDIT_STALL_sum | TCC_EA_RDREQ_GMI_CREDIT_STALL_sum | TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum | TCC_TAG_STALL_sum | TCC_NORMAL_WRITEBACK_sum | TCC_ALL_TC_OP_WB_WRITEBACK_sum | TCC_NORMAL_EVICT_sum | TCC_ALL_TC_OP_INV_EVICT_sum | TCC_EA_RDREQ_DRAM_sum | TCC_EA_WRREQ_DRAM_sum | TCC_EA_RDREQ_LEVEL_sum | TCC_EA_WRREQ_LEVEL_sum | DispatchNs | BeginNs | EndNs | CompleteNs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | __amd_rocclr_fillBufferAligned.kd | 0 | 0 | 0 | 154513 | 154513 | 33554432 | 256 | 0 | 0 | 4 | 32 | 4160 | 0x0 | 0x7fbb0a204280 | 3076816 | 2991462 | 524288 | 384601 | 384601 | 12307232.0 | 9403842.0 | 0.0 | 0.0 | 56.0 | 303.0 | 0.0 | 4194696.0 | 0.0 | 4195052.0 | 0.0 | 2097594.0 | 2097458.0 | 750.0 | 4194304.0 | 0.0 | 2064387.0 | 4128768.0 | 4128768.0 | 0.0 | 1472185.0 | 0.0 | 0.0 | 1492792.0 | 0.0 | 0.0 | 311.0 | 0.0 | 606.0 | 0.0 | 0.0 | 0.0 | 200953.0 | 2064384.0 | 0.0 | 2031623.0 | 0.0 | 141.0 | 4128768.0 | 989215.0 | 1616374412.0 | 16299483631972 | 16307555332228 | 16307555571270 | 16299629181928 |
| 3 | 1 | void benchmark_func<short, 256, 8u, 0u>(short, short*) [clone .kd] | 0 | 0 | 2 | 154513 | 154513 | 32768 | 256 | 0 | 0 | 12 | 24 | 13888 | 0x0 | 0x7fbb0a223f80 | 268296 | 168350 | 512 | 33536 | 33536 | 1073152.0 | 397446.0 | 0.0 | 0.0 | 0.0 | 60.0 | 0.0 | 4600.0 | 0.0 | 4659.0 | 0.0 | 499.0 | 4160.0 | 4660.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8260.0 | 0.0 | 118.0 | 0.0 | 0.0 | 0.0 | 52775.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8205.0 | 0.0 | 3274341.0 | 0.0 | 16299634449212 | 16307560365218 | 16307560378338 | 16299634704877 |
| 4 | 2 | void benchmark_func<float, 256, 8u, 0u>(float, float*) [clone .kd] | 0 | 0 | 5 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 14336 | 0x7fbb0cfbe380 | 0x7fbb0a223fc0 | 1322240 | 1212884 | 65536 | 165279 | 165279 | 5288928.0 | 4735777.0 | 0.0 | 0.0 | 0.0 | 186.0 | 0.0 | 1048968.0 | 0.0 | 1049151.0 | 0.0 | 388.0 | 1048763.0 | 1049143.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097344.0 | 0.0 | 370.0 | 0.0 | 0.0 | 499659.0 | 1573706.0 | 0.0 | 0.0 | 983046.0 | 0.0 | 2097289.0 | 0.0 | 1443042619.0 | 0.0 | 16299634791785 | 16307560446819 | 16307560538979 | 16299635362512 |
| 5 | 3 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 0u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 8 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15040 | 0x7fbb0cfbe200 | 0x7fbb0a224000 | 2473696 | 2367603 | 65536 | 309211 | 309211 | 9894752.0 | 9337510.0 | 0.0 | 0.0 | 0.0 | 275.0 | 0.0 | 2364871.0 | 0.0 | 2329721.0 | 0.0 | 232287.0 | 2097434.0 | 2346001.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194589.0 | 0.0 | 550.0 | 0.0 | 0.0 | 679266.0 | 2202576.0 | 0.0 | 0.0 | 2031623.0 | 0.0 | 4194532.0 | 0.0 | 2665347199.0 | 0.0 | 16299635450850 | 16307560573379 | 16307560754021 | 16299636052187 |
| 6 | 4 | void benchmark_func<double, 256, 8u, 0u>(double, double*) [clone .kd] | 0 | 0 | 11 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 15488 | 0x7fbb0cfbe080 | 0x7fbb0a224040 | 2501216 | 2391509 | 65536 | 312651 | 312651 | 10004832.0 | 9412810.0 | 0.0 | 0.0 | 0.0 | 286.0 | 0.0 | 2352793.0 | 0.0 | 2335865.0 | 0.0 | 238423.0 | 2097442.0 | 2345685.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194604.0 | 0.0 | 584.0 | 0.0 | 0.0 | 756343.0 | 2521712.0 | 0.0 | 0.0 | 2031622.0 | 0.0 | 4194542.0 | 0.0 | 2632669176.0 | 0.0 | 16299636135665 | 16307560780101 | 16307560959462 | 16299636719382 |
| 7 | 5 | void benchmark_func<__half2, 256, 8u, 0u>(__half2, __half2*) [clone .kd] | 0 | 0 | 14 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 15936 | 0x7fbb0abd5f00 | 0x7fbb0a224080 | 1326016 | 1214158 | 65536 | 165751 | 165751 | 5304032.0 | 4732047.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049080.0 | 0.0 | 1049260.0 | 0.0 | 499.0 | 1048761.0 | 1049259.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097343.0 | 0.0 | 364.0 | 0.0 | 0.0 | 450643.0 | 1360669.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097287.0 | 0.0 | 1458084742.0 | 0.0 | 16299636804230 | 16307560984582 | 16307561078023 | 16299637287309 |
| 8 | 6 | void benchmark_func<int, 256, 8u, 0u>(int, int*) [clone .kd] | 0 | 0 | 17 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 16384 | 0x7fbb0abd5d80 | 0x7fbb0a2240c0 | 1319312 | 1212991 | 65536 | 164913 | 164913 | 5277216.0 | 4751018.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049024.0 | 0.0 | 1049210.0 | 0.0 | 443.0 | 1048767.0 | 1049205.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097342.0 | 0.0 | 364.0 | 0.0 | 0.0 | 542005.0 | 1515556.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097291.0 | 0.0 | 1475520748.0 | 0.0 | 16299637370808 | 16307561104263 | 16307561196103 | 16299637855117 |
| 9 | 7 | void benchmark_func<float, 256, 8u, 1u>(float, float*) [clone .kd] | 0 | 0 | 20 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 17088 | 0x7fbb0abd5c00 | 0x7fbb0a224100 | 1309128 | 1208958 | 65536 | 163640 | 163640 | 5236480.0 | 4708621.0 | 0.0 | 0.0 | 0.0 | 174.0 | 0.0 | 1049080.0 | 0.0 | 1049259.0 | 0.0 | 499.0 | 1048760.0 | 1049264.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097339.0 | 0.0 | 356.0 | 0.0 | 0.0 | 459585.0 | 1500071.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097287.0 | 0.0 | 1436792959.0 | 0.0 | 16299637951545 | 16307561235624 | 16307561327944 | 16299638373625 |
| 10 | 8 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 1u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 23 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 17792 | 0x7fbb0abd5a80 | 0x7fbb0a224140 | 2487536 | 2377128 | 65536 | 310941 | 310941 | 9950112.0 | 9342452.0 | 0.0 | 0.0 | 0.0 | 277.0 | 0.0 | 2340477.0 | 0.0 | 2344518.0 | 0.0 | 247098.0 | 2097420.0 | 2351312.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194585.0 | 0.0 | 540.0 | 0.0 | 0.0 | 834873.0 | 2684914.0 | 0.0 | 0.0 | 2031624.0 | 0.0 | 4194533.0 | 0.0 | 2657291165.0 | 0.0 | 16299638458083 | 16307561367144 | 16307561546825 | 16299638972702 |
| 11 | 9 | void benchmark_func<double, 256, 8u, 1u>(double, double*) [clone .kd] | 0 | 0 | 26 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 18496 | 0x7fbb0abd5900 | 0x7fbb0a224180 | 2467280 | 2358908 | 65536 | 308409 | 308409 | 9869088.0 | 9295907.0 | 0.0 | 0.0 | 0.0 | 281.0 | 0.0 | 2351419.0 | 0.0 | 2357762.0 | 0.0 | 260325.0 | 2097437.0 | 2343706.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194587.0 | 0.0 | 546.0 | 0.0 | 0.0 | 728732.0 | 2359515.0 | 0.0 | 0.0 | 2031623.0 | 0.0 | 4194526.0 | 0.0 | 2813070859.0 | 0.0 | 16299639057450 | 16307561573386 | 16307561752427 | 16299639583119 |
| 12 | 10 | void benchmark_func<__half2, 256, 8u, 1u>(__half2, __half2*) [clone .kd] | 0 | 0 | 29 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19200 | 0x7fbb0abd5780 | 0x7fbb0a2241c0 | 1316368 | 1216014 | 65536 | 164545 | 164545 | 5265440.0 | 4700847.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049136.0 | 0.0 | 1049321.0 | 0.0 | 555.0 | 1048766.0 | 1049318.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097339.0 | 0.0 | 354.0 | 0.0 | 0.0 | 345780.0 | 1045812.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097284.0 | 0.0 | 1417531860.0 | 0.0 | 16299639667507 | 16307561785547 | 16307561879147 | 16299640080178 |
| 13 | 11 | void benchmark_func<int, 256, 8u, 1u>(int, int*) [clone .kd] | 0 | 0 | 32 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 19904 | 0x7fbb0abd5600 | 0x7fbb0a224200 | 1321728 | 1217445 | 65536 | 165215 | 165215 | 5286880.0 | 4690378.0 | 0.0 | 0.0 | 0.0 | 186.0 | 0.0 | 1049136.0 | 0.0 | 1049316.0 | 0.0 | 555.0 | 1048761.0 | 1049321.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097345.0 | 0.0 | 366.0 | 0.0 | 0.0 | 544471.0 | 1687814.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097283.0 | 0.0 | 1416511405.0 | 0.0 | 16299640164186 | 16307561911308 | 16307562004908 | 16299640578827 |
| 14 | 12 | void benchmark_func<float, 256, 8u, 2u>(float, float*) [clone .kd] | 0 | 0 | 35 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 20608 | 0x7fbb0cfbe480 | 0x7fbb0a224240 | 1306736 | 1202743 | 65536 | 163341 | 163341 | 5226912.0 | 4655894.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1049136.0 | 0.0 | 1049312.0 | 0.0 | 555.0 | 1048757.0 | 1049312.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097343.0 | 0.0 | 362.0 | 0.0 | 0.0 | 483919.0 | 1478150.0 | 0.0 | 0.0 | 983047.0 | 0.0 | 2097291.0 | 0.0 | 1491731786.0 | 0.0 | 16299640671575 | 16307562037068 | 16307562130029 | 16299641078256 |
| 15 | 13 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 2u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 38 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 21312 | 0x7fbb0cfbe300 | 0x7fbb0a224280 | 2479392 | 2377365 | 65536 | 309923 | 309923 | 9917536.0 | 9364204.0 | 0.0 | 0.0 | 0.0 | 268.0 | 0.0 | 2329651.0 | 0.0 | 2342460.0 | 0.0 | 245015.0 | 2097445.0 | 2324839.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194600.0 | 0.0 | 568.0 | 0.0 | 0.0 | 694791.0 | 2286993.0 | 0.0 | 0.0 | 2031624.0 | 0.0 | 4194546.0 | 0.0 | 2586317556.0 | 0.0 | 16299641163544 | 16307562159469 | 16307562338510 | 16299641664483 |
| 16 | 14 | void benchmark_func<double, 256, 8u, 2u>(double, double*) [clone .kd] | 0 | 0 | 41 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22016 | 0x7fbb0cfbe180 | 0x7fbb0a2242c0 | 2466200 | 2359720 | 65536 | 308274 | 308274 | 9864768.0 | 9272801.0 | 0.0 | 0.0 | 0.0 | 278.0 | 0.0 | 2364198.0 | 0.0 | 2341316.0 | 0.0 | 243885.0 | 2097431.0 | 2360349.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194579.0 | 0.0 | 528.0 | 0.0 | 0.0 | 663230.0 | 2242861.0 | 0.0 | 0.0 | 2031624.0 | 0.0 | 4194530.0 | 0.0 | 2714445798.0 | 0.0 | 16299641749591 | 16307562363790 | 16307562543312 | 16299642258970 |
| 17 | 15 | void benchmark_func<__half2, 256, 8u, 2u>(__half2, __half2*) [clone .kd] | 0 | 0 | 44 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 22720 | 0x7fbb0cfbe000 | 0x7fbb0a224300 | 1322704 | 1211087 | 65536 | 165337 | 165337 | 5290784.0 | 4698257.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049192.0 | 0.0 | 1049376.0 | 0.0 | 610.0 | 1048766.0 | 1049375.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097344.0 | 0.0 | 362.0 | 0.0 | 0.0 | 357008.0 | 1160192.0 | 0.0 | 0.0 | 983048.0 | 0.0 | 2097293.0 | 0.0 | 1447906067.0 | 0.0 | 16299642345208 | 16307562578352 | 16307562672752 | 16299642762049 |
| 18 | 16 | void benchmark_func<int, 256, 8u, 2u>(int, int*) [clone .kd] | 0 | 0 | 47 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 23424 | 0x7fbb0abd5e80 | 0x7fbb0a224340 | 1309984 | 1208796 | 65536 | 163747 | 163747 | 5239904.0 | 4688700.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049248.0 | 0.0 | 1049428.0 | 0.0 | 666.0 | 1048762.0 | 1049429.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097345.0 | 0.0 | 362.0 | 0.0 | 0.0 | 544720.0 | 1652209.0 | 0.0 | 0.0 | 983048.0 | 0.0 | 2097288.0 | 0.0 | 1435838326.0 | 0.0 | 16299642846277 | 16307562701713 | 16307562794513 | 16299643255818 |
| 19 | 17 | void benchmark_func<float, 256, 8u, 3u>(float, float*) [clone .kd] | 0 | 0 | 50 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 24128 | 0x7fbb0abd5d00 | 0x7fbb0a224380 | 1313592 | 1209562 | 65536 | 164198 | 164198 | 5254336.0 | 4713456.0 | 0.0 | 0.0 | 0.0 | 178.0 | 0.0 | 1049192.0 | 0.0 | 1049371.0 | 0.0 | 610.0 | 1048761.0 | 1049369.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097343.0 | 0.0 | 360.0 | 0.0 | 0.0 | 490736.0 | 1545103.0 | 0.0 | 0.0 | 983048.0 | 0.0 | 2097289.0 | 0.0 | 1430348646.0 | 0.0 | 16299643348666 | 16307562828433 | 16307562922194 | 16299643755117 |
| 20 | 18 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 3u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 53 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 24832 | 0x7fbb0abd5b80 | 0x7fbb0a2243c0 | 2471192 | 2366019 | 65536 | 308898 | 308898 | 9884736.0 | 9314207.0 | 0.0 | 0.0 | 0.0 | 275.0 | 0.0 | 2340950.0 | 0.0 | 2342230.0 | 0.0 | 244795.0 | 2097435.0 | 2360699.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194581.0 | 0.0 | 528.0 | 0.0 | 0.0 | 729779.0 | 2382406.0 | 0.0 | 0.0 | 2031625.0 | 0.0 | 4194537.0 | 0.0 | 2761895047.0 | 0.0 | 16299643839865 | 16307562952274 | 16307563131155 | 16299644337014 |
| 21 | 19 | void benchmark_func<double, 256, 8u, 3u>(double, double*) [clone .kd] | 0 | 0 | 56 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 25536 | 0x7fbb0abd5a00 | 0x7fbb0a224400 | 2463720 | 2356011 | 65536 | 307964 | 307964 | 9854848.0 | 9273743.0 | 0.0 | 0.0 | 0.0 | 279.0 | 0.0 | 2347391.0 | 0.0 | 2371210.0 | 0.0 | 273801.0 | 2097409.0 | 2338762.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194581.0 | 0.0 | 532.0 | 0.0 | 0.0 | 662347.0 | 2184703.0 | 0.0 | 0.0 | 2031624.0 | 0.0 | 4194542.0 | 0.0 | 2800166439.0 | 0.0 | 16299644421462 | 16307563162035 | 16307563342196 | 16299644919481 |
| 22 | 20 | void benchmark_func<__half2, 256, 8u, 3u>(__half2, __half2*) [clone .kd] | 0 | 0 | 59 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26240 | 0x7fbb0abd5880 | 0x7fbb0a224440 | 1315176 | 1208187 | 65536 | 164396 | 164396 | 5260672.0 | 4675140.0 | 0.0 | 0.0 | 0.0 | 185.0 | 0.0 | 1049248.0 | 0.0 | 1049429.0 | 0.0 | 666.0 | 1048763.0 | 1049424.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097345.0 | 0.0 | 362.0 | 0.0 | 0.0 | 496319.0 | 1572274.0 | 0.0 | 0.0 | 983048.0 | 0.0 | 2097291.0 | 0.0 | 1391427562.0 | 0.0 | 16299645003959 | 16307563368757 | 16307563462037 | 16299645427320 |
| 23 | 21 | void benchmark_func<int, 256, 8u, 3u>(int, int*) [clone .kd] | 0 | 0 | 62 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 26944 | 0x7fbb0abd5700 | 0x7fbb0a224480 | 1306088 | 1204635 | 65536 | 163260 | 163260 | 5224320.0 | 4682187.0 | 0.0 | 0.0 | 0.0 | 186.0 | 0.0 | 1049304.0 | 0.0 | 1049487.0 | 0.0 | 721.0 | 1048766.0 | 1049488.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097350.0 | 0.0 | 370.0 | 0.0 | 0.0 | 523819.0 | 1674681.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097288.0 | 0.0 | 1362289018.0 | 0.0 | 16299645511768 | 16307563494197 | 16307563587958 | 16299645933648 |
| 24 | 22 | void benchmark_func<float, 256, 8u, 4u>(float, float*) [clone .kd] | 0 | 0 | 65 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 27648 | 0x7fbb0cfbe580 | 0x7fbb0a2244c0 | 1303792 | 1203564 | 65536 | 162973 | 162973 | 5215136.0 | 4676149.0 | 0.0 | 0.0 | 0.0 | 181.0 | 0.0 | 1049192.0 | 0.0 | 1049370.0 | 0.0 | 610.0 | 1048760.0 | 1049374.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097349.0 | 0.0 | 372.0 | 0.0 | 0.0 | 553893.0 | 1698209.0 | 0.0 | 0.0 | 983048.0 | 0.0 | 2097289.0 | 0.0 | 1385606040.0 | 0.0 | 16299646026106 | 16307563622358 | 16307563714199 | 16299646436537 |
| 25 | 23 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 4u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 68 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 28608 | 0x7fbb0cfbe400 | 0x7fbb0a224500 | 2470064 | 2365548 | 65536 | 308757 | 308757 | 9880224.0 | 9281799.0 | 0.0 | 0.0 | 0.0 | 275.0 | 0.0 | 2345934.0 | 0.0 | 2352726.0 | 0.0 | 255282.0 | 2097444.0 | 2351572.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194597.0 | 0.0 | 558.0 | 0.0 | 0.0 | 835368.0 | 2799063.0 | 0.0 | 0.0 | 2031625.0 | 0.0 | 4194533.0 | 0.0 | 2604854370.0 | 0.0 | 16299646521655 | 16307563764439 | 16307563942840 | 16299647025334 |
| 26 | 24 | void benchmark_func<double, 256, 8u, 4u>(double, double*) [clone .kd] | 0 | 0 | 71 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 29312 | 0x7fbb0cfbe280 | 0x7fbb0a224540 | 2453688 | 2347618 | 65536 | 306710 | 306710 | 9814720.0 | 9227539.0 | 0.0 | 0.0 | 0.0 | 251.0 | 0.0 | 2371361.0 | 0.0 | 2355335.0 | 0.0 | 257910.0 | 2097425.0 | 2346710.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194587.0 | 0.0 | 540.0 | 0.0 | 0.0 | 788490.0 | 2578406.0 | 0.0 | 0.0 | 2031625.0 | 0.0 | 4194522.0 | 0.0 | 2826632402.0 | 0.0 | 16299647110302 | 16307563969240 | 16307564148921 | 16299647610711 |
| 27 | 25 | void benchmark_func<__half2, 256, 8u, 4u>(__half2, __half2*) [clone .kd] | 0 | 0 | 74 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 30016 | 0x7fbb0cfbe100 | 0x7fbb0a224580 | 1300472 | 1200046 | 65536 | 162558 | 162558 | 5201856.0 | 4683584.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1049248.0 | 0.0 | 1049432.0 | 0.0 | 665.0 | 1048767.0 | 1049427.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097342.0 | 0.0 | 356.0 | 0.0 | 0.0 | 412481.0 | 1212400.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097291.0 | 0.0 | 1388419700.0 | 0.0 | 16299647695770 | 16307564184122 | 16307564276122 | 16299648110120 |
| 28 | 26 | void benchmark_func<int, 256, 8u, 4u>(int, int*) [clone .kd] | 0 | 0 | 77 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 30976 | 0x7fbb0abd5f80 | 0x7fbb0a2245c0 | 1306784 | 1202151 | 65536 | 163347 | 163347 | 5227104.0 | 4672926.0 | 0.0 | 0.0 | 0.0 | 190.0 | 0.0 | 1049360.0 | 0.0 | 1049547.0 | 0.0 | 776.0 | 1048771.0 | 1049544.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097347.0 | 0.0 | 362.0 | 0.0 | 0.0 | 563054.0 | 1707934.0 | 0.0 | 0.0 | 983050.0 | 0.0 | 2097297.0 | 0.0 | 1400855381.0 | 0.0 | 16299648196208 | 16307564309562 | 16307564402043 | 16299648612549 |
| 29 | 27 | void benchmark_func<float, 256, 8u, 5u>(float, float*) [clone .kd] | 0 | 0 | 80 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 31680 | 0x7fbb0abd5e00 | 0x7fbb0a224600 | 1306048 | 1201303 | 65536 | 163255 | 163255 | 5224160.0 | 4674595.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1049304.0 | 0.0 | 1049486.0 | 0.0 | 721.0 | 1048765.0 | 1049482.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097344.0 | 0.0 | 358.0 | 0.0 | 0.0 | 552302.0 | 1640431.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097275.0 | 0.0 | 1404319625.0 | 0.0 | 16299648704977 | 16307564435963 | 16307564528604 | 16299649115398 |
| 30 | 28 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 5u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 83 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 32640 | 0x7fbb0abd5c80 | 0x7fbb0a224640 | 2450984 | 2345626 | 65536 | 306372 | 306372 | 9803904.0 | 9270827.0 | 0.0 | 0.0 | 0.0 | 253.0 | 0.0 | 2349923.0 | 0.0 | 2353107.0 | 0.0 | 255685.0 | 2097422.0 | 2366836.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194592.0 | 0.0 | 548.0 | 0.0 | 0.0 | 555873.0 | 1783735.0 | 0.0 | 0.0 | 2031626.0 | 0.0 | 4194524.0 | 0.0 | 2610848408.0 | 0.0 | 16299649200256 | 16307564565084 | 16307564745725 | 16299649702565 |
| 31 | 29 | void benchmark_func<double, 256, 8u, 5u>(double, double*) [clone .kd] | 0 | 0 | 86 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 33600 | 0x7fbb0abd5b00 | 0x7fbb0a224680 | 2460608 | 2356123 | 65536 | 307575 | 307575 | 9842400.0 | 9270435.0 | 0.0 | 0.0 | 0.0 | 271.0 | 0.0 | 2342994.0 | 0.0 | 2352360.0 | 0.0 | 254941.0 | 2097419.0 | 2356013.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194593.0 | 0.0 | 550.0 | 0.0 | 0.0 | 697015.0 | 2273161.0 | 0.0 | 0.0 | 2031625.0 | 0.0 | 4194525.0 | 0.0 | 2613043764.0 | 0.0 | 16299649787683 | 16307564770045 | 16307564951326 | 16299650295572 |
| 32 | 30 | void benchmark_func<__half2, 256, 8u, 5u>(__half2, __half2*) [clone .kd] | 0 | 0 | 89 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 34560 | 0x7fbb0abd5980 | 0x7fbb0a2246c0 | 1299904 | 1198470 | 65536 | 162487 | 162487 | 5199584.0 | 4650493.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1049360.0 | 0.0 | 1049538.0 | 0.0 | 777.0 | 1048761.0 | 1049546.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097350.0 | 0.0 | 368.0 | 0.0 | 0.0 | 540213.0 | 1533915.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097288.0 | 0.0 | 1458566522.0 | 0.0 | 16299650379780 | 16307564974846 | 16307565067967 | 16299650795501 |
| 33 | 31 | void benchmark_func<int, 256, 8u, 5u>(int, int*) [clone .kd] | 0 | 0 | 92 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 35520 | 0x7fbb0abd5800 | 0x7fbb0a224700 | 1306032 | 1205032 | 65536 | 163253 | 163253 | 5224096.0 | 4661100.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049472.0 | 0.0 | 1049655.0 | 0.0 | 888.0 | 1048767.0 | 1049657.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 374.0 | 0.0 | 0.0 | 607920.0 | 1843427.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097295.0 | 0.0 | 1422081337.0 | 0.0 | 16299650880449 | 16307565095007 | 16307565188928 | 16299651295810 |
| 34 | 32 | void benchmark_func<float, 256, 8u, 6u>(float, float*) [clone .kd] | 0 | 0 | 95 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 36224 | 0x7fbb0abd5680 | 0x7fbb0a224740 | 1307408 | 1205656 | 65536 | 163425 | 163425 | 5229600.0 | 4687164.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049304.0 | 0.0 | 1049485.0 | 0.0 | 721.0 | 1048764.0 | 1049487.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097348.0 | 0.0 | 366.0 | 0.0 | 0.0 | 532420.0 | 1580998.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097288.0 | 0.0 | 1435937693.0 | 0.0 | 16299651387628 | 16307565221408 | 16307565314529 | 16299651790649 |
| 35 | 33 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 6u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 98 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 37184 | 0x7fbb0cfbe500 | 0x7fbb0a224780 | 2446320 | 2342043 | 65536 | 305789 | 305789 | 9785248.0 | 9250265.0 | 0.0 | 0.0 | 0.0 | 281.0 | 0.0 | 2327737.0 | 0.0 | 2365746.0 | 0.0 | 268310.0 | 2097436.0 | 2362630.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194587.0 | 0.0 | 536.0 | 0.0 | 0.0 | 703763.0 | 2275861.0 | 0.0 | 0.0 | 2031626.0 | 0.0 | 4194527.0 | 0.0 | 2596613729.0 | 0.0 | 16299651875617 | 16307565339489 | 16307565518850 | 16299652375656 |
| 36 | 34 | void benchmark_func<double, 256, 8u, 6u>(double, double*) [clone .kd] | 0 | 0 | 101 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 38144 | 0x7fbb0cfbe380 | 0x7fbb0a2247c0 | 2438864 | 2331921 | 65536 | 304857 | 304857 | 9755424.0 | 9193848.0 | 0.0 | 0.0 | 0.0 | 262.0 | 0.0 | 2347257.0 | 0.0 | 2344369.0 | 0.0 | 246919.0 | 2097450.0 | 2340639.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194571.0 | 0.0 | 504.0 | 0.0 | 0.0 | 592039.0 | 1929197.0 | 0.0 | 0.0 | 2031626.0 | 0.0 | 4194530.0 | 0.0 | 2722919972.0 | 0.0 | 16299652460534 | 16307565543170 | 16307565724931 | 16299652963933 |
| 37 | 35 | void benchmark_func<__half2, 256, 8u, 6u>(__half2, __half2*) [clone .kd] | 0 | 0 | 104 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 39104 | 0x7fbb0cfbe200 | 0x7fbb0a224800 | 1306032 | 1200582 | 65536 | 163253 | 163253 | 5224096.0 | 4661778.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1049416.0 | 0.0 | 1049599.0 | 0.0 | 832.0 | 1048767.0 | 1049601.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097354.0 | 0.0 | 374.0 | 0.0 | 0.0 | 500811.0 | 1638113.0 | 0.0 | 0.0 | 983050.0 | 0.0 | 2097294.0 | 0.0 | 1410028342.0 | 0.0 | 16299653048341 | 16307565751491 | 16307565845092 | 16299653460502 |
| 38 | 36 | void benchmark_func<int, 256, 8u, 6u>(int, int*) [clone .kd] | 0 | 0 | 107 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 40320 | 0x7fbb0cfbe080 | 0x7fbb0a224840 | 1314672 | 1202114 | 65536 | 164333 | 164333 | 5258656.0 | 4663702.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049584.0 | 0.0 | 1049766.0 | 0.0 | 999.0 | 1048767.0 | 1049776.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097348.0 | 0.0 | 356.0 | 0.0 | 0.0 | 499824.0 | 1557803.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097298.0 | 0.0 | 1370345240.0 | 0.0 | 16299653545320 | 16307565869252 | 16307565963813 | 16299653971131 |
| 39 | 37 | void benchmark_func<float, 256, 8u, 7u>(float, float*) [clone .kd] | 0 | 0 | 110 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 41280 | 0x7fbb0abd5f00 | 0x7fbb0a224880 | 1304432 | 1199825 | 65536 | 163053 | 163053 | 5217696.0 | 4660299.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1049360.0 | 0.0 | 1049538.0 | 0.0 | 777.0 | 1048761.0 | 1049547.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097347.0 | 0.0 | 362.0 | 0.0 | 0.0 | 561980.0 | 1763211.0 | 0.0 | 0.0 | 983049.0 | 0.0 | 2097297.0 | 0.0 | 1389728303.0 | 0.0 | 16299654064169 | 16307565995973 | 16307566088613 | 16299654482130 |
| 40 | 38 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 7u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 113 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 42240 | 0x7fbb0abd5d80 | 0x7fbb0a2248c0 | 2456640 | 2349791 | 65536 | 307079 | 307079 | 9826528.0 | 9254730.0 | 0.0 | 0.0 | 0.0 | 260.0 | 0.0 | 2353027.0 | 0.0 | 2339187.0 | 0.0 | 241767.0 | 2097420.0 | 2364955.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194591.0 | 0.0 | 540.0 | 0.0 | 0.0 | 622185.0 | 2077224.0 | 0.0 | 0.0 | 2031627.0 | 0.0 | 4194535.0 | 0.0 | 2533295148.0 | 0.0 | 16299654566898 | 16307566116453 | 16307566295655 | 16299655069997 |
| 41 | 39 | void benchmark_func<double, 256, 8u, 7u>(double, double*) [clone .kd] | 0 | 0 | 116 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 43200 | 0x7fbb0abd5c00 | 0x7fbb0a224900 | 2452624 | 2347659 | 65536 | 306577 | 306577 | 9810464.0 | 9276307.0 | 0.0 | 0.0 | 0.0 | 277.0 | 0.0 | 2336779.0 | 0.0 | 2356637.0 | 0.0 | 259211.0 | 2097426.0 | 2344078.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194583.0 | 0.0 | 528.0 | 0.0 | 0.0 | 678291.0 | 2226645.0 | 0.0 | 0.0 | 2031626.0 | 0.0 | 4194522.0 | 0.0 | 2576827993.0 | 0.0 | 16299655155035 | 16307566321415 | 16307566502376 | 16299655656864 |
| 42 | 40 | void benchmark_func<__half2, 256, 8u, 7u>(__half2, __half2*) [clone .kd] | 0 | 0 | 119 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 44160 | 0x7fbb0abd5a80 | 0x7fbb0a224940 | 1308496 | 1202302 | 65536 | 163561 | 163561 | 5233952.0 | 4669876.0 | 0.0 | 0.0 | 0.0 | 189.0 | 0.0 | 1049472.0 | 0.0 | 1049656.0 | 0.0 | 888.0 | 1048768.0 | 1049653.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097351.0 | 0.0 | 366.0 | 0.0 | 0.0 | 468713.0 | 1436709.0 | 0.0 | 0.0 | 983050.0 | 0.0 | 2097297.0 | 0.0 | 1406387039.0 | 0.0 | 16299655740942 | 16307566528296 | 16307566622217 | 16299656160983 |
| 43 | 41 | void benchmark_func<int, 256, 8u, 7u>(int, int*) [clone .kd] | 0 | 0 | 122 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 45376 | 0x7fbb0abd5900 | 0x7fbb0a224980 | 1308896 | 1197510 | 65536 | 163611 | 163611 | 5235552.0 | 4616242.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1049640.0 | 0.0 | 1049825.0 | 0.0 | 1054.0 | 1048771.0 | 1049830.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097356.0 | 0.0 | 370.0 | 0.0 | 0.0 | 559973.0 | 1601309.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097301.0 | 0.0 | 1438182099.0 | 0.0 | 16299656245061 | 16307566645577 | 16307566740297 | 16299656661022 |
| 44 | 42 | void benchmark_func<float, 256, 8u, 8u>(float, float*) [clone .kd] | 0 | 0 | 125 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 46336 | 0x7fbb0abd5780 | 0x7fbb0a2249c0 | 1300576 | 1200480 | 65536 | 162571 | 162571 | 5202272.0 | 4645495.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049416.0 | 0.0 | 1049601.0 | 0.0 | 832.0 | 1048769.0 | 1049604.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097357.0 | 0.0 | 380.0 | 0.0 | 0.0 | 484049.0 | 1449888.0 | 0.0 | 0.0 | 983050.0 | 0.0 | 2097301.0 | 0.0 | 1397499280.0 | 0.0 | 16299656753310 | 16307566772938 | 16307566866058 | 16299657166360 |
| 45 | 43 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 8u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 128 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 47552 | 0x7fbb0abd5600 | 0x7fbb0a224a00 | 2467024 | 2349110 | 65536 | 308377 | 308377 | 9868064.0 | 9263334.0 | 0.0 | 0.0 | 0.0 | 266.0 | 0.0 | 2352038.0 | 0.0 | 2378272.0 | 0.0 | 280849.0 | 2097423.0 | 2359752.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194588.0 | 0.0 | 532.0 | 0.0 | 0.0 | 594617.0 | 1928466.0 | 0.0 | 0.0 | 2031627.0 | 0.0 | 4194526.0 | 0.0 | 2637049899.0 | 0.0 | 16299657250519 | 16307566891338 | 16307567071499 | 16299657755967 |
| 46 | 44 | void benchmark_func<double, 256, 8u, 8u>(double, double*) [clone .kd] | 0 | 0 | 131 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 48512 | 0x7fbb0cfbe480 | 0x7fbb0a224a40 | 2418968 | 2309752 | 65536 | 302370 | 302370 | 9675840.0 | 9113202.0 | 0.0 | 0.0 | 0.0 | 257.0 | 0.0 | 2355202.0 | 0.0 | 2355499.0 | 0.0 | 258084.0 | 2097415.0 | 2351561.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194561.0 | 0.0 | 480.0 | 0.0 | 0.0 | 666031.0 | 2164559.0 | 0.0 | 0.0 | 2031627.0 | 0.0 | 4194509.0 | 0.0 | 2728705551.0 | 0.0 | 16299657840516 | 16307567095019 | 16307567275821 | 16299658350234 |
| 47 | 45 | void benchmark_func<__half2, 256, 8u, 8u>(__half2, __half2*) [clone .kd] | 0 | 0 | 134 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 49472 | 0x7fbb0cfbe300 | 0x7fbb0a224a80 | 1289360 | 1186214 | 65536 | 161169 | 161169 | 5157408.0 | 4624854.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049472.0 | 0.0 | 1049654.0 | 0.0 | 887.0 | 1048767.0 | 1049660.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097352.0 | 0.0 | 368.0 | 0.0 | 0.0 | 545667.0 | 1603001.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097292.0 | 0.0 | 1427694366.0 | 0.0 | 16299658435712 | 16307567299021 | 16307567392141 | 16299658857553 |
| 48 | 46 | void benchmark_func<int, 256, 8u, 8u>(int, int*) [clone .kd] | 0 | 0 | 137 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 50688 | 0x7fbb0cfbe180 | 0x7fbb0a224ac0 | 1307744 | 1200046 | 65536 | 163467 | 163467 | 5230944.0 | 4657372.0 | 0.0 | 0.0 | 0.0 | 190.0 | 0.0 | 1049696.0 | 0.0 | 1049884.0 | 0.0 | 1109.0 | 1048775.0 | 1049882.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097362.0 | 0.0 | 380.0 | 0.0 | 0.0 | 426717.0 | 1310304.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097302.0 | 0.0 | 1397929489.0 | 0.0 | 16299658942231 | 16307567416621 | 16307567509582 | 16299659356202 |
| 49 | 47 | void benchmark_func<float, 256, 8u, 9u>(float, float*) [clone .kd] | 0 | 0 | 140 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 51648 | 0x7fbb0cfbe000 | 0x7fbb0a224b00 | 1288784 | 1188184 | 65536 | 161097 | 161097 | 5155104.0 | 4639782.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049416.0 | 0.0 | 1049602.0 | 0.0 | 832.0 | 1048770.0 | 1049598.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097353.0 | 0.0 | 372.0 | 0.0 | 0.0 | 536622.0 | 1554281.0 | 0.0 | 0.0 | 983050.0 | 0.0 | 2097294.0 | 0.0 | 1419422071.0 | 0.0 | 16299659450130 | 16307567541902 | 16307567634543 | 16299659858471 |
| 50 | 48 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 9u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 143 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 52864 | 0x7fbb0abd5e80 | 0x7fbb0a224b40 | 2435880 | 2329274 | 65536 | 304484 | 304484 | 9743488.0 | 9189007.0 | 0.0 | 0.0 | 0.0 | 278.0 | 0.0 | 2369373.0 | 0.0 | 2351207.0 | 0.0 | 253788.0 | 2097419.0 | 2361194.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194589.0 | 0.0 | 532.0 | 0.0 | 0.0 | 758073.0 | 2523226.0 | 0.0 | 0.0 | 2031628.0 | 0.0 | 4194506.0 | 0.0 | 2634921195.0 | 0.0 | 16299659942779 | 16307567660303 | 16307567840144 | 16299660452058 |
| 51 | 49 | void benchmark_func<double, 256, 8u, 9u>(double, double*) [clone .kd] | 0 | 0 | 146 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 54080 | 0x7fbb0abd5d00 | 0x7fbb0a224b80 | 2414200 | 2304924 | 65536 | 301774 | 301774 | 9656768.0 | 9071234.0 | 0.0 | 0.0 | 0.0 | 248.0 | 0.0 | 2360193.0 | 0.0 | 2375421.0 | 0.0 | 277990.0 | 2097431.0 | 2355080.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194581.0 | 0.0 | 518.0 | 0.0 | 0.0 | 650071.0 | 2132911.0 | 0.0 | 0.0 | 2031627.0 | 0.0 | 4194520.0 | 0.0 | 2721050564.0 | 0.0 | 16299660536496 | 16307567864464 | 16307568044945 | 16299661035935 |
| 52 | 50 | void benchmark_func<__half2, 256, 8u, 9u>(__half2, __half2*) [clone .kd] | 0 | 0 | 149 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 55296 | 0x7fbb0abd5b80 | 0x7fbb0a224bc0 | 1298200 | 1198585 | 65536 | 162274 | 162274 | 5192768.0 | 4629348.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049584.0 | 0.0 | 1049771.0 | 0.0 | 999.0 | 1048772.0 | 1049771.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097357.0 | 0.0 | 374.0 | 0.0 | 0.0 | 576535.0 | 1618715.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097300.0 | 0.0 | 1403166421.0 | 0.0 | 16299661120723 | 16307568068625 | 16307568162386 | 16299661543614 |
| 53 | 51 | void benchmark_func<int, 256, 8u, 9u>(int, int*) [clone .kd] | 0 | 0 | 152 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 56768 | 0x7fbb0abd5a00 | 0x7fbb0a224c00 | 1312464 | 1203264 | 65536 | 164057 | 164057 | 5249824.0 | 4666329.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049808.0 | 0.0 | 1049991.0 | 0.0 | 1221.0 | 1048770.0 | 1049998.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097364.0 | 0.0 | 380.0 | 0.0 | 0.0 | 343795.0 | 1121030.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097308.0 | 0.0 | 1297117453.0 | 0.0 | 16299661628822 | 16307568186546 | 16307568282227 | 16299662049553 |
| 54 | 52 | void benchmark_func<float, 256, 8u, 10u>(float, float*) [clone .kd] | 0 | 0 | 155 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 57728 | 0x7fbb0abd5880 | 0x7fbb0a224c40 | 1306848 | 1196285 | 65536 | 163355 | 163355 | 5227360.0 | 4640967.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1049528.0 | 0.0 | 1049715.0 | 0.0 | 943.0 | 1048772.0 | 1049714.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097353.0 | 0.0 | 368.0 | 0.0 | 0.0 | 442169.0 | 1483574.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097294.0 | 0.0 | 1431487423.0 | 0.0 | 16299662142251 | 16307568314547 | 16307568408468 | 16299662565562 |
| 55 | 53 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 10u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 158 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 58944 | 0x7fbb0abd5700 | 0x7fbb0a224c80 | 2433448 | 2327807 | 65536 | 304180 | 304180 | 9733760.0 | 9170691.0 | 0.0 | 0.0 | 0.0 | 236.0 | 0.0 | 2355031.0 | 0.0 | 2360999.0 | 0.0 | 263592.0 | 2097407.0 | 2370059.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194585.0 | 0.0 | 524.0 | 0.0 | 0.0 | 650412.0 | 2093718.0 | 0.0 | 0.0 | 2031628.0 | 0.0 | 4194511.0 | 0.0 | 2690677322.0 | 0.0 | 16299662650570 | 16307568433268 | 16307568612469 | 16299663158448 |
| 56 | 54 | void benchmark_func<double, 256, 8u, 10u>(double, double*) [clone .kd] | 0 | 0 | 161 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 60160 | 0x7fbb0cfbe580 | 0x7fbb0a224cc0 | 2420488 | 2314628 | 65536 | 302560 | 302560 | 9681920.0 | 9099377.0 | 0.0 | 0.0 | 0.0 | 241.0 | 0.0 | 2365919.0 | 0.0 | 2355116.0 | 0.0 | 257700.0 | 2097416.0 | 2339678.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194574.0 | 0.0 | 502.0 | 0.0 | 0.0 | 685658.0 | 2228327.0 | 0.0 | 0.0 | 2031628.0 | 0.0 | 4194516.0 | 0.0 | 2752380634.0 | 0.0 | 16299663243097 | 16307568636789 | 16307568817430 | 16299663762005 |
| 57 | 55 | void benchmark_func<__half2, 256, 8u, 10u>(__half2, __half2*) [clone .kd] | 0 | 0 | 164 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 61376 | 0x7fbb0cfbe400 | 0x7fbb0a224d00 | 1298488 | 1196996 | 65536 | 162310 | 162310 | 5193920.0 | 4630305.0 | 0.0 | 0.0 | 0.0 | 185.0 | 0.0 | 1049640.0 | 0.0 | 1049827.0 | 0.0 | 1054.0 | 1048773.0 | 1049828.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 368.0 | 0.0 | 0.0 | 445367.0 | 1348182.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097301.0 | 0.0 | 1367854963.0 | 0.0 | 16299663847613 | 16307568842230 | 16307568937751 | 16299664267374 |
| 58 | 56 | void benchmark_func<int, 256, 8u, 10u>(int, int*) [clone .kd] | 0 | 0 | 167 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 62848 | 0x7fbb0cfbe280 | 0x7fbb0a224d40 | 1343448 | 1238044 | 65536 | 167930 | 167930 | 5373760.0 | 4791805.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049920.0 | 0.0 | 1050099.0 | 0.0 | 1332.0 | 1048767.0 | 1050109.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 358.0 | 0.0 | 0.0 | 210209.0 | 715035.0 | 0.0 | 0.0 | 983054.0 | 0.0 | 2097302.0 | 0.0 | 1107728420.0 | 0.0 | 16299664351782 | 16307568962711 | 16307569062712 | 16299664774533 |
| 59 | 57 | void benchmark_func<float, 256, 8u, 11u>(float, float*) [clone .kd] | 0 | 0 | 170 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 63808 | 0x7fbb0cfbe100 | 0x7fbb0a224d80 | 1301312 | 1192989 | 65536 | 162663 | 162663 | 5205216.0 | 4625878.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1049528.0 | 0.0 | 1049711.0 | 0.0 | 943.0 | 1048768.0 | 1049717.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097357.0 | 0.0 | 376.0 | 0.0 | 0.0 | 512723.0 | 1586467.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097302.0 | 0.0 | 1431882214.0 | 0.0 | 16299664867300 | 16307569095992 | 16307569189912 | 16299665281011 |
| 60 | 58 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 11u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 173 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 65024 | 0x7fbb0abd5f80 | 0x7fbb0a224dc0 | 2430416 | 2322489 | 65536 | 303801 | 303801 | 9721632.0 | 9164884.0 | 0.0 | 0.0 | 0.0 | 245.0 | 0.0 | 2349293.0 | 0.0 | 2349865.0 | 0.0 | 252455.0 | 2097410.0 | 2374059.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194564.0 | 0.0 | 480.0 | 0.0 | 0.0 | 658317.0 | 2174604.0 | 0.0 | 0.0 | 2031629.0 | 0.0 | 4194518.0 | 0.0 | 2726518981.0 | 0.0 | 16299665365349 | 16307569218713 | 16307569397754 | 16299665875168 |
| 61 | 59 | void benchmark_func<double, 256, 8u, 11u>(double, double*) [clone .kd] | 0 | 0 | 176 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 66240 | 0x7fbb0abd5e00 | 0x7fbb0a224e00 | 2396872 | 2290442 | 65536 | 299608 | 299608 | 9587456.0 | 9061341.0 | 0.0 | 0.0 | 0.0 | 242.0 | 0.0 | 2363140.0 | 0.0 | 2367120.0 | 0.0 | 269727.0 | 2097393.0 | 2382147.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194580.0 | 0.0 | 514.0 | 0.0 | 0.0 | 720614.0 | 2372827.0 | 0.0 | 0.0 | 2031628.0 | 0.0 | 4194521.0 | 0.0 | 2813126784.0 | 0.0 | 16299665959236 | 16307569424154 | 16307569602715 | 16299666465465 |
| 62 | 60 | void benchmark_func<__half2, 256, 8u, 11u>(__half2, __half2*) [clone .kd] | 0 | 0 | 179 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 67456 | 0x7fbb0abd5c80 | 0x7fbb0a224e40 | 1300720 | 1195846 | 65536 | 162589 | 162589 | 5202848.0 | 4618726.0 | 0.0 | 0.0 | 0.0 | 191.0 | 0.0 | 1049696.0 | 0.0 | 1049880.0 | 0.0 | 1110.0 | 1048770.0 | 1049886.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 366.0 | 0.0 | 0.0 | 610948.0 | 1855747.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097300.0 | 0.0 | 1389080936.0 | 0.0 | 16299666549343 | 16307569628635 | 16307569723196 | 16299666986544 |
| 63 | 61 | void benchmark_func<int, 256, 8u, 11u>(int, int*) [clone .kd] | 0 | 0 | 182 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 68928 | 0x7fbb0abd5b00 | 0x7fbb0a224e80 | 1412856 | 1311615 | 65536 | 176606 | 176606 | 5651392.0 | 5079715.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1049976.0 | 0.0 | 1050164.0 | 0.0 | 1387.0 | 1048777.0 | 1050159.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097356.0 | 0.0 | 358.0 | 0.0 | 0.0 | 287898.0 | 897546.0 | 0.0 | 0.0 | 983055.0 | 0.0 | 2097296.0 | 0.0 | 1076649688.0 | 0.0 | 16299667071032 | 16307569749276 | 16307569853596 | 16299667507182 |
| 64 | 62 | void benchmark_func<float, 256, 8u, 12u>(float, float*) [clone .kd] | 0 | 0 | 185 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 70144 | 0x7fbb0abd5980 | 0x7fbb0a224ec0 | 1288512 | 1182375 | 65536 | 161063 | 161063 | 5154016.0 | 4601323.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049528.0 | 0.0 | 1049713.0 | 0.0 | 943.0 | 1048770.0 | 1049713.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 372.0 | 0.0 | 0.0 | 632397.0 | 1698914.0 | 0.0 | 0.0 | 983051.0 | 0.0 | 2097296.0 | 0.0 | 1437664420.0 | 0.0 | 16299667599770 | 16307569887517 | 16307569980477 | 16299668033151 |
| 65 | 63 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 12u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 188 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 71616 | 0x7fbb0abd5800 | 0x7fbb0a224f00 | 2424208 | 2316362 | 65536 | 303025 | 303025 | 9696800.0 | 9102169.0 | 0.0 | 0.0 | 0.0 | 221.0 | 0.0 | 2376858.0 | 0.0 | 2350213.0 | 0.0 | 252829.0 | 2097384.0 | 2325100.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194540.0 | 0.0 | 428.0 | 0.0 | 0.0 | 587330.0 | 1928322.0 | 0.0 | 0.0 | 2031629.0 | 0.0 | 4194491.0 | 0.0 | 2740795721.0 | 0.0 | 16299668117769 | 16307570006077 | 16307570184798 | 16299668624678 |
| 66 | 64 | void benchmark_func<double, 256, 8u, 12u>(double, double*) [clone .kd] | 0 | 0 | 191 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 72832 | 0x7fbb0abd5680 | 0x7fbb0a224f40 | 2414520 | 2310163 | 65536 | 301814 | 301814 | 9658048.0 | 9106529.0 | 0.0 | 0.0 | 0.0 | 232.0 | 0.0 | 2375635.0 | 0.0 | 2361831.0 | 0.0 | 264438.0 | 2097393.0 | 2359148.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194565.0 | 0.0 | 480.0 | 0.0 | 0.0 | 589068.0 | 1944240.0 | 0.0 | 0.0 | 2031629.0 | 0.0 | 4194508.0 | 0.0 | 2630582607.0 | 0.0 | 16299668708466 | 16307570213119 | 16307570392000 | 16299669221684 |
| 67 | 65 | void benchmark_func<__half2, 256, 8u, 12u>(__half2, __half2*) [clone .kd] | 0 | 0 | 194 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 74048 | 0x7fbb0cfbe500 | 0x7fbb0a224f80 | 1296024 | 1192863 | 65536 | 162002 | 162002 | 5184064.0 | 4615597.0 | 0.0 | 0.0 | 0.0 | 185.0 | 0.0 | 1049696.0 | 0.0 | 1049879.0 | 0.0 | 1109.0 | 1048770.0 | 1049886.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097359.0 | 0.0 | 374.0 | 0.0 | 0.0 | 526968.0 | 1628984.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097300.0 | 0.0 | 1407104501.0 | 0.0 | 16299669305723 | 16307570415680 | 16307570509120 | 16299669734113 |
| 68 | 66 | void benchmark_func<int, 256, 8u, 12u>(int, int*) [clone .kd] | 0 | 0 | 197 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 75776 | 0x7fbb0cfbe380 | 0x7fbb0a224fc0 | 1467696 | 1353127 | 65536 | 183461 | 183461 | 5870752.0 | 5320963.0 | 0.0 | 0.0 | 0.0 | 180.0 | 0.0 | 1050032.0 | 0.0 | 1050204.0 | 0.0 | 1442.0 | 1048762.0 | 1050208.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 354.0 | 0.0 | 0.0 | 42544.0 | 172920.0 | 0.0 | 0.0 | 983056.0 | 0.0 | 2097290.0 | 0.0 | 901174814.0 | 0.0 | 16299669819141 | 16307570533281 | 16307570640961 | 16299670280171 |
| 69 | 67 | void benchmark_func<float, 256, 8u, 13u>(float, float*) [clone .kd] | 0 | 0 | 200 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 76992 | 0x7fbb0cfbe200 | 0x7fbb0a225000 | 1301232 | 1190953 | 65536 | 162653 | 162653 | 5204896.0 | 4616910.0 | 0.0 | 0.0 | 0.0 | 186.0 | 0.0 | 1049640.0 | 0.0 | 1049827.0 | 0.0 | 1054.0 | 1048773.0 | 1049823.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097360.0 | 0.0 | 378.0 | 0.0 | 0.0 | 416535.0 | 1358687.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097299.0 | 0.0 | 1411404529.0 | 0.0 | 16299670385349 | 16307570673121 | 16307570767682 | 16299670807799 |
| 70 | 68 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 13u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 203 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 78464 | 0x7fbb0cfbe080 | 0x7fbb0a225040 | 2410176 | 2301561 | 65536 | 301271 | 301271 | 9640672.0 | 9022071.0 | 0.0 | 0.0 | 0.0 | 226.0 | 0.0 | 2364359.0 | 0.0 | 2350430.0 | 0.0 | 253045.0 | 2097385.0 | 2363347.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194569.0 | 0.0 | 484.0 | 0.0 | 0.0 | 566147.0 | 1838422.0 | 0.0 | 0.0 | 2031630.0 | 0.0 | 4194516.0 | 0.0 | 2796304324.0 | 0.0 | 16299670892468 | 16307570792002 | 16307570971203 | 16299671427246 |
| 71 | 69 | void benchmark_func<double, 256, 8u, 13u>(double, double*) [clone .kd] | 0 | 0 | 206 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 79936 | 0x7fbb0abd5f00 | 0x7fbb0a225080 | 2417008 | 2309222 | 65536 | 302125 | 302125 | 9668000.0 | 9096636.0 | 0.0 | 0.0 | 0.0 | 227.0 | 0.0 | 2363373.0 | 0.0 | 2368082.0 | 0.0 | 270685.0 | 2097397.0 | 2356413.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194543.0 | 0.0 | 434.0 | 0.0 | 0.0 | 550912.0 | 1802857.0 | 0.0 | 0.0 | 2031629.0 | 0.0 | 4194507.0 | 0.0 | 2690521674.0 | 0.0 | 16299671511884 | 16307570996003 | 16307571174724 | 16299672038332 |
| 72 | 70 | void benchmark_func<__half2, 256, 8u, 13u>(__half2, __half2*) [clone .kd] | 0 | 0 | 209 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 81408 | 0x7fbb0abd5d80 | 0x7fbb0a2250c0 | 1300192 | 1189155 | 65536 | 162523 | 162523 | 5200736.0 | 4604843.0 | 0.0 | 0.0 | 0.0 | 189.0 | 0.0 | 1049808.0 | 0.0 | 1049993.0 | 0.0 | 1221.0 | 1048772.0 | 1049988.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097364.0 | 0.0 | 380.0 | 0.0 | 0.0 | 598703.0 | 1789793.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097303.0 | 0.0 | 1403759978.0 | 0.0 | 16299672123350 | 16307571199525 | 16307571293925 | 16299672559471 |
| 73 | 71 | void benchmark_func<int, 256, 8u, 13u>(int, int*) [clone .kd] | 0 | 0 | 212 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 83136 | 0x7fbb0abd5c00 | 0x7fbb0a225100 | 1536464 | 1427128 | 65536 | 192057 | 192057 | 6145824.0 | 5547972.0 | 0.0 | 0.0 | 0.0 | 169.0 | 0.0 | 1050144.0 | 0.0 | 1050313.0 | 0.0 | 1554.0 | 1048759.0 | 1050318.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097356.0 | 0.0 | 352.0 | 0.0 | 0.0 | 26818.0 | 128442.0 | 0.0 | 0.0 | 983056.0 | 0.0 | 2097300.0 | 0.0 | 780102848.0 | 0.0 | 16299672644739 | 16307571317925 | 16307571433286 | 16299673091479 |
| 74 | 72 | void benchmark_func<float, 256, 8u, 14u>(float, float*) [clone .kd] | 0 | 0 | 215 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 84352 | 0x7fbb0abd5a80 | 0x7fbb0a225140 | 1288832 | 1185822 | 65536 | 161103 | 161103 | 5155296.0 | 4601418.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049696.0 | 0.0 | 1049880.0 | 0.0 | 1110.0 | 1048770.0 | 1049882.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 366.0 | 0.0 | 0.0 | 523875.0 | 1674388.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097302.0 | 0.0 | 1422964216.0 | 0.0 | 16299673184177 | 16307571480166 | 16307571574247 | 16299673604597 |
| 75 | 73 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 14u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 218 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 85824 | 0x7fbb0abd5900 | 0x7fbb0a225180 | 2414112 | 2308917 | 65536 | 301763 | 301763 | 9656416.0 | 9034732.0 | 0.0 | 0.0 | 0.0 | 251.0 | 0.0 | 2334750.0 | 0.0 | 2349054.0 | 0.0 | 251637.0 | 2097417.0 | 2360489.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194596.0 | 0.0 | 536.0 | 0.0 | 0.0 | 593967.0 | 1958061.0 | 0.0 | 0.0 | 2031630.0 | 0.0 | 4194513.0 | 0.0 | 2622860553.0 | 0.0 | 16299673690426 | 16307571598407 | 16307571778088 | 16299674222964 |
| 76 | 74 | void benchmark_func<double, 256, 8u, 14u>(double, double*) [clone .kd] | 0 | 0 | 221 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 87296 | 0x7fbb0abd5780 | 0x7fbb0a2251c0 | 2397456 | 2289668 | 65536 | 299681 | 299681 | 9589792.0 | 9015643.0 | 0.0 | 0.0 | 0.0 | 253.0 | 0.0 | 2357308.0 | 0.0 | 2362171.0 | 0.0 | 264769.0 | 2097402.0 | 2349092.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194553.0 | 0.0 | 452.0 | 0.0 | 0.0 | 631745.0 | 2086383.0 | 0.0 | 0.0 | 2031630.0 | 0.0 | 4194509.0 | 0.0 | 2645896986.0 | 0.0 | 16299674308372 | 16307571802888 | 16307571983049 | 16299674833010 |
| 77 | 75 | void benchmark_func<__half2, 256, 8u, 14u>(__half2, __half2*) [clone .kd] | 0 | 0 | 224 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 88768 | 0x7fbb0abd5600 | 0x7fbb0a225200 | 1296728 | 1193466 | 65536 | 162090 | 162090 | 5186880.0 | 4610074.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049864.0 | 0.0 | 1050047.0 | 0.0 | 1276.0 | 1048771.0 | 1050047.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097360.0 | 0.0 | 370.0 | 0.0 | 0.0 | 568906.0 | 1745661.0 | 0.0 | 0.0 | 983054.0 | 0.0 | 2097303.0 | 0.0 | 1353519410.0 | 0.0 | 16299674919018 | 16307572007210 | 16307572101770 | 16299675351859 |
| 78 | 76 | void benchmark_func<int, 256, 8u, 14u>(int, int*) [clone .kd] | 0 | 0 | 227 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 90752 | 0x7fbb0cfbe480 | 0x7fbb0a225240 | 1616512 | 1507482 | 65536 | 202063 | 202063 | 6466016.0 | 5858497.0 | 0.0 | 0.0 | 0.0 | 170.0 | 0.0 | 1050256.0 | 0.0 | 1050428.0 | 0.0 | 1665.0 | 1048763.0 | 1050434.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097358.0 | 0.0 | 352.0 | 0.0 | 0.0 | 18411.0 | 103092.0 | 0.0 | 0.0 | 983057.0 | 0.0 | 2097298.0 | 0.0 | 704341117.0 | 0.0 | 16299675436957 | 16307572126410 | 16307572247211 | 16299675893177 |
| 79 | 77 | void benchmark_func<float, 256, 8u, 15u>(float, float*) [clone .kd] | 0 | 0 | 230 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 91968 | 0x7fbb0cfbe300 | 0x7fbb0a225280 | 1291856 | 1190184 | 65536 | 161481 | 161481 | 5167392.0 | 4609993.0 | 0.0 | 0.0 | 0.0 | 184.0 | 0.0 | 1049696.0 | 0.0 | 1049872.0 | 0.0 | 1110.0 | 1048762.0 | 1049878.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097357.0 | 0.0 | 370.0 | 0.0 | 0.0 | 472860.0 | 1484285.0 | 0.0 | 0.0 | 983052.0 | 0.0 | 2097303.0 | 0.0 | 1420613359.0 | 0.0 | 16299675987165 | 16307572280331 | 16307572373932 | 16299676409506 |
| 80 | 78 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 15u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 233 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 93440 | 0x7fbb0cfbe180 | 0x7fbb0a2252c0 | 2412952 | 2305606 | 65536 | 301618 | 301618 | 9651776.0 | 9052312.0 | 0.0 | 0.0 | 0.0 | 255.0 | 0.0 | 2335777.0 | 0.0 | 2375029.0 | 0.0 | 277612.0 | 2097417.0 | 2346718.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194581.0 | 0.0 | 506.0 | 0.0 | 0.0 | 827591.0 | 2772665.0 | 0.0 | 0.0 | 2031630.0 | 0.0 | 4194542.0 | 0.0 | 2781880186.0 | 0.0 | 16299676495014 | 16307572398732 | 16307572578573 | 16299677017962 |
| 81 | 79 | void benchmark_func<double, 256, 8u, 15u>(double, double*) [clone .kd] | 0 | 0 | 236 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 94912 | 0x7fbb0cfbe000 | 0x7fbb0a225300 | 2404392 | 2299533 | 65536 | 300548 | 300548 | 9617536.0 | 9060991.0 | 0.0 | 0.0 | 0.0 | 248.0 | 0.0 | 2369398.0 | 0.0 | 2372329.0 | 0.0 | 274929.0 | 2097400.0 | 2366974.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194573.0 | 0.0 | 490.0 | 0.0 | 0.0 | 691306.0 | 2224822.0 | 0.0 | 0.0 | 2031630.0 | 0.0 | 4194524.0 | 0.0 | 2693637242.0 | 0.0 | 16299677103590 | 16307572603373 | 16307572782574 | 16299677620329 |
| 82 | 80 | void benchmark_func<__half2, 256, 8u, 15u>(__half2, __half2*) [clone .kd] | 0 | 0 | 239 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 96384 | 0x7fbb0abd5e80 | 0x7fbb0a225340 | 1295376 | 1191887 | 65536 | 161921 | 161921 | 5181472.0 | 4608054.0 | 0.0 | 0.0 | 0.0 | 189.0 | 0.0 | 1049920.0 | 0.0 | 1050106.0 | 0.0 | 1332.0 | 1048774.0 | 1050108.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097362.0 | 0.0 | 372.0 | 0.0 | 0.0 | 499028.0 | 1634529.0 | 0.0 | 0.0 | 983054.0 | 0.0 | 2097308.0 | 0.0 | 1409170624.0 | 0.0 | 16299677704357 | 16307572807535 | 16307572902575 | 16299678141677 |
| 83 | 81 | void benchmark_func<int, 256, 8u, 15u>(int, int*) [clone .kd] | 0 | 0 | 242 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 98368 | 0x7fbb0abd5d00 | 0x7fbb0a225380 | 1698448 | 1587281 | 65536 | 212305 | 212305 | 6793760.0 | 6205938.0 | 0.0 | 0.0 | 0.0 | 164.0 | 0.0 | 1050312.0 | 0.0 | 1050475.0 | 0.0 | 1720.0 | 1048755.0 | 1050462.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097340.0 | 0.0 | 314.0 | 0.0 | 0.0 | 41408.0 | 160691.0 | 0.0 | 0.0 | 983058.0 | 0.0 | 2097286.0 | 0.0 | 680936246.0 | 0.0 | 16299678227366 | 16307572926735 | 16307573053616 | 16299678699185 |
| 84 | 82 | void benchmark_func<float, 256, 8u, 16u>(float, float*) [clone .kd] | 0 | 0 | 245 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 99584 | 0x7fbb0abd5b80 | 0x7fbb0a2253c0 | 1298992 | 1196591 | 65536 | 162373 | 162373 | 5195936.0 | 4594673.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1049752.0 | 0.0 | 1049938.0 | 0.0 | 1165.0 | 1048773.0 | 1049936.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097360.0 | 0.0 | 374.0 | 0.0 | 0.0 | 364655.0 | 1172948.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097304.0 | 0.0 | 1415660813.0 | 0.0 | 16299678792343 | 16307573086896 | 16307573181937 | 16299679212834 |
| 85 | 83 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 16u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 248 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 101312 | 0x7fbb0abd5a00 | 0x7fbb0a225400 | 2385312 | 2271675 | 65536 | 298163 | 298163 | 9541216.0 | 8975526.0 | 0.0 | 0.0 | 0.0 | 253.0 | 0.0 | 2341126.0 | 0.0 | 2368459.0 | 0.0 | 271043.0 | 2097416.0 | 2364987.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194573.0 | 0.0 | 488.0 | 0.0 | 0.0 | 810238.0 | 2727752.0 | 0.0 | 0.0 | 2031631.0 | 0.0 | 4194521.0 | 0.0 | 2683612806.0 | 0.0 | 16299679297612 | 16307573207537 | 16307573387858 | 16299679817490 |
| 86 | 84 | void benchmark_func<double, 256, 8u, 16u>(double, double*) [clone .kd] | 0 | 0 | 251 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 102784 | 0x7fbb0abd5880 | 0x7fbb0a225440 | 2389888 | 2282210 | 65536 | 298735 | 298735 | 9559520.0 | 8992039.0 | 0.0 | 0.0 | 0.0 | 260.0 | 0.0 | 2357398.0 | 0.0 | 2369441.0 | 0.0 | 272003.0 | 2097438.0 | 2341254.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194578.0 | 0.0 | 498.0 | 0.0 | 0.0 | 666872.0 | 2214884.0 | 0.0 | 0.0 | 2031631.0 | 0.0 | 4194523.0 | 0.0 | 2631251355.0 | 0.0 | 16299679901788 | 16307573412498 | 16307573591699 | 16299680418707 |
| 87 | 85 | void benchmark_func<__half2, 256, 8u, 16u>(__half2, __half2*) [clone .kd] | 0 | 0 | 254 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 104256 | 0x7fbb0abd5700 | 0x7fbb0a225480 | 1283872 | 1182931 | 65536 | 160483 | 160483 | 5135456.0 | 4598185.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049920.0 | 0.0 | 1050105.0 | 0.0 | 1331.0 | 1048774.0 | 1050108.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097363.0 | 0.0 | 374.0 | 0.0 | 0.0 | 468092.0 | 1554868.0 | 0.0 | 0.0 | 983055.0 | 0.0 | 2097309.0 | 0.0 | 1311601374.0 | 0.0 | 16299680502665 | 16307573616019 | 16307573709780 | 16299680950305 |
| 88 | 86 | void benchmark_func<int, 256, 8u, 16u>(int, int*) [clone .kd] | 0 | 0 | 257 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 106240 | 0x7fbb0cfbe580 | 0x7fbb0a2254c0 | 1755776 | 1647542 | 65536 | 219471 | 219471 | 7023072.0 | 6474549.0 | 0.0 | 0.0 | 0.0 | 153.0 | 0.0 | 1050368.0 | 0.0 | 1050517.0 | 0.0 | 1775.0 | 1048742.0 | 1050528.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097350.0 | 0.0 | 332.0 | 0.0 | 0.0 | 1252.0 | 58318.0 | 0.0 | 0.0 | 983059.0 | 0.0 | 2097279.0 | 0.0 | 648070363.0 | 0.0 | 16299681035383 | 16307573733460 | 16307573863861 | 16299681500753 |
| 89 | 87 | void benchmark_func<float, 256, 8u, 17u>(float, float*) [clone .kd] | 0 | 0 | 260 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 107712 | 0x7fbb0cfbe400 | 0x7fbb0a225500 | 1281792 | 1180166 | 65536 | 160223 | 160223 | 5127136.0 | 4580726.0 | 0.0 | 0.0 | 0.0 | 185.0 | 0.0 | 1049752.0 | 0.0 | 1049935.0 | 0.0 | 1165.0 | 1048770.0 | 1049938.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097358.0 | 0.0 | 370.0 | 0.0 | 0.0 | 523465.0 | 1652952.0 | 0.0 | 0.0 | 983053.0 | 0.0 | 2097299.0 | 0.0 | 1431637384.0 | 0.0 | 16299681593111 | 16307573896021 | 16307573989142 | 16299682019102 |
| 90 | 88 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 17u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 263 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 109440 | 0x7fbb0cfbe280 | 0x7fbb0a225540 | 2398400 | 2288327 | 65536 | 299799 | 299799 | 9593568.0 | 9007437.0 | 0.0 | 0.0 | 0.0 | 244.0 | 0.0 | 2365762.0 | 0.0 | 2353897.0 | 0.0 | 256507.0 | 2097390.0 | 2349405.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194569.0 | 0.0 | 476.0 | 0.0 | 0.0 | 696601.0 | 2306065.0 | 0.0 | 0.0 | 2031632.0 | 0.0 | 4194510.0 | 0.0 | 2703756947.0 | 0.0 | 16299682105310 | 16307574014742 | 16307574195703 | 16299682629928 |
| 91 | 89 | void benchmark_func<double, 256, 8u, 17u>(double, double*) [clone .kd] | 0 | 0 | 266 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 111168 | 0x7fbb0cfbe100 | 0x7fbb0a225580 | 2385568 | 2273098 | 65536 | 298195 | 298195 | 9542240.0 | 8955628.0 | 0.0 | 0.0 | 0.0 | 263.0 | 0.0 | 2352263.0 | 0.0 | 2361522.0 | 0.0 | 264128.0 | 2097394.0 | 2366616.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194563.0 | 0.0 | 466.0 | 0.0 | 0.0 | 770087.0 | 2567284.0 | 0.0 | 0.0 | 2031631.0 | 0.0 | 4194528.0 | 0.0 | 2765487166.0 | 0.0 | 16299682715336 | 16307574220823 | 16307574400664 | 16299683242185 |
| 92 | 90 | void benchmark_func<__half2, 256, 8u, 17u>(__half2, __half2*) [clone .kd] | 0 | 0 | 269 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 112896 | 0x7fbb0abd5f80 | 0x7fbb0a2255c0 | 1313616 | 1212487 | 65536 | 164201 | 164201 | 5254432.0 | 4662134.0 | 0.0 | 0.0 | 0.0 | 190.0 | 0.0 | 1050032.0 | 0.0 | 1050218.0 | 0.0 | 1443.0 | 1048775.0 | 1050217.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097364.0 | 0.0 | 372.0 | 0.0 | 0.0 | 347222.0 | 1146412.0 | 0.0 | 0.0 | 983055.0 | 0.0 | 2097309.0 | 0.0 | 1325989398.0 | 0.0 | 16299683327963 | 16307574424824 | 16307574520665 | 16299683758683 |
| 93 | 91 | void benchmark_func<int, 256, 8u, 17u>(int, int*) [clone .kd] | 0 | 0 | 272 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 115136 | 0x7fbb0abd5e00 | 0x7fbb0a225600 | 1839472 | 1725526 | 65536 | 229933 | 229933 | 7357856.0 | 6784314.0 | 0.0 | 0.0 | 0.0 | 157.0 | 0.0 | 1050480.0 | 0.0 | 1050637.0 | 0.0 | 1887.0 | 1048750.0 | 1050626.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097339.0 | 0.0 | 306.0 | 0.0 | 0.0 | 0.0 | 57258.0 | 0.0 | 0.0 | 983059.0 | 0.0 | 2097285.0 | 0.0 | 612548111.0 | 0.0 | 16299683843161 | 16307574545785 | 16307574684506 | 16299684323681 |
| 94 | 92 | void benchmark_func<float, 256, 8u, 18u>(float, float*) [clone .kd] | 0 | 0 | 275 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 116608 | 0x7fbb0abd5c80 | 0x7fbb0a225640 | 1287760 | 1186345 | 65536 | 160969 | 160969 | 5151008.0 | 4571413.0 | 0.0 | 0.0 | 0.0 | 189.0 | 0.0 | 1049808.0 | 0.0 | 1049993.0 | 0.0 | 1221.0 | 1048772.0 | 1049989.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097360.0 | 0.0 | 372.0 | 0.0 | 0.0 | 505551.0 | 1655887.0 | 0.0 | 0.0 | 983054.0 | 0.0 | 2097303.0 | 0.0 | 1451576724.0 | 0.0 | 16299684416299 | 16307574716986 | 16307574812187 | 16299684837669 |
| 95 | 93 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 18u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 278 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 118336 | 0x7fbb0abd5b00 | 0x7fbb0a225680 | 2400440 | 2291387 | 65536 | 300054 | 300054 | 9601728.0 | 9023268.0 | 0.0 | 0.0 | 0.0 | 240.0 | 0.0 | 2366861.0 | 0.0 | 2370852.0 | 0.0 | 273443.0 | 2097409.0 | 2346223.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194573.0 | 0.0 | 482.0 | 0.0 | 0.0 | 703892.0 | 2336594.0 | 0.0 | 0.0 | 2031632.0 | 0.0 | 4194515.0 | 0.0 | 2728107357.0 | 0.0 | 16299684922608 | 16307574836987 | 16307575015548 | 16299685440266 |
| 96 | 94 | void benchmark_func<double, 256, 8u, 18u>(double, double*) [clone .kd] | 0 | 0 | 281 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 16 | 24 | 120064 | 0x7fbb0abd5980 | 0x7fbb0a2256c0 | 2393776 | 2286860 | 65536 | 299221 | 299221 | 9575072.0 | 9006297.0 | 0.0 | 0.0 | 0.0 | 242.0 | 0.0 | 2381071.0 | 0.0 | 2371006.0 | 0.0 | 273598.0 | 2097408.0 | 2345975.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194586.0 | 0.0 | 510.0 | 0.0 | 0.0 | 750526.0 | 2528682.0 | 0.0 | 0.0 | 2031632.0 | 0.0 | 4194534.0 | 0.0 | 2679211054.0 | 0.0 | 16299685524934 | 16307575040668 | 16307575220349 | 16299686047343 |
| 97 | 95 | void benchmark_func<__half2, 256, 8u, 18u>(__half2, __half2*) [clone .kd] | 0 | 0 | 284 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 121792 | 0x7fbb0abd5800 | 0x7fbb0a225700 | 1312856 | 1212622 | 65536 | 164106 | 164106 | 5251392.0 | 4688682.0 | 0.0 | 0.0 | 0.0 | 183.0 | 0.0 | 1050088.0 | 0.0 | 1050272.0 | 0.0 | 1498.0 | 1048774.0 | 1050273.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097366.0 | 0.0 | 374.0 | 0.0 | 0.0 | 285586.0 | 941246.0 | 0.0 | 0.0 | 983056.0 | 0.0 | 2097310.0 | 0.0 | 1254939037.0 | 0.0 | 16299686132371 | 16307575243869 | 16307575341470 | 16299686579541 |
| 98 | 96 | void benchmark_func<int, 256, 8u, 18u>(int, int*) [clone .kd] | 0 | 0 | 287 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 124032 | 0x7fbb0abd5680 | 0x7fbb0a225740 | 1925568 | 1811317 | 65536 | 240695 | 240695 | 7702240.0 | 7106782.0 | 0.0 | 0.0 | 0.0 | 148.0 | 0.0 | 1050592.0 | 0.0 | 1050732.0 | 0.0 | 1998.0 | 1048734.0 | 1050735.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097340.0 | 0.0 | 304.0 | 0.0 | 0.0 | 0.0 | 54465.0 | 0.0 | 0.0 | 983060.0 | 0.0 | 2097287.0 | 0.0 | 592523956.0 | 0.0 | 16299686663819 | 16307575365950 | 16307575512031 | 16299687148128 |
| 99 | 97 | void benchmark_func<float, 256, 8u, 20u>(float, float*) [clone .kd] | 0 | 0 | 290 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 125504 | 0x7fbb0cfbe500 | 0x7fbb0a225780 | 1301800 | 1197248 | 65536 | 162724 | 162724 | 5207168.0 | 4581414.0 | 0.0 | 0.0 | 0.0 | 186.0 | 0.0 | 1049920.0 | 0.0 | 1050103.0 | 0.0 | 1332.0 | 1048771.0 | 1050111.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097363.0 | 0.0 | 374.0 | 0.0 | 0.0 | 425604.0 | 1416760.0 | 0.0 | 0.0 | 983054.0 | 0.0 | 2097301.0 | 0.0 | 1421757798.0 | 0.0 | 16299687241386 | 16307575544031 | 16307575638912 | 16299687688296 |
| 100 | 98 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 20u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 293 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 127488 | 0x7fbb0cfbe380 | 0x7fbb0a2257c0 | 2375344 | 2268943 | 65536 | 296917 | 296917 | 9501344.0 | 8911414.0 | 0.0 | 0.0 | 0.0 | 269.0 | 0.0 | 2376232.0 | 0.0 | 2380717.0 | 0.0 | 283297.0 | 2097420.0 | 2359102.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194592.0 | 0.0 | 516.0 | 0.0 | 0.0 | 619057.0 | 2024230.0 | 0.0 | 0.0 | 2031633.0 | 0.0 | 4194550.0 | 0.0 | 2748247028.0 | 0.0 | 16299687773955 | 16307575663392 | 16307575843713 | 16299688295163 |
| 101 | 99 | void benchmark_func<double, 256, 8u, 20u>(double, double*) [clone .kd] | 0 | 0 | 296 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 129216 | 0x7fbb0cfbe200 | 0x7fbb0a225800 | 2383632 | 2275458 | 65536 | 297953 | 297953 | 9534496.0 | 8943552.0 | 0.0 | 0.0 | 0.0 | 261.0 | 0.0 | 2354192.0 | 0.0 | 2359482.0 | 0.0 | 262069.0 | 2097413.0 | 2357849.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194589.0 | 0.0 | 512.0 | 0.0 | 0.0 | 637746.0 | 2094927.0 | 0.0 | 0.0 | 2031633.0 | 0.0 | 4194518.0 | 0.0 | 2734170552.0 | 0.0 | 16299688380951 | 16307575868033 | 16307576049154 | 16299688903000 |
| 102 | 100 | void benchmark_func<__half2, 256, 8u, 20u>(__half2, __half2*) [clone .kd] | 0 | 0 | 299 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 130944 | 0x7fbb0cfbe080 | 0x7fbb0a225840 | 1367664 | 1259686 | 65536 | 170957 | 170957 | 5470624.0 | 4922237.0 | 0.0 | 0.0 | 0.0 | 181.0 | 0.0 | 1050144.0 | 0.0 | 1050326.0 | 0.0 | 1553.0 | 1048773.0 | 1050327.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097360.0 | 0.0 | 360.0 | 0.0 | 0.0 | 254536.0 | 842724.0 | 0.0 | 0.0 | 983057.0 | 0.0 | 2097296.0 | 0.0 | 1121532093.0 | 0.0 | 16299688987588 | 16307576072515 | 16307576172675 | 16299689435998 |
| 103 | 101 | void benchmark_func<int, 256, 8u, 20u>(int, int*) [clone .kd] | 0 | 0 | 302 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 133440 | 0x7fbb0abd5f00 | 0x7fbb0a225880 | 2072656 | 1963434 | 65536 | 259081 | 259081 | 8290592.0 | 7726028.0 | 0.0 | 0.0 | 0.0 | 149.0 | 0.0 | 1050704.0 | 0.0 | 1050842.0 | 0.0 | 2108.0 | 1048734.0 | 1050851.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097333.0 | 0.0 | 286.0 | 0.0 | 0.0 | 6.0 | 53938.0 | 0.0 | 0.0 | 983062.0 | 0.0 | 2097276.0 | 0.0 | 577560554.0 | 0.0 | 16299689520536 | 16307576196675 | 16307576352036 | 16299690031365 |
| 104 | 102 | void benchmark_func<float, 256, 8u, 22u>(float, float*) [clone .kd] | 0 | 0 | 305 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 134912 | 0x7fbb0abd5d80 | 0x7fbb0a2258c0 | 1286728 | 1182910 | 65536 | 160840 | 160840 | 5146880.0 | 4573959.0 | 0.0 | 0.0 | 0.0 | 188.0 | 0.0 | 1049976.0 | 0.0 | 1050159.0 | 0.0 | 1387.0 | 1048772.0 | 1050164.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097365.0 | 0.0 | 376.0 | 0.0 | 0.0 | 426241.0 | 1313379.0 | 0.0 | 0.0 | 983055.0 | 0.0 | 2097307.0 | 0.0 | 1379514367.0 | 0.0 | 16299690124792 | 16307576384676 | 16307576479877 | 16299690565593 |
| 105 | 103 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 22u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 308 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 136896 | 0x7fbb0abd5c00 | 0x7fbb0a225900 | 2372976 | 2267034 | 65536 | 296621 | 296621 | 9491872.0 | 8929700.0 | 0.0 | 0.0 | 0.0 | 264.0 | 0.0 | 2367532.0 | 0.0 | 2361913.0 | 0.0 | 264472.0 | 2097441.0 | 2337875.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194602.0 | 0.0 | 534.0 | 0.0 | 0.0 | 768920.0 | 2575023.0 | 0.0 | 0.0 | 2031634.0 | 0.0 | 4194547.0 | 0.0 | 2757380996.0 | 0.0 | 16299690649721 | 16307576506917 | 16307576685958 | 16299691174379 |
| 106 | 104 | void benchmark_func<double, 256, 8u, 22u>(double, double*) [clone .kd] | 0 | 0 | 311 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 138880 | 0x7fbb0abd5a80 | 0x7fbb0a225940 | 2388752 | 2277702 | 65536 | 298593 | 298593 | 9554976.0 | 8939851.0 | 0.0 | 0.0 | 0.0 | 258.0 | 0.0 | 2362817.0 | 0.0 | 2355326.0 | 0.0 | 257901.0 | 2097425.0 | 2361737.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194594.0 | 0.0 | 518.0 | 0.0 | 0.0 | 685600.0 | 2261468.0 | 0.0 | 0.0 | 2031634.0 | 0.0 | 4194531.0 | 0.0 | 2691857855.0 | 0.0 | 16299691258438 | 16307576711718 | 16307576892360 | 16299691796136 |
| 107 | 105 | void benchmark_func<__half2, 256, 8u, 22u>(__half2, __half2*) [clone .kd] | 0 | 0 | 314 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 140864 | 0x7fbb0abd5900 | 0x7fbb0a225980 | 1419744 | 1314368 | 65536 | 177467 | 177467 | 5678944.0 | 5122636.0 | 0.0 | 0.0 | 0.0 | 179.0 | 0.0 | 1050312.0 | 0.0 | 1050486.0 | 0.0 | 1720.0 | 1048766.0 | 1050487.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097352.0 | 0.0 | 338.0 | 0.0 | 0.0 | 94862.0 | 330334.0 | 0.0 | 0.0 | 983058.0 | 0.0 | 2097296.0 | 0.0 | 936994438.0 | 0.0 | 16299691876114 | 16307576915880 | 16307577023560 | 16299692339264 |
| 108 | 106 | void benchmark_func<int, 256, 8u, 22u>(int, int*) [clone .kd] | 0 | 0 | 317 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 143616 | 0x7fbb0abd5780 | 0x7fbb0a2259c0 | 2233592 | 2129475 | 65536 | 279198 | 279198 | 8934336.0 | 8352709.0 | 0.0 | 0.0 | 0.0 | 155.0 | 0.0 | 1050928.0 | 0.0 | 1051067.0 | 0.0 | 2331.0 | 1048736.0 | 1051075.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097335.0 | 0.0 | 282.0 | 0.0 | 0.0 | 0.0 | 54019.0 | 0.0 | 0.0 | 983063.0 | 0.0 | 2097285.0 | 0.0 | 568977667.0 | 0.0 | 16299692422302 | 16307577047721 | 16307577217802 | 16299692953640 |
| 109 | 107 | void benchmark_func<float, 256, 8u, 24u>(float, float*) [clone .kd] | 0 | 0 | 320 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 145344 | 0x7fbb0abd5600 | 0x7fbb0a225a00 | 1296496 | 1194469 | 65536 | 162061 | 162061 | 5185952.0 | 4629207.0 | 0.0 | 0.0 | 0.0 | 187.0 | 0.0 | 1050088.0 | 0.0 | 1050276.0 | 0.0 | 1498.0 | 1048778.0 | 1050275.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097367.0 | 0.0 | 376.0 | 0.0 | 0.0 | 310622.0 | 1032639.0 | 0.0 | 0.0 | 983056.0 | 0.0 | 2097313.0 | 0.0 | 1261539179.0 | 0.0 | 16299693044768 | 16307577250922 | 16307577346922 | 16299693481049 |
| 110 | 108 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 24u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 323 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 147584 | 0x7fbb0cfbe480 | 0x7fbb0a225a40 | 2366224 | 2257192 | 65536 | 295777 | 295777 | 9464864.0 | 8873998.0 | 0.0 | 0.0 | 0.0 | 270.0 | 0.0 | 2367867.0 | 0.0 | 2368847.0 | 0.0 | 271419.0 | 2097428.0 | 2344359.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194609.0 | 0.0 | 542.0 | 0.0 | 0.0 | 633122.0 | 2070719.0 | 0.0 | 0.0 | 2031635.0 | 0.0 | 4194564.0 | 0.0 | 2719511113.0 | 0.0 | 16299693565027 | 16307577371722 | 16307577553164 | 16299694086145 |
| 111 | 109 | void benchmark_func<double, 256, 8u, 24u>(double, double*) [clone .kd] | 0 | 0 | 326 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 149568 | 0x7fbb0cfbe300 | 0x7fbb0a225a80 | 2369160 | 2254212 | 65536 | 296144 | 296144 | 9476608.0 | 8888556.0 | 0.0 | 0.0 | 0.0 | 279.0 | 0.0 | 2367028.0 | 0.0 | 2356583.0 | 0.0 | 259161.0 | 2097422.0 | 2356535.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194602.0 | 0.0 | 530.0 | 0.0 | 0.0 | 611867.0 | 2022043.0 | 0.0 | 0.0 | 2031635.0 | 0.0 | 4194538.0 | 0.0 | 2792003151.0 | 0.0 | 16299694170333 | 16307577577164 | 16307577757485 | 16299694700442 |
| 112 | 110 | void benchmark_func<__half2, 256, 8u, 24u>(__half2, __half2*) [clone .kd] | 0 | 0 | 329 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 151552 | 0x7fbb0cfbe180 | 0x7fbb0a225ac0 | 1497840 | 1383602 | 65536 | 187229 | 187229 | 5991328.0 | 5415759.0 | 0.0 | 0.0 | 0.0 | 162.0 | 0.0 | 1050368.0 | 0.0 | 1050532.0 | 0.0 | 1775.0 | 1048757.0 | 1050533.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097347.0 | 0.0 | 326.0 | 0.0 | 0.0 | 47409.0 | 187068.0 | 0.0 | 0.0 | 983059.0 | 0.0 | 2097288.0 | 0.0 | 705615180.0 | 0.0 | 16299694784280 | 16307577781485 | 16307577892206 | 16299695249200 |
| 113 | 111 | void benchmark_func<int, 256, 8u, 24u>(int, int*) [clone .kd] | 0 | 0 | 332 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 154304 | 0x7fbb0cfbe000 | 0x7fbb0a225b00 | 2399656 | 2290432 | 65536 | 299956 | 299956 | 9598592.0 | 9011537.0 | 0.0 | 0.0 | 0.0 | 139.0 | 0.0 | 1051040.0 | 0.0 | 1051178.0 | 0.0 | 2441.0 | 1048737.0 | 1051180.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097332.0 | 0.0 | 272.0 | 0.0 | 0.0 | 0.0 | 54833.0 | 0.0 | 0.0 | 983065.0 | 0.0 | 2097275.0 | 0.0 | 562333356.0 | 0.0 | 16299695333358 | 16307577916366 | 16307578097647 | 16299695862846 |
| 114 | 112 | void benchmark_func<float, 256, 8u, 28u>(float, float*) [clone .kd] | 0 | 0 | 335 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 156288 | 0x7fbb0abd5e80 | 0x7fbb0a225b40 | 1373152 | 1268701 | 65536 | 171643 | 171643 | 5492576.0 | 4942772.0 | 0.0 | 0.0 | 0.0 | 182.0 | 0.0 | 1050200.0 | 0.0 | 1050384.0 | 0.0 | 1609.0 | 1048775.0 | 1050381.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097365.0 | 0.0 | 368.0 | 0.0 | 0.0 | 199833.0 | 622709.0 | 0.0 | 0.0 | 983057.0 | 0.0 | 2097303.0 | 0.0 | 1088922802.0 | 0.0 | 16299695955074 | 16307578130447 | 16307578231088 | 16299696393515 |
| 115 | 113 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 28u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 338 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 158784 | 0x7fbb0abd5d00 | 0x7fbb0a225b80 | 2393040 | 2266254 | 65536 | 299129 | 299129 | 9572128.0 | 8932938.0 | 0.0 | 0.0 | 0.0 | 298.0 | 0.0 | 2322950.0 | 0.0 | 2304278.0 | 0.0 | 206816.0 | 2097462.0 | 2326812.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194635.0 | 0.0 | 586.0 | 0.0 | 0.0 | 678888.0 | 2286263.0 | 0.0 | 0.0 | 2031637.0 | 0.0 | 4194587.0 | 0.0 | 2754035786.0 | 0.0 | 16299696477643 | 16307578255728 | 16307578441649 | 16299697006421 |
| 116 | 114 | void benchmark_func<double, 256, 8u, 28u>(double, double*) [clone .kd] | 0 | 0 | 341 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 161024 | 0x7fbb0abd5b80 | 0x7fbb0a225bc0 | 2382792 | 2259927 | 65536 | 297848 | 297848 | 9531136.0 | 8938749.0 | 0.0 | 0.0 | 0.0 | 288.0 | 0.0 | 2339531.0 | 0.0 | 2342782.0 | 0.0 | 245324.0 | 2097458.0 | 2360115.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194621.0 | 0.0 | 560.0 | 0.0 | 0.0 | 700318.0 | 2360929.0 | 0.0 | 0.0 | 2031637.0 | 0.0 | 4194573.0 | 0.0 | 2691967681.0 | 0.0 | 16299697089699 | 16307578465169 | 16307578647730 | 16299697623067 |
| 117 | 115 | void benchmark_func<__half2, 256, 8u, 28u>(__half2, __half2*) [clone .kd] | 0 | 0 | 344 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 163264 | 0x7fbb0abd5a00 | 0x7fbb0a225c00 | 1650568 | 1536761 | 65536 | 206320 | 206320 | 6602240.0 | 6031791.0 | 0.0 | 0.0 | 0.0 | 156.0 | 0.0 | 1050592.0 | 0.0 | 1050744.0 | 0.0 | 1997.0 | 1048747.0 | 1050740.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097345.0 | 0.0 | 314.0 | 0.0 | 0.0 | 25446.0 | 119966.0 | 0.0 | 0.0 | 983061.0 | 0.0 | 2097282.0 | 0.0 | 737739022.0 | 0.0 | 16299697705836 | 16307578672850 | 16307578795411 | 16299698172865 |
| 118 | 116 | void benchmark_func<int, 256, 8u, 28u>(int, int*) [clone .kd] | 0 | 0 | 347 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 166528 | 0x7fbb0abd5880 | 0x7fbb0a225c40 | 2723664 | 2611410 | 65536 | 340457 | 340457 | 10894624.0 | 10267249.0 | 0.0 | 0.0 | 0.0 | 132.0 | 0.0 | 1051376.0 | 0.0 | 1051507.0 | 0.0 | 2774.0 | 1048733.0 | 1051513.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097338.0 | 0.0 | 272.0 | 0.0 | 0.0 | 0.0 | 57788.0 | 0.0 | 0.0 | 983068.0 | 0.0 | 2097283.0 | 0.0 | 558733809.0 | 0.0 | 16299698256803 | 16307578819411 | 16307579026613 | 16299698880200 |
| 119 | 117 | void benchmark_func<float, 256, 8u, 32u>(float, float*) [clone .kd] | 0 | 0 | 350 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 168512 | 0x7fbb0abd5700 | 0x7fbb0a225c80 | 1480496 | 1364980 | 65536 | 185061 | 185061 | 5921952.0 | 5317939.0 | 0.0 | 0.0 | 0.0 | 169.0 | 0.0 | 1050424.0 | 0.0 | 1050582.0 | 0.0 | 1831.0 | 1048751.0 | 1050590.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097354.0 | 0.0 | 338.0 | 0.0 | 0.0 | 49988.0 | 194114.0 | 0.0 | 0.0 | 983059.0 | 0.0 | 2097297.0 | 0.0 | 786160250.0 | 0.0 | 16299698909029 | 16307579097333 | 16307579209014 | 16299699356359 |
| 120 | 118 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 32u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 353 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 171264 | 0x7fbb0cfbe580 | 0x7fbb0a225cc0 | 2385232 | 2254829 | 65536 | 298153 | 298153 | 9540896.0 | 8945535.0 | 0.0 | 0.0 | 0.0 | 290.0 | 0.0 | 2323559.0 | 0.0 | 2352074.0 | 0.0 | 254607.0 | 2097467.0 | 2350873.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194639.0 | 0.0 | 588.0 | 0.0 | 0.0 | 559733.0 | 1852583.0 | 0.0 | 0.0 | 2031639.0 | 0.0 | 4194592.0 | 0.0 | 2717193019.0 | 0.0 | 16299699440757 | 16307579233974 | 16307579417815 | 16299699970495 |
| 121 | 119 | void benchmark_func<double, 256, 8u, 32u>(double, double*) [clone .kd] | 0 | 0 | 356 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 173760 | 0x7fbb0cfbe400 | 0x7fbb0a225d00 | 2361344 | 2247522 | 65536 | 295167 | 295167 | 9445344.0 | 8820979.0 | 0.0 | 0.0 | 0.0 | 294.0 | 0.0 | 2343804.0 | 0.0 | 2349348.0 | 0.0 | 251882.0 | 2097466.0 | 2335703.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194645.0 | 0.0 | 600.0 | 0.0 | 0.0 | 719357.0 | 2396996.0 | 0.0 | 0.0 | 2031639.0 | 0.0 | 4194588.0 | 0.0 | 2703198674.0 | 0.0 | 16299700054554 | 16307579442135 | 16307579626136 | 16299700578952 |
| 122 | 120 | void benchmark_func<__half2, 256, 8u, 32u>(__half2, __half2*) [clone .kd] | 0 | 0 | 359 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 176256 | 0x7fbb0cfbe280 | 0x7fbb0a225d40 | 1807664 | 1690986 | 65536 | 225957 | 225957 | 7230624.0 | 6667309.0 | 0.0 | 0.0 | 0.0 | 148.0 | 0.0 | 1050816.0 | 0.0 | 1050965.0 | 0.0 | 2219.0 | 1048746.0 | 1050962.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097331.0 | 0.0 | 278.0 | 0.0 | 0.0 | 4601.0 | 66335.0 | 0.0 | 0.0 | 983063.0 | 0.0 | 2097279.0 | 0.0 | 584135389.0 | 0.0 | 16299700662750 | 16307579652056 | 16307579786777 | 16299701146559 |
| 123 | 121 | void benchmark_func<int, 256, 8u, 32u>(int, int*) [clone .kd] | 0 | 0 | 362 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 179776 | 0x7fbb0cfbe100 | 0x7fbb0a225d80 | 3043824 | 2931194 | 65536 | 380477 | 380477 | 12175264.0 | 11572547.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1051712.0 | 0.0 | 1051849.0 | 0.0 | 3107.0 | 1048742.0 | 1051844.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097340.0 | 0.0 | 264.0 | 0.0 | 0.0 | 0.0 | 54223.0 | 0.0 | 0.0 | 983071.0 | 0.0 | 2097290.0 | 0.0 | 560254297.0 | 0.0 | 16299701229938 | 16307579810457 | 16307580042939 | 16299701848344 |
| 124 | 122 | void benchmark_func<float, 256, 8u, 40u>(float, float*) [clone .kd] | 0 | 0 | 365 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 182272 | 0x7fbb0abd5f80 | 0x7fbb0a225dc0 | 1687424 | 1577225 | 65536 | 210927 | 210927 | 6749664.0 | 6206386.0 | 0.0 | 0.0 | 0.0 | 157.0 | 0.0 | 1050704.0 | 0.0 | 1050852.0 | 0.0 | 2108.0 | 1048744.0 | 1050860.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097347.0 | 0.0 | 314.0 | 0.0 | 0.0 | 833.0 | 55826.0 | 0.0 | 0.0 | 983062.0 | 0.0 | 2097286.0 | 0.0 | 610299850.0 | 0.0 | 16299701900433 | 16307580113019 | 16307580240060 | 16299702367942 |
| 125 | 123 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 40u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 368 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 185536 | 0x7fbb0abd5e00 | 0x7fbb0a225e00 | 2449472 | 2326974 | 65536 | 306183 | 306183 | 9797856.0 | 9133095.0 | 0.0 | 0.0 | 0.0 | 281.0 | 0.0 | 2285451.0 | 0.0 | 2289507.0 | 0.0 | 192042.0 | 2097465.0 | 2287199.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194647.0 | 0.0 | 586.0 | 0.0 | 0.0 | 519688.0 | 1747783.0 | 0.0 | 0.0 | 2031643.0 | 0.0 | 4194589.0 | 0.0 | 2450748466.0 | 0.0 | 16299702451751 | 16307580265820 | 16307580456701 | 16299703051457 |
| 126 | 124 | void benchmark_func<double, 256, 8u, 40u>(double, double*) [clone .kd] | 0 | 0 | 371 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 188544 | 0x7fbb0abd5c80 | 0x7fbb0a225e40 | 2463424 | 2327218 | 65536 | 307927 | 307927 | 9853664.0 | 9232381.0 | 0.0 | 0.0 | 0.0 | 289.0 | 0.0 | 2272527.0 | 0.0 | 2272833.0 | 0.0 | 175370.0 | 2097463.0 | 2269203.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194624.0 | 0.0 | 542.0 | 0.0 | 0.0 | 498979.0 | 1716103.0 | 0.0 | 0.0 | 2031643.0 | 0.0 | 4194577.0 | 0.0 | 2374081504.0 | 0.0 | 16299703070747 | 16307580482142 | 16307580676223 | 16299703671694 |
| 127 | 125 | void benchmark_func<__half2, 256, 8u, 40u>(__half2, __half2*) [clone .kd] | 0 | 0 | 374 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 191552 | 0x7fbb0abd5b00 | 0x7fbb0a225e80 | 2125840 | 2015674 | 65536 | 265729 | 265729 | 8503328.0 | 7934039.0 | 0.0 | 0.0 | 0.0 | 134.0 | 0.0 | 1051264.0 | 0.0 | 1051393.0 | 0.0 | 2663.0 | 1048730.0 | 1051411.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097341.0 | 0.0 | 282.0 | 0.0 | 0.0 | 0.0 | 51681.0 | 0.0 | 0.0 | 983067.0 | 0.0 | 2097287.0 | 0.0 | 566692652.0 | 0.0 | 16299703693283 | 16307580703583 | 16307580864064 | 16299704205442 |
| 128 | 126 | void benchmark_func<int, 256, 8u, 40u>(int, int*) [clone .kd] | 0 | 0 | 377 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 195840 | 0x7fbb0abd5980 | 0x7fbb0a225ec0 | 3697736 | 3589966 | 65536 | 462216 | 462216 | 14790912.0 | 14105392.0 | 0.0 | 0.0 | 0.0 | 145.0 | 0.0 | 1052328.0 | 0.0 | 1052467.0 | 0.0 | 3718.0 | 1048749.0 | 1052464.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097351.0 | 0.0 | 264.0 | 0.0 | 0.0 | 0.0 | 54524.0 | 0.0 | 0.0 | 983076.0 | 0.0 | 2097302.0 | 0.0 | 555146368.0 | 0.0 | 16299704288360 | 16307580889984 | 16307581173826 | 16299704960545 |
| 129 | 127 | void benchmark_func<float, 256, 8u, 48u>(float, float*) [clone .kd] | 0 | 0 | 380 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 198592 | 0x7fbb0abd5800 | 0x7fbb0a225f00 | 1951328 | 1839825 | 65536 | 243915 | 243915 | 7805280.0 | 7209630.0 | 0.0 | 0.0 | 0.0 | 133.0 | 0.0 | 1051096.0 | 0.0 | 1051239.0 | 0.0 | 2497.0 | 1048742.0 | 1051233.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097332.0 | 0.0 | 270.0 | 0.0 | 0.0 | 158.0 | 56263.0 | 0.0 | 0.0 | 983065.0 | 0.0 | 2097281.0 | 0.0 | 563949467.0 | 0.0 | 16299705009234 | 16307581220386 | 16307581369507 | 16299705502333 |
| 130 | 128 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 48u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 383 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 202368 | 0x7fbb0abd5680 | 0x7fbb0a225f40 | 2673792 | 2556321 | 65536 | 334223 | 334223 | 10695136.0 | 10098678.0 | 0.0 | 0.0 | 0.0 | 268.0 | 0.0 | 2247897.0 | 0.0 | 2259695.0 | 0.0 | 162246.0 | 2097449.0 | 2256888.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194635.0 | 0.0 | 548.0 | 0.0 | 0.0 | 312422.0 | 1052037.0 | 0.0 | 0.0 | 2031647.0 | 0.0 | 4194576.0 | 0.0 | 2167982544.0 | 0.0 | 16299705585212 | 16307581394787 | 16307581603908 | 16299706203978 |
| 131 | 129 | void benchmark_func<double, 256, 8u, 48u>(double, double*) [clone .kd] | 0 | 0 | 386 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 205888 | 0x7fbb0cfbe500 | 0x7fbb0a225f80 | 2656336 | 2529312 | 65536 | 332041 | 332041 | 10625312.0 | 10018511.0 | 0.0 | 0.0 | 0.0 | 271.0 | 0.0 | 2206132.0 | 0.0 | 2201753.0 | 0.0 | 104306.0 | 2097447.0 | 2224556.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194620.0 | 0.0 | 518.0 | 0.0 | 0.0 | 295180.0 | 975974.0 | 0.0 | 0.0 | 2031647.0 | 0.0 | 4194563.0 | 0.0 | 2080353935.0 | 0.0 | 16299706223467 | 16307581644069 | 16307581854950 | 16299706848084 |
| 132 | 130 | void benchmark_func<__half2, 256, 8u, 48u>(__half2, __half2*) [clone .kd] | 0 | 0 | 389 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 209408 | 0x7fbb0cfbe380 | 0x7fbb0a225fc0 | 2456800 | 2344040 | 65536 | 307099 | 307099 | 9827168.0 | 9235273.0 | 0.0 | 0.0 | 0.0 | 128.0 | 0.0 | 1051712.0 | 0.0 | 1051849.0 | 0.0 | 3107.0 | 1048742.0 | 1051839.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097355.0 | 0.0 | 294.0 | 0.0 | 0.0 | 176.0 | 54648.0 | 0.0 | 0.0 | 983071.0 | 0.0 | 2097283.0 | 0.0 | 556189253.0 | 0.0 | 16299706868653 | 16307581893670 | 16307582079751 | 16299707413641 |
| 133 | 131 | void benchmark_func<int, 256, 8u, 48u>(int, int*) [clone .kd] | 0 | 0 | 392 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 214464 | 0x7fbb0cfbe200 | 0x7fbb0a226000 | 4341616 | 4226771 | 65536 | 542701 | 542701 | 17366432.0 | 16638904.0 | 0.0 | 0.0 | 0.0 | 138.0 | 0.0 | 1053000.0 | 0.0 | 1053140.0 | 0.0 | 4384.0 | 1048756.0 | 1053141.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097363.0 | 0.0 | 264.0 | 0.0 | 0.0 | 0.0 | 54316.0 | 0.0 | 0.0 | 983082.0 | 0.0 | 2097307.0 | 0.0 | 557384900.0 | 0.0 | 16299707497319 | 16307582106472 | 16307582441354 | 16299708219933 |
| 134 | 132 | void benchmark_func<float, 256, 8u, 56u>(float, float*) [clone .kd] | 0 | 0 | 395 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 217728 | 0x7fbb0cfbe080 | 0x7fbb0a226040 | 2182744 | 2071050 | 65536 | 272842 | 272842 | 8730944.0 | 8145465.0 | 0.0 | 0.0 | 0.0 | 143.0 | 0.0 | 1051432.0 | 0.0 | 1051570.0 | 0.0 | 2830.0 | 1048740.0 | 1051563.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097339.0 | 0.0 | 272.0 | 0.0 | 0.0 | 20.0 | 56823.0 | 0.0 | 0.0 | 983068.0 | 0.0 | 2097289.0 | 0.0 | 561146422.0 | 0.0 | 16299708271512 | 16307582510794 | 16307582678795 | 16299708774061 |
| 135 | 133 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 56u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 398 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 222016 | 0x7fbb0abd5f00 | 0x7fbb0a226080 | 2919024 | 2800707 | 65536 | 364877 | 364877 | 11676064.0 | 10970250.0 | 0.0 | 0.0 | 0.0 | 253.0 | 0.0 | 2206124.0 | 0.0 | 2219825.0 | 0.0 | 122390.0 | 2097435.0 | 2224711.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194617.0 | 0.0 | 494.0 | 0.0 | 0.0 | 214187.0 | 726659.0 | 0.0 | 0.0 | 2031651.0 | 0.0 | 4194572.0 | 0.0 | 1882579496.0 | 0.0 | 16299708857739 | 16307582705035 | 16307582934797 | 16299709470805 |
| 136 | 134 | void benchmark_func<double, 256, 8u, 56u>(double, double*) [clone .kd] | 0 | 0 | 401 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 226048 | 0x7fbb0abd5d80 | 0x7fbb0a2260c0 | 2929192 | 2813958 | 65536 | 366148 | 366148 | 11716736.0 | 11063295.0 | 0.0 | 0.0 | 0.0 | 236.0 | 0.0 | 2175160.0 | 0.0 | 2169878.0 | 0.0 | 72464.0 | 2097414.0 | 2172368.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194599.0 | 0.0 | 460.0 | 0.0 | 0.0 | 285088.0 | 919903.0 | 0.0 | 0.0 | 2031651.0 | 0.0 | 4194551.0 | 0.0 | 1704952624.0 | 0.0 | 16299709516244 | 16307582994477 | 16307583224238 | 16299710165200 |
| 137 | 135 | void benchmark_func<__half2, 256, 8u, 56u>(__half2, __half2*) [clone .kd] | 0 | 0 | 404 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 230080 | 0x7fbb0abd5c00 | 0x7fbb0a226100 | 2773336 | 2662381 | 65536 | 346666 | 346666 | 11093312.0 | 10506164.0 | 0.0 | 0.0 | 0.0 | 126.0 | 0.0 | 1052160.0 | 0.0 | 1052289.0 | 0.0 | 3551.0 | 1048738.0 | 1052315.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097342.0 | 0.0 | 252.0 | 0.0 | 0.0 | 0.0 | 54639.0 | 0.0 | 0.0 | 983075.0 | 0.0 | 2097315.0 | 0.0 | 552995878.0 | 0.0 | 16299710183430 | 16307583284079 | 16307583495920 | 16299710814136 |
| 138 | 136 | void benchmark_func<int, 256, 8u, 56u>(int, int*) [clone .kd] | 0 | 0 | 407 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 235904 | 0x7fbb0abd5a80 | 0x7fbb0a226140 | 5000400 | 4876934 | 65536 | 625049 | 625049 | 20001568.0 | 19003227.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1053672.0 | 0.0 | 1053811.0 | 0.0 | 5050.0 | 1048761.0 | 1053807.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097380.0 | 0.0 | 274.0 | 0.0 | 0.0 | 0.0 | 54752.0 | 0.0 | 0.0 | 983088.0 | 0.0 | 2097321.0 | 0.0 | 559109864.0 | 0.0 | 16299710833125 | 16307583556400 | 16307583942323 | 16299711622028 |
| 139 | 137 | void benchmark_func<float, 256, 8u, 64u>(float, float*) [clone .kd] | 0 | 0 | 410 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 239424 | 0x7fbb0abd5900 | 0x7fbb0a226180 | 2427632 | 2318401 | 65536 | 303453 | 303453 | 9710496.0 | 9095219.0 | 0.0 | 0.0 | 0.0 | 143.0 | 0.0 | 1051768.0 | 0.0 | 1051905.0 | 0.0 | 3163.0 | 1048742.0 | 1051905.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097342.0 | 0.0 | 266.0 | 0.0 | 0.0 | 0.0 | 54165.0 | 0.0 | 0.0 | 983071.0 | 0.0 | 2097292.0 | 0.0 | 557068746.0 | 0.0 | 16299711669587 | 16307584008883 | 16307584195924 | 16299712195465 |
| 140 | 138 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 64u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 413 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 244224 | 0x7fbb0abd5780 | 0x7fbb0a2261c0 | 3183984 | 3071963 | 65536 | 397997 | 397997 | 12735904.0 | 12096568.0 | 0.0 | 0.0 | 0.0 | 235.0 | 0.0 | 2180864.0 | 0.0 | 2188194.0 | 0.0 | 90775.0 | 2097419.0 | 2178724.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194619.0 | 0.0 | 484.0 | 0.0 | 0.0 | 180700.0 | 588718.0 | 0.0 | 0.0 | 2031655.0 | 0.0 | 4194556.0 | 0.0 | 1616448488.0 | 0.0 | 16299712279224 | 16307584220244 | 16307584470326 | 16299712940519 |
| 141 | 139 | void benchmark_func<double, 256, 8u, 64u>(double, double*) [clone .kd] | 0 | 0 | 416 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 248768 | 0x7fbb0abd5600 | 0x7fbb0a226200 | 3122448 | 3014695 | 65536 | 390305 | 390305 | 12489760.0 | 11860188.0 | 0.0 | 0.0 | 0.0 | 230.0 | 0.0 | 2119069.0 | 0.0 | 2115398.0 | 0.0 | 17991.0 | 2097407.0 | 2116067.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194597.0 | 0.0 | 440.0 | 0.0 | 0.0 | 17579.0 | 106845.0 | 0.0 | 0.0 | 2031655.0 | 0.0 | 4194533.0 | 0.0 | 1226394448.0 | 0.0 | 16299712962179 | 16307584533206 | 16307584780568 | 16299713616714 |
| 142 | 140 | void benchmark_func<__half2, 256, 8u, 64u>(__half2, __half2*) [clone .kd] | 0 | 0 | 419 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 253312 | 0x7fbb0cfbe480 | 0x7fbb0a226240 | 3103776 | 2989976 | 65536 | 387971 | 387971 | 12415072.0 | 11780307.0 | 0.0 | 0.0 | 0.0 | 134.0 | 0.0 | 1052552.0 | 0.0 | 1052683.0 | 0.0 | 3940.0 | 1048743.0 | 1052708.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097350.0 | 0.0 | 254.0 | 0.0 | 0.0 | 0.0 | 54337.0 | 0.0 | 0.0 | 983078.0 | 0.0 | 2097310.0 | 0.0 | 545650304.0 | 0.0 | 16299713639194 | 16307584819608 | 16307585056730 | 16299714289299 |
| 143 | 141 | void benchmark_func<int, 256, 8u, 64u>(int, int*) [clone .kd] | 0 | 0 | 422 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 255552 | 0x7fbb0cfbe300 | 0x7fbb0a226280 | 5746624 | 5633482 | 65536 | 718327 | 718327 | 22986464.0 | 20878961.0 | 0.0 | 0.0 | 0.0 | 132.0 | 0.0 | 1050536.0 | 0.0 | 1050667.0 | 0.0 | 1942.0 | 1048725.0 | 1050670.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097322.0 | 0.0 | 270.0 | 0.0 | 0.0 | 0.0 | 55750.0 | 0.0 | 0.0 | 983060.0 | 0.0 | 2097259.0 | 0.0 | 539493570.0 | 0.0 | 16299714309369 | 16307585092890 | 16307585546493 | 16299715179410 |
| 144 | 142 | void benchmark_func<float, 256, 8u, 80u>(float, float*) [clone .kd] | 0 | 0 | 425 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 259840 | 0x7fbb0cfbe180 | 0x7fbb0a2262c0 | 2919384 | 2802533 | 65536 | 364922 | 364922 | 11677504.0 | 11021471.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1052440.0 | 0.0 | 1052575.0 | 0.0 | 3829.0 | 1048746.0 | 1052577.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097354.0 | 0.0 | 266.0 | 0.0 | 0.0 | 0.0 | 54248.0 | 0.0 | 0.0 | 983077.0 | 0.0 | 2097304.0 | 0.0 | 549795909.0 | 0.0 | 16299715208789 | 16307585587613 | 16307585813534 | 16299715834845 |
| 145 | 143 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 80u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 428 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 265664 | 0x7fbb0cfbe000 | 0x7fbb0a226300 | 3763632 | 3648043 | 65536 | 470453 | 470453 | 15054496.0 | 14343460.0 | 0.0 | 0.0 | 0.0 | 200.0 | 0.0 | 2112574.0 | 0.0 | 2114973.0 | 0.0 | 17552.0 | 2097421.0 | 2113015.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194594.0 | 0.0 | 402.0 | 0.0 | 0.0 | 9405.0 | 81312.0 | 0.0 | 0.0 | 2031663.0 | 0.0 | 4194546.0 | 0.0 | 1140732483.0 | 0.0 | 16299715857455 | 16307585850654 | 16307586148736 | 16299716547189 |
| 146 | 144 | void benchmark_func<double, 256, 8u, 80u>(double, double*) [clone .kd] | 0 | 0 | 431 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 271232 | 0x7fbb0abd5e80 | 0x7fbb0a226340 | 3784400 | 3680888 | 65536 | 473049 | 473049 | 15137568.0 | 14464055.0 | 0.0 | 0.0 | 0.0 | 219.0 | 0.0 | 2135364.0 | 0.0 | 2137476.0 | 0.0 | 40062.0 | 2097414.0 | 2133964.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194608.0 | 0.0 | 430.0 | 0.0 | 0.0 | 13371.0 | 93698.0 | 0.0 | 0.0 | 2031663.0 | 0.0 | 4194551.0 | 0.0 | 1285461798.0 | 0.0 | 16299716589978 | 16307586214817 | 16307586513699 | 16299717277993 |
| 147 | 145 | void benchmark_func<__half2, 256, 8u, 80u>(__half2, __half2*) [clone .kd] | 0 | 0 | 434 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 273216 | 0x7fbb0abd5d00 | 0x7fbb0a226380 | 3864304 | 3750771 | 65536 | 483037 | 483037 | 15457184.0 | 14792970.0 | 0.0 | 0.0 | 0.0 | 144.0 | 0.0 | 1050312.0 | 0.0 | 1050457.0 | 0.0 | 1720.0 | 1048737.0 | 1050456.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097329.0 | 0.0 | 292.0 | 0.0 | 0.0 | 0.0 | 59748.0 | 0.0 | 0.0 | 983058.0 | 0.0 | 2097273.0 | 0.0 | 542608416.0 | 0.0 | 16299717317382 | 16307586554659 | 16307586851461 | 16299718004907 |
| 148 | 146 | void benchmark_func<int, 256, 8u, 80u>(int, int*) [clone .kd] | 0 | 0 | 437 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 275712 | 0x7fbb0abd5b80 | 0x7fbb0a2263c0 | 7052672 | 6931391 | 65536 | 881583 | 881583 | 28210656.0 | 26261350.0 | 0.0 | 0.0 | 0.0 | 129.0 | 0.0 | 1050872.0 | 0.0 | 1051002.0 | 0.0 | 2275.0 | 1048727.0 | 1051003.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097329.0 | 0.0 | 272.0 | 0.0 | 0.0 | 0.0 | 56862.0 | 0.0 | 0.0 | 983063.0 | 0.0 | 2097270.0 | 0.0 | 540377584.0 | 0.0 | 16299718048446 | 16307586886181 | 16307587444264 | 16299719052104 |
| 149 | 147 | void benchmark_func<float, 256, 8u, 96u>(float, float*) [clone .kd] | 0 | 0 | 440 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 280768 | 0x7fbb0abd5a00 | 0x7fbb0a226400 | 3419048 | 3304812 | 65536 | 427380 | 427380 | 13676160.0 | 13011346.0 | 0.0 | 0.0 | 0.0 | 130.0 | 0.0 | 1053000.0 | 0.0 | 1053137.0 | 0.0 | 4384.0 | 1048753.0 | 1053136.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097384.0 | 0.0 | 306.0 | 0.0 | 0.0 | 0.0 | 53886.0 | 0.0 | 0.0 | 983082.0 | 0.0 | 2097307.0 | 0.0 | 544696111.0 | 0.0 | 16299719078503 | 16307587511305 | 16307587774986 | 16299719754888 |
| 150 | 148 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 96u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 443 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 287616 | 0x7fbb0abd5880 | 0x7fbb0a226440 | 4387344 | 4284046 | 65536 | 548417 | 548417 | 17549344.0 | 16874754.0 | 0.0 | 0.0 | 0.0 | 217.0 | 0.0 | 2130529.0 | 0.0 | 2129019.0 | 0.0 | 31600.0 | 2097419.0 | 2131806.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194618.0 | 0.0 | 416.0 | 0.0 | 0.0 | 22444.0 | 120918.0 | 0.0 | 0.0 | 2031671.0 | 0.0 | 4194564.0 | 0.0 | 1249644826.0 | 0.0 | 16299719775048 | 16307587834987 | 16307588185869 | 16299720536251 |
| 151 | 149 | void benchmark_func<double, 256, 8u, 96u>(double, double*) [clone .kd] | 0 | 0 | 446 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 294208 | 0x7fbb0abd5700 | 0x7fbb0a226480 | 4408344 | 4300846 | 65536 | 551042 | 551042 | 17633344.0 | 16954065.0 | 0.0 | 0.0 | 0.0 | 215.0 | 0.0 | 2127385.0 | 0.0 | 2130514.0 | 0.0 | 33098.0 | 2097416.0 | 2129402.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194615.0 | 0.0 | 412.0 | 0.0 | 0.0 | 23476.0 | 123843.0 | 0.0 | 0.0 | 2031671.0 | 0.0 | 4194564.0 | 0.0 | 1181165853.0 | 0.0 | 16299720557741 | 16307588244589 | 16307588594671 | 16299721294935 |
| 152 | 150 | void benchmark_func<__half2, 256, 8u, 96u>(__half2, __half2*) [clone .kd] | 0 | 0 | 449 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 296960 | 0x7fbb0cfbe580 | 0x7fbb0a2264c0 | 4517936 | 4399297 | 65536 | 564741 | 564741 | 18071712.0 | 17254107.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1050984.0 | 0.0 | 1051124.0 | 0.0 | 2386.0 | 1048738.0 | 1051121.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097333.0 | 0.0 | 276.0 | 0.0 | 0.0 | 0.0 | 56767.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097275.0 | 0.0 | 543850846.0 | 0.0 | 16299721336924 | 16307588656272 | 16307589003794 | 16299722083517 |
| 153 | 151 | void benchmark_func<int, 256, 8u, 96u>(int, int*) [clone .kd] | 0 | 0 | 452 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 299968 | 0x7fbb0cfbe400 | 0x7fbb0a226500 | 8330656 | 8221207 | 65536 | 1041331 | 1041331 | 33322592.0 | 30226002.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1051208.0 | 0.0 | 1051337.0 | 0.0 | 2608.0 | 1048729.0 | 1051338.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097329.0 | 0.0 | 260.0 | 0.0 | 0.0 | 0.0 | 56389.0 | 0.0 | 0.0 | 983066.0 | 0.0 | 2097273.0 | 0.0 | 546387278.0 | 0.0 | 16299722122996 | 16307589040594 | 16307589702838 | 16299723171733 |
| 154 | 152 | void benchmark_func<float, 256, 8u, 128u>(float, float*) [clone .kd] | 0 | 0 | 455 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 302720 | 0x7fbb0cfbe280 | 0x7fbb0a226540 | 5781872 | 5660365 | 65536 | 722733 | 722733 | 23127456.0 | 22115902.0 | 0.0 | 0.0 | 0.0 | 136.0 | 0.0 | 1050984.0 | 0.0 | 1051117.0 | 0.0 | 2386.0 | 1048731.0 | 1051123.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097331.0 | 0.0 | 272.0 | 0.0 | 0.0 | 0.0 | 49750.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097280.0 | 0.0 | 547918727.0 | 0.0 | 16299723225402 | 16307589745558 | 16307590194521 | 16299724078403 |
| 155 | 153 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 128u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 458 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 305472 | 0x7fbb0cfbe100 | 0x7fbb0a226580 | 5789728 | 5674204 | 65536 | 723715 | 723715 | 23158880.0 | 22529846.0 | 0.0 | 0.0 | 0.0 | 217.0 | 0.0 | 2099622.0 | 0.0 | 2099919.0 | 0.0 | 2537.0 | 2097382.0 | 2099871.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194554.0 | 0.0 | 412.0 | 0.0 | 0.0 | 0.0 | 53062.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194501.0 | 0.0 | 1065673156.0 | 0.0 | 16299724098892 | 16307590258202 | 16307590724284 | 16299724966633 |
| 156 | 154 | void benchmark_func<double, 256, 8u, 128u>(double, double*) [clone .kd] | 0 | 0 | 461 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 308224 | 0x7fbb0abd5f80 | 0x7fbb0a2265c0 | 5771688 | 5663821 | 65536 | 721460 | 721460 | 23086720.0 | 22416546.0 | 0.0 | 0.0 | 0.0 | 204.0 | 0.0 | 2099639.0 | 0.0 | 2099818.0 | 0.0 | 2440.0 | 2097378.0 | 2099771.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194547.0 | 0.0 | 400.0 | 0.0 | 0.0 | 0.0 | 57133.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194488.0 | 0.0 | 1069240090.0 | 0.0 | 16299724987623 | 16307590791965 | 16307591253408 | 16299725869003 |
| 157 | 155 | void benchmark_func<__half2, 256, 8u, 128u>(__half2, __half2*) [clone .kd] | 0 | 0 | 464 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 310976 | 0x7fbb0abd5e00 | 0x7fbb0a226600 | 5825856 | 5698302 | 65536 | 728231 | 728231 | 23303392.0 | 22410612.0 | 0.0 | 0.0 | 0.0 | 134.0 | 0.0 | 1051040.0 | 0.0 | 1051172.0 | 0.0 | 2442.0 | 1048730.0 | 1051175.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097328.0 | 0.0 | 264.0 | 0.0 | 0.0 | 0.0 | 54292.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097277.0 | 0.0 | 546880221.0 | 0.0 | 16299725890253 | 16307591317088 | 16307591769571 | 16299726755764 |
| 158 | 156 | void benchmark_func<int, 256, 8u, 128u>(int, int*) [clone .kd] | 0 | 0 | 467 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 313216 | 0x7fbb0abd5c80 | 0x7fbb0a226640 | 10939104 | 10810074 | 65536 | 1367387 | 1367387 | 43756384.0 | 35839232.0 | 0.0 | 0.0 | 0.0 | 131.0 | 0.0 | 1050536.0 | 0.0 | 1050666.0 | 0.0 | 1942.0 | 1048724.0 | 1050666.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097320.0 | 0.0 | 266.0 | 0.0 | 0.0 | 0.0 | 45159.0 | 0.0 | 0.0 | 983060.0 | 0.0 | 2097266.0 | 0.0 | 549664161.0 | 0.0 | 16299726775533 | 16307591834691 | 16307592704777 | 16299728059575 |
| 159 | 157 | void benchmark_func<float, 256, 8u, 256u>(float, float*) [clone .kd] | 0 | 0 | 470 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 315968 | 0x7fbb0abd5b00 | 0x7fbb0a226680 | 10968432 | 10849364 | 65536 | 1371053 | 1371053 | 43873696.0 | 34898231.0 | 0.0 | 0.0 | 0.0 | 131.0 | 0.0 | 1050984.0 | 0.0 | 1051115.0 | 0.0 | 2386.0 | 1048729.0 | 1051117.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097324.0 | 0.0 | 258.0 | 0.0 | 0.0 | 0.0 | 53469.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097271.0 | 0.0 | 561107942.0 | 0.0 | 16299728087564 | 16307592776137 | 16307593629742 | 16299729351787 |
| 160 | 158 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 256u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 473 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 318720 | 0x7fbb0abd5980 | 0x7fbb0a2266c0 | 10979800 | 10859836 | 65536 | 1372474 | 1372474 | 43919168.0 | 42238188.0 | 0.0 | 0.0 | 0.0 | 198.0 | 0.0 | 2099630.0 | 0.0 | 2099843.0 | 0.0 | 2470.0 | 2097373.0 | 2099842.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194547.0 | 0.0 | 398.0 | 0.0 | 0.0 | 0.0 | 48580.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194487.0 | 0.0 | 1066416692.0 | 0.0 | 16299729371936 | 16307593693583 | 16307594578708 | 16299730715117 |
| 161 | 159 | void benchmark_func<double, 256, 8u, 256u>(double, double*) [clone .kd] | 0 | 0 | 476 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 321472 | 0x7fbb0abd5800 | 0x7fbb0a226700 | 10979072 | 10855859 | 65536 | 1372383 | 1372383 | 43916256.0 | 42891509.0 | 0.0 | 0.0 | 0.0 | 197.0 | 0.0 | 2099584.0 | 0.0 | 2099848.0 | 0.0 | 2471.0 | 2097377.0 | 2099820.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194546.0 | 0.0 | 398.0 | 0.0 | 0.0 | 0.0 | 40695.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194494.0 | 0.0 | 1072306360.0 | 0.0 | 16299730731926 | 16307594657428 | 16307595538394 | 16299732071087 |
| 162 | 160 | void benchmark_func<__half2, 256, 8u, 256u>(__half2, __half2*) [clone .kd] | 0 | 0 | 479 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 324224 | 0x7fbb0abd5680 | 0x7fbb0a226740 | 11009240 | 10884392 | 65536 | 1376154 | 1376154 | 44036928.0 | 34293570.0 | 0.0 | 0.0 | 0.0 | 133.0 | 0.0 | 1051040.0 | 0.0 | 1051169.0 | 0.0 | 2442.0 | 1048727.0 | 1051172.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097326.0 | 0.0 | 260.0 | 0.0 | 0.0 | 0.0 | 55355.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097270.0 | 0.0 | 557364541.0 | 0.0 | 16299732088146 | 16307595610074 | 16307596469760 | 16299733404177 |
| 163 | 161 | void benchmark_func<int, 256, 8u, 256u>(int, int*) [clone .kd] | 0 | 0 | 482 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 326464 | 0x7fbb0cfbe500 | 0x7fbb0a226780 | 21318976 | 21189563 | 65536 | 2664871 | 2664871 | 85275872.0 | 56067432.0 | 0.0 | 0.0 | 0.0 | 130.0 | 0.0 | 1050536.0 | 0.0 | 1050662.0 | 0.0 | 1942.0 | 1048720.0 | 1050666.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097313.0 | 0.0 | 252.0 | 0.0 | 0.0 | 0.0 | 53331.0 | 0.0 | 0.0 | 983060.0 | 0.0 | 2097258.0 | 0.0 | 571133608.0 | 0.0 | 16299733423357 | 16307596535360 | 16307598234890 | 16299735574959 |
| 164 | 162 | void benchmark_func<float, 256, 8u, 512u>(float, float*) [clone .kd] | 0 | 0 | 485 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 329216 | 0x7fbb0cfbe380 | 0x7fbb0a2267c0 | 21347392 | 21226576 | 65536 | 2668423 | 2668423 | 85389536.0 | 51992813.0 | 0.0 | 0.0 | 0.0 | 132.0 | 0.0 | 1050984.0 | 0.0 | 1051113.0 | 0.0 | 2386.0 | 1048727.0 | 1051117.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097322.0 | 0.0 | 254.0 | 0.0 | 0.0 | 0.0 | 49753.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097270.0 | 0.0 | 575933624.0 | 0.0 | 16299735600879 | 16307598328971 | 16307599995061 | 16299737722932 |
| 165 | 163 | void benchmark_func<HIP_vector_type<float, 2u>, 256, 8u, 512u>(HIP_vector_type<float, 2u>, HIP_vector_type<float, 2u>*) [clone .kd] | 0 | 0 | 488 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 331968 | 0x7fbb0cfbe200 | 0x7fbb0a226800 | 21376312 | 21240434 | 65536 | 2672038 | 2672038 | 85505216.0 | 76142718.0 | 0.0 | 0.0 | 0.0 | 202.0 | 0.0 | 2099638.0 | 0.0 | 2099843.0 | 0.0 | 2477.0 | 2097366.0 | 2099857.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194547.0 | 0.0 | 398.0 | 0.0 | 0.0 | 0.0 | 48657.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194490.0 | 0.0 | 1088743075.0 | 0.0 | 16299737736641 | 16307600082422 | 16307601808512 | 16299739927913 |
| 166 | 164 | void benchmark_func<double, 256, 8u, 512u>(double, double*) [clone .kd] | 0 | 0 | 491 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 12 | 24 | 334720 | 0x7fbb0cfbe080 | 0x7fbb0a226840 | 21347160 | 21229724 | 65536 | 2668394 | 2668394 | 85388608.0 | 73416382.0 | 0.0 | 0.0 | 0.0 | 203.0 | 0.0 | 2099560.0 | 0.0 | 2099786.0 | 0.0 | 2420.0 | 2097366.0 | 2099791.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4194542.0 | 0.0 | 390.0 | 0.0 | 0.0 | 0.0 | 43538.0 | 0.0 | 0.0 | 2031640.0 | 0.0 | 4194487.0 | 0.0 | 1092467963.0 | 0.0 | 16299739941813 | 16307601913153 | 16307603629164 | 16299742127435 |
| 167 | 165 | void benchmark_func<__half2, 256, 8u, 512u>(__half2, __half2*) [clone .kd] | 0 | 0 | 494 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 337472 | 0x7fbb0abd5f00 | 0x7fbb0a226880 | 21404016 | 21274659 | 65536 | 2675501 | 2675501 | 85616032.0 | 55248427.0 | 0.0 | 0.0 | 0.0 | 131.0 | 0.0 | 1051040.0 | 0.0 | 1051169.0 | 0.0 | 2442.0 | 1048727.0 | 1051172.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097326.0 | 0.0 | 260.0 | 0.0 | 0.0 | 0.0 | 52254.0 | 0.0 | 0.0 | 983064.0 | 0.0 | 2097273.0 | 0.0 | 574058520.0 | 0.0 | 16299742149834 | 16307603716684 | 16307605391894 | 16299744279527 |
| 168 | 166 | void benchmark_func<int, 256, 8u, 512u>(int, int*) [clone .kd] | 0 | 0 | 497 | 154513 | 154513 | 4194304 | 256 | 0 | 0 | 8 | 24 | 0 | 0x7fbb0abd5d80 | 0x7fbb0a2268c0 | 42048896 | 41931502 | 65536 | 5256111 | 5256111 | 168195552.0 | 68798963.0 | 0.0 | 0.0 | 0.0 | 126.0 | 0.0 | 1050536.0 | 0.0 | 1050664.0 | 0.0 | 1942.0 | 1048722.0 | 1050663.0 | 0.0 | 0.0 | 31.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2097314.0 | 0.0 | 254.0 | 0.0 | 0.0 | 0.0 | 49141.0 | 0.0 | 0.0 | 983060.0 | 0.0 | 2097259.0 | 0.0 | 576551943.0 | 0.0 | 16299744297687 | 16307605481175 | 16307608840075 | 16299748126322 |