SWDEV-409155: Adding ATT plugin tests
Change-Id: I684affa4a63ed1c6fd7d8a4bb18f83697c3181a3
This commit is contained in:
@@ -214,6 +214,19 @@ install(
|
||||
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/profiler/apps
|
||||
COMPONENT tests)
|
||||
|
||||
# ATT correctness vectoradd
|
||||
set_source_files_properties(apps/att_vectoradd.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
rocprofiler_featuretests_profiler_add_executable(att_vectoradd apps/att_vectoradd.cpp)
|
||||
set_target_properties(
|
||||
att_vectoradd PROPERTIES RUNTIME_OUTPUT_DIRECTORY
|
||||
"${PROJECT_BINARY_DIR}/tests-v2/featuretests/profiler/apps")
|
||||
target_link_options(att_vectoradd PRIVATE "-Wl,--build-id=md5")
|
||||
install(
|
||||
TARGETS att_vectoradd
|
||||
RUNTIME
|
||||
DESTINATION
|
||||
${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/profiler/apps
|
||||
COMPONENT tests)
|
||||
|
||||
# hsa-mem_async_copy -- Not Enabled for Now
|
||||
set_source_files_properties(apps/async_mem_copy.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT
|
||||
@@ -425,4 +438,5 @@ find_package(
|
||||
# cmake based tests
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/counter_validation_tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/counter_correctness_tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/att_correctness_tests.cmake)
|
||||
endif()
|
||||
|
||||
+96
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define WIDTH 256
|
||||
#define HEIGHT 128
|
||||
|
||||
#define NUM (WIDTH * HEIGHT)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 8
|
||||
#define THREADS_PER_BLOCK_Y 8
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
__global__ void vectoradd_att(float* __restrict__ a, const float* __restrict__ b,
|
||||
const float* __restrict__ c, int width, int height)
|
||||
{
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if (i < width * height)
|
||||
a[i] = b[i] + c[i];
|
||||
}
|
||||
|
||||
int main() {
|
||||
float* hostA;
|
||||
float* hostB;
|
||||
float* hostC;
|
||||
|
||||
float* deviceA;
|
||||
float* deviceB;
|
||||
float* deviceC;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
|
||||
hostA = reinterpret_cast<float*>(malloc(NUM * sizeof(float)));
|
||||
hostB = reinterpret_cast<float*>(malloc(NUM * sizeof(float)));
|
||||
hostC = reinterpret_cast<float*>(malloc(NUM * sizeof(float)));
|
||||
|
||||
// initialize the input data
|
||||
for (size_t i = 0; i < NUM; i++) {
|
||||
hostB[i] = static_cast<float>(i);
|
||||
hostC[i] = static_cast<float>(i) * 100.0f;
|
||||
}
|
||||
|
||||
hipMalloc(reinterpret_cast<void**>(&deviceA), NUM * sizeof(float));
|
||||
hipMalloc(reinterpret_cast<void**>(&deviceB), NUM * sizeof(float));
|
||||
hipMalloc(reinterpret_cast<void**>(&deviceC), NUM * sizeof(float));
|
||||
|
||||
hipMemcpy(deviceB, hostB, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
hipMemcpy(deviceC, hostC, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
hipLaunchKernelGGL(vectoradd_att,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0, deviceA, deviceB, deviceC, WIDTH, HEIGHT);
|
||||
|
||||
hipMemcpy(hostA, deviceA, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
hipFree(deviceA);
|
||||
hipFree(deviceB);
|
||||
hipFree(deviceC);
|
||||
|
||||
free(hostA);
|
||||
free(hostB);
|
||||
free(hostC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
att: TARGET_CU=1
|
||||
SIMD_SELECT=0x3
|
||||
SE_MASK=0x55555555
|
||||
BUFFER_SIZE=192
|
||||
ISA_CAPTURE_MODE=0
|
||||
@@ -0,0 +1,31 @@
|
||||
# counter correctness test
|
||||
add_test(
|
||||
NAME att_correctness_vectoradd_run
|
||||
COMMAND
|
||||
${PROJECT_BINARY_DIR}/rocprofv2 -i
|
||||
${PROJECT_BINARY_DIR}/tests-v2/featuretests/profiler/apps/goldentraces/att_vadd.txt
|
||||
-d /tmp/tests-v2/att/ -o /tmp/tests-v2/att/vadd
|
||||
--plugin att auto --mode csv tests-v2/featuretests/profiler/apps/att_vectoradd.cpp
|
||||
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}")
|
||||
|
||||
set_tests_properties(
|
||||
att_correctness_vectoradd_run PROPERTIES LABELS "v2;rocprofv2" ENVIRONMENT
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
add_test(
|
||||
NAME att_correctness_vectoradd_parse
|
||||
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_att_vectoradd.py
|
||||
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}")
|
||||
|
||||
set_tests_properties(
|
||||
att_correctness_vectoradd_parse
|
||||
PROPERTIES DEPENDS
|
||||
att_correctness_vectoradd_run
|
||||
LABELS
|
||||
"v2;validation"
|
||||
PASS_REGULAR_EXPRESSION
|
||||
"Test Passed"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"Test Failed"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"Skipped")
|
||||
@@ -0,0 +1,72 @@
|
||||
import numpy as np
|
||||
import pandas
|
||||
|
||||
MAX_CU = 16
|
||||
MAX_WAVE_SIZE = 64
|
||||
MAXIMUM_ATT_HITS = 256*128//MAX_WAVE_SIZE//MAX_CU
|
||||
|
||||
kernel_name = "vectoradd_att"
|
||||
csv_filename = "att_output_" + kernel_name + "_v0.csv"
|
||||
output_folder = "/tmp/tests-v2/att"
|
||||
|
||||
|
||||
def test_hitcount(csv):
|
||||
hits = {m: True for m in csv['Hitcount'] if m != 0}
|
||||
print('hits', hits)
|
||||
assert(len(hits) > 0)
|
||||
assert(np.max([k for k in hits.keys()]) <= MAXIMUM_ATT_HITS)
|
||||
|
||||
|
||||
def test_addr(csv):
|
||||
addrs = np.array([int(addr, 16) for addr in csv['Addr'] if len(addr) != 0 and addr[0] != '-'])
|
||||
print('addrs', addrs)
|
||||
assert(addrs.max() - addrs.min() > 32) # 32 bytes is a safe minimum value
|
||||
assert(addrs.max() - addrs.min() < 2**24) # Kernels are not anywhere near that large
|
||||
|
||||
|
||||
def test_memory_list(csv):
|
||||
inst_list = ' '.join(csv['Instruction'])
|
||||
assert('vectoradd_' in inst_list)
|
||||
assert('s_load_' in inst_list)
|
||||
assert('_store_' in inst_list)
|
||||
assert('s_waitcnt' in inst_list)
|
||||
assert('v_add' in inst_list)
|
||||
assert('global_load' in inst_list or 'buffer_load' in inst_list or 'flat_load' in inst_list)
|
||||
|
||||
|
||||
def test_mean_cycles(csv):
|
||||
cycles = np.array([c/float(h) for c, h in zip(csv['Cycles'], csv['Hitcount']) if c != 0])
|
||||
print('cycles', cycles)
|
||||
assert(cycles.min() < 5) # Waves should have some instructions with very few cycles
|
||||
assert(cycles.max() > 100) # s_waitcnt should have a large cost
|
||||
assert(cycles.mean() > 1) # Minimum cost per inst is 1
|
||||
assert(np.median(cycles) <= 16) # Majority of instructions are not that expensive
|
||||
|
||||
maxv = int(4*cycles.max()+5)//4
|
||||
histogram = np.histogram(cycles, range=[0,maxv], bins=max(maxv//8, 1))[0]
|
||||
assert(histogram[0] == np.max(histogram)) # 1~8 cycles should be most common cost
|
||||
|
||||
|
||||
def test_memory_cycles(csv):
|
||||
is_memory_op = lambda s: ('waitcnt' in s) or ('_load_' in s) or ('_store_' in s)
|
||||
|
||||
max_cycles = np.max(csv['Cycles'])
|
||||
most_exp_inst = [f for f in csv[csv['Cycles'] == max_cycles]['Instruction']][0]
|
||||
print('most_exp_inst', most_exp_inst)
|
||||
assert(is_memory_op(most_exp_inst)) # Memory ops should be the most expensive insts
|
||||
|
||||
memory_ops = [c for s, c in zip(csv['Instruction'],csv['Cycles']) if is_memory_op(s)]
|
||||
print('memory_ops', memory_ops) # Memory ops should be more than half the total cycles
|
||||
assert(np.sum(memory_ops) > np.sum(csv['Cycles'])*0.5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
csv = pandas.read_csv(f"{output_folder}/{csv_filename}")
|
||||
|
||||
test_hitcount(csv)
|
||||
test_addr(csv)
|
||||
test_memory_list(csv)
|
||||
test_mean_cycles(csv)
|
||||
test_memory_cycles(csv)
|
||||
|
||||
print("All ATT correctness tests passed.")
|
||||
Reference in New Issue
Block a user