From 8b445d2c00ee08d4827c117663be9efeac7abed8 Mon Sep 17 00:00:00 2001 From: Xianwei Zhang Date: Fri, 12 Jul 2019 12:21:08 -0500 Subject: [PATCH] test: add tests to validate cache/mem blocks To validate cache and memory blocks profiling, this patch prepares tests to profile dedicated kernels using specified counters, to compare the profiled results against expected ones, and further show the test is a fail or pass. Tests here are focusing on cache hit/miss, memory fetch/write size. Change-Id: Icbc8096a6e15256dec66297597a57c7665a533b8 --- test/memory_validation/README | 38 +++ .../benches/test_cache/Makefile | 19 ++ .../benches/test_cache/cache.cpp | 179 ++++++++++++ .../pmc_config_files/cache_pmc.txt | 6 + test/memory_validation/run_scripts/global.cfg | 50 ++++ .../run_scripts/test_cache_miss.sh | 269 ++++++++++++++++++ .../run_scripts/test_fetchwrite_size.sh | 156 ++++++++++ test/tool/metrics.xml | 8 + 8 files changed, 725 insertions(+) create mode 100644 test/memory_validation/README create mode 100644 test/memory_validation/benches/test_cache/Makefile create mode 100644 test/memory_validation/benches/test_cache/cache.cpp create mode 100644 test/memory_validation/pmc_config_files/cache_pmc.txt create mode 100644 test/memory_validation/run_scripts/global.cfg create mode 100755 test/memory_validation/run_scripts/test_cache_miss.sh create mode 100755 test/memory_validation/run_scripts/test_fetchwrite_size.sh diff --git a/test/memory_validation/README b/test/memory_validation/README new file mode 100644 index 0000000000..f12bc44c8a --- /dev/null +++ b/test/memory_validation/README @@ -0,0 +1,38 @@ +Memory Validation Tests + +The tests here are used to validate TCP and TCC. The validation focuses on the +commonly used stats like cache hit/miss, and memory traffic. The kernels used +for testing are dedicated ones, e.g., pointer chase,showing regular parrterns, +and thus providing expected stats. + +The testing workflow is that: +1) dedicated kernels will be executed and profiled using the specified +counters/events in rocprofiler; +2) profiling results will be parsed using the provided scripts, and compared +against expected values (self-checking); +3) the comparion results are printed onto screen to show the test is a pass or +fail. + +#### Source tree #### +- run_scripts/ + - global.cfg: settings and global codes used by .sh files + - test_cache_miss.sh: test TCP/TCC miss rates + - test_fetchwrite_size.sh: test memory fetch/write sizes +- pmc_config_files/ + - cache_pmc.txt: counters used for cache tests +- benches/ + - test_cache/: benchmark used for cached-related tests + + +#### How to run #### +1) step into the test folder + $cd test/memory_validation/ + +2) run tests + $run_scripts/test_cache_miss.sh [TCP/TCC/TCP TCC] + $run_scripts/test_fetchwrite_size.sh + +#### Known issues #### +while all tests have be thoroughly tests on Vega 10 and all show [PASS], +occasionally some tests show [FAIL]. Possbile reasons is interference onto test +benchmarks from runtime processes. You just need to run the tests again. diff --git a/test/memory_validation/benches/test_cache/Makefile b/test/memory_validation/benches/test_cache/Makefile new file mode 100644 index 0000000000..17d69409ee --- /dev/null +++ b/test/memory_validation/benches/test_cache/Makefile @@ -0,0 +1,19 @@ +HIP_PATH?= $(wildcard /opt/rocm/hip) +ifeq (,$(HIP_PATH)) + HIP_PATH=../../.. +endif +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) +HIPCC=$(HIP_PATH)/bin/hipcc + +# specify .cpp filename here +filename=cache + +SOURCES=$(filename).cpp + +all: $(filename) + +$(filename): $(SOURCES) + $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ + +clean: + rm -f *.o *.out $(filename) diff --git a/test/memory_validation/benches/test_cache/cache.cpp b/test/memory_validation/benches/test_cache/cache.cpp new file mode 100644 index 0000000000..ed07423230 --- /dev/null +++ b/test/memory_validation/benches/test_cache/cache.cpp @@ -0,0 +1,179 @@ +/* +* Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ + +# include +# include +# include + +typedef unsigned int ARRAY_TYPE;//array element type +#define K_LEN 1024 //number of first batch accesses to be timed +#define DELTA 0 +int CNT = 1 * K_LEN; //i.e., iterations * K_LEN + +__global__ void cache_test_RW (ARRAY_TYPE * my_array, int array_length, + int iterations, unsigned int *index) { + unsigned int j = 0; + int k; + + for (k = 0; k < iterations*K_LEN+DELTA; k++) { + j = my_array[j]; + index[k] = j; + } +} + +__global__ void cache_test_RO (ARRAY_TYPE * my_array, int array_length, + int iterations, unsigned int *index) { + unsigned int j = 0; + + int k; + + for (k = 0; k < iterations*K_LEN+DELTA; k++) { + j = my_array[j]; + clock(); + } + + index[0] = j; +} + +__global__ void cache_test_WO (ARRAY_TYPE * my_array, int array_length, + int iterations, int stride) { + uint64_t k; + uint64_t cnt = (uint64_t)iterations*K_LEN*stride; + + for (k = 0; k < cnt; k+=stride) { + my_array[k%array_length] = k; + } +} + +void run_test(int N, int iterations, int stride); + +int main(int argc, char* argv[]){ + if (argc <= 3) { + printf("Please input ...\n"); + return -1; + } + + int stride = atoi(argv[1]); + int N = atoi(argv[2]); + int iterations = atoi(argv[3]); + CNT = iterations * K_LEN + DELTA; + + hipSetDevice(1); + + printf("\n=====%10.4f KB (N=%d) array, %d total accesses, " + "%d iterations ====\n", sizeof(ARRAY_TYPE)*(float)N/1024, + N, CNT, iterations); + + printf("Stride = %d element, %lu byte\n", stride, + stride * sizeof(ARRAY_TYPE)); + + run_test(N, iterations, stride); + printf("===============================================\n\n"); + + hipDeviceReset(); + return 0; +} + +void run_test(int N, int iterations, int stride) { + hipDeviceReset(); + + hipError_t error_id; + + int i; + ARRAY_TYPE * h_a; + /* allocate on CPU */ + h_a = (ARRAY_TYPE *)malloc(sizeof(ARRAY_TYPE) * N); + ARRAY_TYPE * d_a; + /* allocate on GPU */ + error_id = hipMalloc ((void **) &d_a, sizeof(ARRAY_TYPE) * N); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + /* pointer-chase: initialize array elements on CPU. */ + + for (i = 0; i < N; i++) { + h_a[i] = (ARRAY_TYPE)((i+stride)%N); + } + + /* copy array elements from CPU to GPU */ + error_id = hipMemcpy(d_a, h_a, sizeof(ARRAY_TYPE) * N, + hipMemcpyHostToDevice); + if (error_id != hipSuccess) { + printf("Error: is %s\n", hipGetErrorString(error_id)); + } + + unsigned int *h_index = (unsigned int *)malloc(sizeof(unsigned int)*CNT); + + unsigned int *d_index; + error_id = hipMalloc( (void **) &d_index, sizeof(unsigned int)*CNT ); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + hipDeviceSynchronize (); + /* launch kernel: single thread*/ + dim3 Db = dim3(1); //dimGrid, how many WGs + dim3 Dg = dim3(1,1,1); //dimBlock. WG size + + hipLaunchKernelGGL((cache_test_RO), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, d_index); + hipDeviceSynchronize (); + + hipLaunchKernelGGL((cache_test_RW), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, d_index); + hipDeviceSynchronize (); + + hipLaunchKernelGGL((cache_test_WO), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, stride); + hipDeviceSynchronize (); + + error_id = hipGetLastError(); + if (error_id != hipSuccess) { + printf("Error kernel is %s\n", hipGetErrorString(error_id)); + } + + /* copy results from GPU to CPU */ + hipDeviceSynchronize (); + + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + error_id = hipMemcpy((void *)h_index, (void *)d_index, + sizeof(unsigned int)*CNT, hipMemcpyDeviceToHost); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + hipDeviceSynchronize (); + + /* free memory on GPU */ + hipFree(d_a); + hipFree(d_index); + + /*free memory on CPU */ + free(h_a); + free(h_index); + + hipDeviceReset(); +} diff --git a/test/memory_validation/pmc_config_files/cache_pmc.txt b/test/memory_validation/pmc_config_files/cache_pmc.txt new file mode 100644 index 0000000000..314bcbc3f4 --- /dev/null +++ b/test/memory_validation/pmc_config_files/cache_pmc.txt @@ -0,0 +1,6 @@ +pmc : FlatVMemInsts SFetchInsts +pmc : TCC_HIT_sum TCC_MISS_sum +pmc : TCC_EA_RDREQ_sum TCC_EA_WRREQ_sum +pmc : TCC_EA_RDREQ_32B_sum TCC_EA_WRREQ_64B_sum +pmc : FetchSize +pmc : WriteSize Mem32Bwrites diff --git a/test/memory_validation/run_scripts/global.cfg b/test/memory_validation/run_scripts/global.cfg new file mode 100644 index 0000000000..387600bb3a --- /dev/null +++ b/test/memory_validation/run_scripts/global.cfg @@ -0,0 +1,50 @@ +#-- profiler path +#-- specify the path to your rocprofiler here, ow default one will be used +ROCP_PATH="" + +#-- benchmark path +#-- the one used for cache/mem validation +PATH_CACHE_BENCH="benches/test_cache" + +#-- colors +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +#-- function to do some initializations +function initialize +{ + cd ${BASE_DIR}/../$PATH_CACHE_BENCH/ + # Build the benchmark + make + cd ${BASE_DIR}/../ + + ELEMENT_SIZE=4 + #-- extract TCP parameters from rocminfo + TCP_SIZE=`rocminfo | grep "L1:" | tail -n1 | awk '{print $NF}'` + LINE_SIZE=`rocminfo | grep "Cacheline Size:" | tail -n1 | awk '{print $NF}'` + if [[ $TCP_SIZE == *KB ]]; then TCP_SIZE=`echo "${TCP_SIZE//KB}"`; fi + C_tcp=`echo "$TCP_SIZE*1024/$ELEMENT_SIZE" | bc` + b_tcp=`echo "$LINE_SIZE/$ELEMENT_SIZE" | bc` +} + +#-- function to list columns in profiling file +function getColIds +{ + local file=$1 + local counterline=`head -n1 $file` + + IFS=',' read -ra CARR <<< "$counterline" + local colIds="" + for srch in $headers + do + local colId=1 + for ele in "${CARR[@]}"; do + if [[ $srch == $ele ]]; then break; fi + colId=$(( $colId+1 )) + done + colIds=$colIds" "$colId"|$srch" + done + echo $colIds +} + diff --git a/test/memory_validation/run_scripts/test_cache_miss.sh b/test/memory_validation/run_scripts/test_cache_miss.sh new file mode 100755 index 0000000000..5aa708f92d --- /dev/null +++ b/test/memory_validation/run_scripts/test_cache_miss.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +############################################################################### +# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $BASE_DIR/global.cfg +initialize +REQ_DIFF=40 # at most 30 TCC reads are not from TCP + +#-- test kernel: +#-- single thread, pointer chase +#-- test settings (format: Ns|M): +#-- issue M=512k accesses access the array with N elements using different +#-- strides (s=1/2/4/...) +#-- miss rate patterns: +#-- 1) if N <= C: r = (N/b)/M +#-- array fits into the cache, causing no replacement, and thus misses +#-- only happens when the line is being loaded, +#-- i.e., cold misses +#-- 2) if N > C: +#-- a). r = s/b, s in [1, b) +#-- b). r = 100%, s in [b, N/a) +#-- c). r = 0%, s in [N/a, ] +CACHES="TCP TCC" # which caches to test +input_args="$1 $2" +caches="${input_args// }" +if [[ ! -z $caches ]]; then + if [[ $caches == TCP ]] || [[ $caches == TCC ]]; then CACHES=$caches + elif [[ $caches != TCPTCC ]]; then + printf "${RED}Supported caches are TCP and TCC ...${NC}\n"; exit + fi +fi + +#-- TCP +TCP_Ns_M="64 16384|512" +#-- TCC +TCC_Ns_M="4096 8192 16384 32768 65536 131072 2097152 4194304|512@16" + + +headers="TCC_HIT_sum TCC_MISS_sum" + +# set up outputs +OUT_DIR="outs"; [ -d $OUT_DIR ] && rm -rf $OUT_DIR; mkdir $OUT_DIR +log_file=$OUT_DIR/"prof.log"; >$log_file + +kerns="cache_test_RO cache_test_WO" +# append '/' if a non-default/empty path is specified +if [[ ! -z $ROCP_PATH ]]; then ROCP_PATH=$ROCP_PATH"/"; fi + +function one_run +{ + local N=$1 + local s=$2 + local M=$3 + local level=$4 + rst_sym="N${N}_s${s}_M${M}k" + rst_file="${rst_sym}.csv" + #echo "$N: $s -- $OUT_DIR/$rst_file" + printf "\n Traverse %7s-int array in %5s-int stride with %4s accesses" \ + $N $s "${M}K" + ${ROCP_PATH}rocprof -i ${BASE_DIR}/../pmc_config_files/cache_pmc.txt -o \ + ${BASE_DIR}/../$OUT_DIR/$rst_file $PATH_CACHE_BENCH/cache $s $N $M \ + >> $log_file + colIds=$(getColIds $OUT_DIR/$rst_file) + + sed -i 's/(.*)/(args)/g' $OUT_DIR/$rst_file + + totTcpRds=0; totTcpWrs=0 # tcp rds/wrs + missTcpRds=0; missTcpWrs=0 # tcp rd/wr misses + hitTccReqs=0; missTccReqs=0; totTccReqs=0 # tcc hits/misses/reqs + totTccRds=0; totTccWrs=0 # tcc rds/wrs + missTccRds=0; missTccWrs=0 # tcc rd/wr misses + + for kern in $kerns + do + values=`grep $kern $OUT_DIR/$rst_file | sed 's/,/ /g'` + for colIdStr in $colIds + do + colId=`echo $colIdStr | cut -f1 -d'|'` + colStr=`echo $colIdStr | cut -f2 -d'|'` + colVal=`echo $values | cut -f$colId -d' '` + + if [[ $kern == cache_test_RO || $kern == cache_test_WO ]]; then + if [[ $colStr == TCC_HIT_sum ]]; then hitTccReqs=$colVal + elif [[ $colStr == TCC_MISS_sum ]]; then missTccReqs=$colVal; fi + fi + done + + rstdiff=1 # check result (0: pass/no-difference, 1: fail) + totTccReqs=$(( $hitTccReqs + $missTccReqs )) + + #-- use kernel 'cache_test_RO' to validate read miss rate + if [[ $kern == cache_test_RO ]]; then + totTcpRds=$(( $M*1024 )); totTcpWrs=1 # tcp rds/wrs + totTccWrs=1; missTccWrs=1 # one write, and miss + missTccRds=$(($missTccReqs-$missTccWrs)) # remaining are read misses + totTccRds=$(($totTccReqs - $totTccWrs)) # remaining are reads + missTcpRds=$totTccRds # tcp rd misses + other + + mn=0; md=0 # miss rate denoted using numerator and denomiator + line=$b_tcp; if (( $s > $b_tcp)); then line=$s; fi + expectedMissTcpRds=0 + if (( $N > $C_tcp )) && (( $s >= $b_tcp )); then + # array size is larger than cache capacity, and stride is larger + # than a cacheline size (N>C && s>=b) + # 100% miss if s in [b, N/a), only code misses if s is [N/a,] + if (($missTcpRds - $totTcpRds < $REQ_DIFF)) \ + && (($totTcpRds - $missTcpRds < $REQ_DIFF)); then + rstdiff=0; expectedMissTcpRds=$totTcpRds + elif (($missTcpRds - $N/$s < $REQ_DIFF)) \ + && (($N/$s - $missTcpRds < $REQ_DIFF)); then + rstdiff=0; expectedMissTcpRds=$(($N/$s)); fi + else + # array size is no larger than cache size (N<=C): only cold misses + # array size is larger than cache capacity, and stride is less than + # a cacheline size (N>C && s 0.98" | bc -l) )) \ + && (( $(echo "$tcprdmissrate > 0.98" | bc -l) )); then + expectedMissTccRds=$totTcpRds + printf "\n\tTCC-READ : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccRds ".5%" $missTccRds + else + printf "\n\tTCC-READ : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccRds $REQ_DIFF $missTccRds + fi + + # absolute difference between profiled and expected + diff=$(( $missTccRds - $coldmisses )) + if (( $(echo "$diff < 0" | bc -l) )); then + diff=`echo "$diff*-1" | bc -l`; fi + if (( $(echo "$tccrdmissrate > 0.98" | bc -l) )); then + printf "test [${GREEN}PASS${NC}]" + elif (( $diff < $REQ_DIFF )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + #-- use kernel 'cache_test_WO' to validate TCP write miss rate + elif [[ $kern == cache_test_WO ]]; then + totTcpRds=0; totTcpWrs=$(( $M*1024 )); # tcp rds/wrs + totTccRds=0; missTccRds=$totTccRds # no reads from tcp + totTccWrs=$(($totTccReqs - $totTccRds)) # remaining are writes + missTccWrs=$(($missTccReqs - $missTccRds)) # remaining are write mis + missTcpWrs=$totTccWrs # all tcc wrs are from tcp + + if (($missTcpWrs - $totTcpWrs < $REQ_DIFF)) \ + && (($totTcpWrs - $missTcpWrs < $REQ_DIFF)); then + rstdiff=0; fi + # tcp is write through + expectedMissTcpWrs=$totTcpWrs + + if [[ $level == TCP ]]; then + printf "\n\tTCP-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTcpWrs $REQ_DIFF $missTcpWrs + if (( $rstdiff == 0 )); then printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + # tcc validation + elif [[ $level == TCC ]]; then + if (( $rstdiff != 0 )); then + printf "\n\tTCP-WRITE : test [${RED}FAIL${NC}]"; fi + tccwrmissrate=$(awk -v mw=$missTccWrs -v wr=$totTccWrs \ + 'BEGIN{printf("%f", mw/wr)}') + + coldmisses=`echo "scale=0; $N/$line" | bc` + + expectedMissTccWrs=$coldmisses + if (( $(echo "$tccwrmissrate > 0.98" | bc -l) )); then + expectedMissTccWrs=$totTcpWrs; + printf "\n\tTCC-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccWrs ".5%" $missTccWrs + else + printf "\n\tTCC-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccWrs $REQ_DIFF $missTccWrs + fi + if (($missTccWrs - $coldmisses < $REQ_DIFF)) \ + && (($missTccWrs - $coldmisses < $REQ_DIFF)); then + printf "test [${GREEN}PASS${NC}]" + elif (( $(echo "$tccwrmissrate > 0.98" | bc -l) )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + fi + + done +} + +for cache in $CACHES +do +{ + cfgname="${cache}_Ns_M" + Ns_M=${!cfgname} + + Ns=`echo $Ns_M | cut -f1 -d'|'` #-- array sizes + M=`echo $Ns_M | cut -f2 -d'|' | cut -f1 -d'@'` #-- array accesses + S="" + if [[ $Ns_M == *@* ]]; then S=`echo $Ns_M | cut -f2 -d'@'`; fi #-- stride + #echo $S + + printf "\n\t=========================================================\n" + printf "\t==================== Test [$cache miss] ====================\n" + printf "\t=========================================================" + + for N in $Ns + do + if [[ x$S == x ]]; then + m_stride=$N + for (( s=1; s<=$m_stride; s*=2 )) + do + one_run $N $s $M $cache + done + else + one_run $N $S $M $cache + fi + done + printf "\n" +} +done diff --git a/test/memory_validation/run_scripts/test_fetchwrite_size.sh b/test/memory_validation/run_scripts/test_fetchwrite_size.sh new file mode 100755 index 0000000000..1d876fb28b --- /dev/null +++ b/test/memory_validation/run_scripts/test_fetchwrite_size.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +############################################################################### +# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $BASE_DIR/global.cfg +initialize + +Ns_M="8192 131072|512" + +headers="Mem32Bwrites FetchSize WriteSize" + +# set up outputs +OUT_DIR="outs"; [ -d $OUT_DIR ] && rm -rf $OUT_DIR; mkdir $OUT_DIR +log_file=$OUT_DIR/"prof.log"; >$log_file + +kerns="cache_test_RO cache_test_WO" +# append '/' if a non-default/empty path is specified +if [[ ! -z $ROCP_PATH ]]; then ROCP_PATH=$ROCP_PATH"/"; fi + +function one_run +{ + local N=$1 + local s=$2 + local M=$3 + local level=$4 + rst_sym="N${N}_s${s}_M${M}k" + rst_file="${rst_sym}.csv" + #echo "$N: $s -- $OUT_DIR/$rst_file" + printf "\n Traverse %5s-int array in %5s-int stride with %4s accesses" \ + $N $s "${M}K" + ${ROCP_PATH}rocprof -i ${BASE_DIR}/../pmc_config_files/cache_pmc.txt -o \ + ${BASE_DIR}/../$OUT_DIR/$rst_file $PATH_CACHE_BENCH/cache $s $N $M \ + >> $log_file + colIds=$(getColIds $OUT_DIR/$rst_file) + + sed -i 's/(.*)/(args)/g' $OUT_DIR/$rst_file + + for kern in $kerns + do + mc32wrs=0; fetchsize=0; writesize=0 + values=`grep $kern $OUT_DIR/$rst_file | sed 's/,/ /g'` + for colIdStr in $colIds + do + colId=`echo $colIdStr | cut -f1 -d'|'` + colStr=`echo $colIdStr | cut -f2 -d'|'` + colVal=`echo $values | cut -f$colId -d' '` + + if [[ $kern == cache_test_RO || $kern == cache_test_WO ]]; then + if [[ $colStr == Mem32Bwrites ]]; then mc32wrs=$colVal + elif [[ $colStr == FetchSize ]]; then fetchsize=$colVal + elif [[ $colStr == WriteSize ]]; then writesize=$colVal; fi + fi + done + + rstdiff=1 # check result (0: pass/no-difference, 1: fail) + + line=$b_tcp; if (( $s > $b_tcp)); then line=$s; fi + coldmisses=`echo "scale=0; $N/$line" | bc` + #-- use kernel 'cache_test_RO' to validate fetch size + if [[ $kern == cache_test_RO ]]; then + # program-level expectation: coldmisses*cacheline_size + expect_fetchKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 64*n/1024)}') + # profiled value + profile_fetchKB=$fetchsize + + printf "\n\tFetch-Size: expected=%4s KB, profiled=%4s KB, " \ + $expect_fetchKB $profile_fetchKB + if (( $profile_fetchKB == $expect_fetchKB )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + #-- use kernel 'cache_test_WO' to validate write size + elif [[ $kern == cache_test_WO ]]; then + # program-level expectation: coldmisses*req_size + expect0Max_writeKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 64*n/1024)}') + expect0Min_writeKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 32*n/1024)}') + expect1_writeKB=$(awk -v wr32B=$mc32wrs \ + 'BEGIN{printf("%.0f", (32*wr32B)/1024)}') + profile_writeKB=$writesize + + # stride is less then a line, always write 64B + if (( $s < $b_tcp )); then expect1_writeKB=$expect0Max_writeKB; fi + + rstdiff=1 #different by default + expect_writeKB=$expect1_writeKB #expected size + if (( $profile_writeKB >= $expect0Min_writeKB )) \ + && (( $profile_writeKB <= $expect0Max_writeKB )) \ + && (( $profile_writeKB == $expect1_writeKB )); then + rstdiff=0 + # not fall in expected range (min, max) + elif (( $profile_writeKB == $expect1_writeKB )); then + rstdiff=1; expect_writeKB=-1 + # in the range, but not as desired + else rstdiff=1; fi + + if (( $expect_writeKB == -1 )); then + printf "\n\tWrite-Size: expected>=%3s KB, profiled=%4s KB, " \ + $expect0Min_writeKB $profile_writeKB + printf "test [${RED}FAIL${NC}]" + else + printf "\n\tWrite-Size: expected=%4s KB, profiled=%4s KB, " \ + $expect_writeKB $profile_writeKB + if [[ $rstdiff == 0 ]]; then printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + fi + + done +} + +Ns=`echo $Ns_M | cut -f1 -d'|'` #-- array sizes +M=`echo $Ns_M | cut -f2 -d'|' | cut -f1 -d'@'` #-- array accesses +S="" +if [[ $Ns_M == *@* ]]; then S=`echo $Ns_M | cut -f2 -d'@'`; fi #-- stride +#echo $S + +printf "\n\t=========================================================\n" +printf "\t================ Test [fetch/write size] ================\n" +printf "\t=========================================================" + +for N in $Ns +do + if [[ x$S == x ]]; then + m_stride=$N + for (( s=1; s<=$m_stride/32; s*=2 )) + do + one_run $N $s $M $cache + done + else + one_run $N $S $M $cache + fi +done +printf "\n" diff --git a/test/tool/metrics.xml b/test/tool/metrics.xml index c679b91e7b..e2efdbcadf 100644 --- a/test/tool/metrics.xml +++ b/test/tool/metrics.xml @@ -32,6 +32,7 @@ + @@ -137,6 +138,13 @@ expr=100*SQ_INST_CYCLES_SALU*4/SIMD_NUM/GRBM_GUI_ACTIVE > + # Mem32Bwrites The number of effective 32B write transactions into memory + + # FetchSize The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.