diff --git a/test/memory_validation/README b/test/memory_validation/README new file mode 100644 index 0000000000..f12bc44c8a --- /dev/null +++ b/test/memory_validation/README @@ -0,0 +1,38 @@ +Memory Validation Tests + +The tests here are used to validate TCP and TCC. The validation focuses on the +commonly used stats like cache hit/miss, and memory traffic. The kernels used +for testing are dedicated ones, e.g., pointer chase,showing regular parrterns, +and thus providing expected stats. + +The testing workflow is that: +1) dedicated kernels will be executed and profiled using the specified +counters/events in rocprofiler; +2) profiling results will be parsed using the provided scripts, and compared +against expected values (self-checking); +3) the comparion results are printed onto screen to show the test is a pass or +fail. + +#### Source tree #### +- run_scripts/ + - global.cfg: settings and global codes used by .sh files + - test_cache_miss.sh: test TCP/TCC miss rates + - test_fetchwrite_size.sh: test memory fetch/write sizes +- pmc_config_files/ + - cache_pmc.txt: counters used for cache tests +- benches/ + - test_cache/: benchmark used for cached-related tests + + +#### How to run #### +1) step into the test folder + $cd test/memory_validation/ + +2) run tests + $run_scripts/test_cache_miss.sh [TCP/TCC/TCP TCC] + $run_scripts/test_fetchwrite_size.sh + +#### Known issues #### +while all tests have be thoroughly tests on Vega 10 and all show [PASS], +occasionally some tests show [FAIL]. Possbile reasons is interference onto test +benchmarks from runtime processes. You just need to run the tests again. diff --git a/test/memory_validation/benches/test_cache/Makefile b/test/memory_validation/benches/test_cache/Makefile new file mode 100644 index 0000000000..17d69409ee --- /dev/null +++ b/test/memory_validation/benches/test_cache/Makefile @@ -0,0 +1,19 @@ +HIP_PATH?= $(wildcard /opt/rocm/hip) +ifeq (,$(HIP_PATH)) + HIP_PATH=../../.. +endif +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) +HIPCC=$(HIP_PATH)/bin/hipcc + +# specify .cpp filename here +filename=cache + +SOURCES=$(filename).cpp + +all: $(filename) + +$(filename): $(SOURCES) + $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ + +clean: + rm -f *.o *.out $(filename) diff --git a/test/memory_validation/benches/test_cache/cache.cpp b/test/memory_validation/benches/test_cache/cache.cpp new file mode 100644 index 0000000000..ed07423230 --- /dev/null +++ b/test/memory_validation/benches/test_cache/cache.cpp @@ -0,0 +1,179 @@ +/* +* Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ + +# include +# include +# include + +typedef unsigned int ARRAY_TYPE;//array element type +#define K_LEN 1024 //number of first batch accesses to be timed +#define DELTA 0 +int CNT = 1 * K_LEN; //i.e., iterations * K_LEN + +__global__ void cache_test_RW (ARRAY_TYPE * my_array, int array_length, + int iterations, unsigned int *index) { + unsigned int j = 0; + int k; + + for (k = 0; k < iterations*K_LEN+DELTA; k++) { + j = my_array[j]; + index[k] = j; + } +} + +__global__ void cache_test_RO (ARRAY_TYPE * my_array, int array_length, + int iterations, unsigned int *index) { + unsigned int j = 0; + + int k; + + for (k = 0; k < iterations*K_LEN+DELTA; k++) { + j = my_array[j]; + clock(); + } + + index[0] = j; +} + +__global__ void cache_test_WO (ARRAY_TYPE * my_array, int array_length, + int iterations, int stride) { + uint64_t k; + uint64_t cnt = (uint64_t)iterations*K_LEN*stride; + + for (k = 0; k < cnt; k+=stride) { + my_array[k%array_length] = k; + } +} + +void run_test(int N, int iterations, int stride); + +int main(int argc, char* argv[]){ + if (argc <= 3) { + printf("Please input ...\n"); + return -1; + } + + int stride = atoi(argv[1]); + int N = atoi(argv[2]); + int iterations = atoi(argv[3]); + CNT = iterations * K_LEN + DELTA; + + hipSetDevice(1); + + printf("\n=====%10.4f KB (N=%d) array, %d total accesses, " + "%d iterations ====\n", sizeof(ARRAY_TYPE)*(float)N/1024, + N, CNT, iterations); + + printf("Stride = %d element, %lu byte\n", stride, + stride * sizeof(ARRAY_TYPE)); + + run_test(N, iterations, stride); + printf("===============================================\n\n"); + + hipDeviceReset(); + return 0; +} + +void run_test(int N, int iterations, int stride) { + hipDeviceReset(); + + hipError_t error_id; + + int i; + ARRAY_TYPE * h_a; + /* allocate on CPU */ + h_a = (ARRAY_TYPE *)malloc(sizeof(ARRAY_TYPE) * N); + ARRAY_TYPE * d_a; + /* allocate on GPU */ + error_id = hipMalloc ((void **) &d_a, sizeof(ARRAY_TYPE) * N); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + /* pointer-chase: initialize array elements on CPU. */ + + for (i = 0; i < N; i++) { + h_a[i] = (ARRAY_TYPE)((i+stride)%N); + } + + /* copy array elements from CPU to GPU */ + error_id = hipMemcpy(d_a, h_a, sizeof(ARRAY_TYPE) * N, + hipMemcpyHostToDevice); + if (error_id != hipSuccess) { + printf("Error: is %s\n", hipGetErrorString(error_id)); + } + + unsigned int *h_index = (unsigned int *)malloc(sizeof(unsigned int)*CNT); + + unsigned int *d_index; + error_id = hipMalloc( (void **) &d_index, sizeof(unsigned int)*CNT ); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + hipDeviceSynchronize (); + /* launch kernel: single thread*/ + dim3 Db = dim3(1); //dimGrid, how many WGs + dim3 Dg = dim3(1,1,1); //dimBlock. WG size + + hipLaunchKernelGGL((cache_test_RO), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, d_index); + hipDeviceSynchronize (); + + hipLaunchKernelGGL((cache_test_RW), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, d_index); + hipDeviceSynchronize (); + + hipLaunchKernelGGL((cache_test_WO), dim3(Dg), dim3(Db), 0, 0, d_a, N, + iterations, stride); + hipDeviceSynchronize (); + + error_id = hipGetLastError(); + if (error_id != hipSuccess) { + printf("Error kernel is %s\n", hipGetErrorString(error_id)); + } + + /* copy results from GPU to CPU */ + hipDeviceSynchronize (); + + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + error_id = hipMemcpy((void *)h_index, (void *)d_index, + sizeof(unsigned int)*CNT, hipMemcpyDeviceToHost); + if (error_id != hipSuccess) { + printf("Error: %s\n", hipGetErrorString(error_id)); + } + + hipDeviceSynchronize (); + + /* free memory on GPU */ + hipFree(d_a); + hipFree(d_index); + + /*free memory on CPU */ + free(h_a); + free(h_index); + + hipDeviceReset(); +} diff --git a/test/memory_validation/pmc_config_files/cache_pmc.txt b/test/memory_validation/pmc_config_files/cache_pmc.txt new file mode 100644 index 0000000000..314bcbc3f4 --- /dev/null +++ b/test/memory_validation/pmc_config_files/cache_pmc.txt @@ -0,0 +1,6 @@ +pmc : FlatVMemInsts SFetchInsts +pmc : TCC_HIT_sum TCC_MISS_sum +pmc : TCC_EA_RDREQ_sum TCC_EA_WRREQ_sum +pmc : TCC_EA_RDREQ_32B_sum TCC_EA_WRREQ_64B_sum +pmc : FetchSize +pmc : WriteSize Mem32Bwrites diff --git a/test/memory_validation/run_scripts/global.cfg b/test/memory_validation/run_scripts/global.cfg new file mode 100644 index 0000000000..387600bb3a --- /dev/null +++ b/test/memory_validation/run_scripts/global.cfg @@ -0,0 +1,50 @@ +#-- profiler path +#-- specify the path to your rocprofiler here, ow default one will be used +ROCP_PATH="" + +#-- benchmark path +#-- the one used for cache/mem validation +PATH_CACHE_BENCH="benches/test_cache" + +#-- colors +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +#-- function to do some initializations +function initialize +{ + cd ${BASE_DIR}/../$PATH_CACHE_BENCH/ + # Build the benchmark + make + cd ${BASE_DIR}/../ + + ELEMENT_SIZE=4 + #-- extract TCP parameters from rocminfo + TCP_SIZE=`rocminfo | grep "L1:" | tail -n1 | awk '{print $NF}'` + LINE_SIZE=`rocminfo | grep "Cacheline Size:" | tail -n1 | awk '{print $NF}'` + if [[ $TCP_SIZE == *KB ]]; then TCP_SIZE=`echo "${TCP_SIZE//KB}"`; fi + C_tcp=`echo "$TCP_SIZE*1024/$ELEMENT_SIZE" | bc` + b_tcp=`echo "$LINE_SIZE/$ELEMENT_SIZE" | bc` +} + +#-- function to list columns in profiling file +function getColIds +{ + local file=$1 + local counterline=`head -n1 $file` + + IFS=',' read -ra CARR <<< "$counterline" + local colIds="" + for srch in $headers + do + local colId=1 + for ele in "${CARR[@]}"; do + if [[ $srch == $ele ]]; then break; fi + colId=$(( $colId+1 )) + done + colIds=$colIds" "$colId"|$srch" + done + echo $colIds +} + diff --git a/test/memory_validation/run_scripts/test_cache_miss.sh b/test/memory_validation/run_scripts/test_cache_miss.sh new file mode 100755 index 0000000000..5aa708f92d --- /dev/null +++ b/test/memory_validation/run_scripts/test_cache_miss.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +############################################################################### +# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $BASE_DIR/global.cfg +initialize +REQ_DIFF=40 # at most 30 TCC reads are not from TCP + +#-- test kernel: +#-- single thread, pointer chase +#-- test settings (format: Ns|M): +#-- issue M=512k accesses access the array with N elements using different +#-- strides (s=1/2/4/...) +#-- miss rate patterns: +#-- 1) if N <= C: r = (N/b)/M +#-- array fits into the cache, causing no replacement, and thus misses +#-- only happens when the line is being loaded, +#-- i.e., cold misses +#-- 2) if N > C: +#-- a). r = s/b, s in [1, b) +#-- b). r = 100%, s in [b, N/a) +#-- c). r = 0%, s in [N/a, ] +CACHES="TCP TCC" # which caches to test +input_args="$1 $2" +caches="${input_args// }" +if [[ ! -z $caches ]]; then + if [[ $caches == TCP ]] || [[ $caches == TCC ]]; then CACHES=$caches + elif [[ $caches != TCPTCC ]]; then + printf "${RED}Supported caches are TCP and TCC ...${NC}\n"; exit + fi +fi + +#-- TCP +TCP_Ns_M="64 16384|512" +#-- TCC +TCC_Ns_M="4096 8192 16384 32768 65536 131072 2097152 4194304|512@16" + + +headers="TCC_HIT_sum TCC_MISS_sum" + +# set up outputs +OUT_DIR="outs"; [ -d $OUT_DIR ] && rm -rf $OUT_DIR; mkdir $OUT_DIR +log_file=$OUT_DIR/"prof.log"; >$log_file + +kerns="cache_test_RO cache_test_WO" +# append '/' if a non-default/empty path is specified +if [[ ! -z $ROCP_PATH ]]; then ROCP_PATH=$ROCP_PATH"/"; fi + +function one_run +{ + local N=$1 + local s=$2 + local M=$3 + local level=$4 + rst_sym="N${N}_s${s}_M${M}k" + rst_file="${rst_sym}.csv" + #echo "$N: $s -- $OUT_DIR/$rst_file" + printf "\n Traverse %7s-int array in %5s-int stride with %4s accesses" \ + $N $s "${M}K" + ${ROCP_PATH}rocprof -i ${BASE_DIR}/../pmc_config_files/cache_pmc.txt -o \ + ${BASE_DIR}/../$OUT_DIR/$rst_file $PATH_CACHE_BENCH/cache $s $N $M \ + >> $log_file + colIds=$(getColIds $OUT_DIR/$rst_file) + + sed -i 's/(.*)/(args)/g' $OUT_DIR/$rst_file + + totTcpRds=0; totTcpWrs=0 # tcp rds/wrs + missTcpRds=0; missTcpWrs=0 # tcp rd/wr misses + hitTccReqs=0; missTccReqs=0; totTccReqs=0 # tcc hits/misses/reqs + totTccRds=0; totTccWrs=0 # tcc rds/wrs + missTccRds=0; missTccWrs=0 # tcc rd/wr misses + + for kern in $kerns + do + values=`grep $kern $OUT_DIR/$rst_file | sed 's/,/ /g'` + for colIdStr in $colIds + do + colId=`echo $colIdStr | cut -f1 -d'|'` + colStr=`echo $colIdStr | cut -f2 -d'|'` + colVal=`echo $values | cut -f$colId -d' '` + + if [[ $kern == cache_test_RO || $kern == cache_test_WO ]]; then + if [[ $colStr == TCC_HIT_sum ]]; then hitTccReqs=$colVal + elif [[ $colStr == TCC_MISS_sum ]]; then missTccReqs=$colVal; fi + fi + done + + rstdiff=1 # check result (0: pass/no-difference, 1: fail) + totTccReqs=$(( $hitTccReqs + $missTccReqs )) + + #-- use kernel 'cache_test_RO' to validate read miss rate + if [[ $kern == cache_test_RO ]]; then + totTcpRds=$(( $M*1024 )); totTcpWrs=1 # tcp rds/wrs + totTccWrs=1; missTccWrs=1 # one write, and miss + missTccRds=$(($missTccReqs-$missTccWrs)) # remaining are read misses + totTccRds=$(($totTccReqs - $totTccWrs)) # remaining are reads + missTcpRds=$totTccRds # tcp rd misses + other + + mn=0; md=0 # miss rate denoted using numerator and denomiator + line=$b_tcp; if (( $s > $b_tcp)); then line=$s; fi + expectedMissTcpRds=0 + if (( $N > $C_tcp )) && (( $s >= $b_tcp )); then + # array size is larger than cache capacity, and stride is larger + # than a cacheline size (N>C && s>=b) + # 100% miss if s in [b, N/a), only code misses if s is [N/a,] + if (($missTcpRds - $totTcpRds < $REQ_DIFF)) \ + && (($totTcpRds - $missTcpRds < $REQ_DIFF)); then + rstdiff=0; expectedMissTcpRds=$totTcpRds + elif (($missTcpRds - $N/$s < $REQ_DIFF)) \ + && (($N/$s - $missTcpRds < $REQ_DIFF)); then + rstdiff=0; expectedMissTcpRds=$(($N/$s)); fi + else + # array size is no larger than cache size (N<=C): only cold misses + # array size is larger than cache capacity, and stride is less than + # a cacheline size (N>C && s 0.98" | bc -l) )) \ + && (( $(echo "$tcprdmissrate > 0.98" | bc -l) )); then + expectedMissTccRds=$totTcpRds + printf "\n\tTCC-READ : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccRds ".5%" $missTccRds + else + printf "\n\tTCC-READ : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccRds $REQ_DIFF $missTccRds + fi + + # absolute difference between profiled and expected + diff=$(( $missTccRds - $coldmisses )) + if (( $(echo "$diff < 0" | bc -l) )); then + diff=`echo "$diff*-1" | bc -l`; fi + if (( $(echo "$tccrdmissrate > 0.98" | bc -l) )); then + printf "test [${GREEN}PASS${NC}]" + elif (( $diff < $REQ_DIFF )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + #-- use kernel 'cache_test_WO' to validate TCP write miss rate + elif [[ $kern == cache_test_WO ]]; then + totTcpRds=0; totTcpWrs=$(( $M*1024 )); # tcp rds/wrs + totTccRds=0; missTccRds=$totTccRds # no reads from tcp + totTccWrs=$(($totTccReqs - $totTccRds)) # remaining are writes + missTccWrs=$(($missTccReqs - $missTccRds)) # remaining are write mis + missTcpWrs=$totTccWrs # all tcc wrs are from tcp + + if (($missTcpWrs - $totTcpWrs < $REQ_DIFF)) \ + && (($totTcpWrs - $missTcpWrs < $REQ_DIFF)); then + rstdiff=0; fi + # tcp is write through + expectedMissTcpWrs=$totTcpWrs + + if [[ $level == TCP ]]; then + printf "\n\tTCP-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTcpWrs $REQ_DIFF $missTcpWrs + if (( $rstdiff == 0 )); then printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + # tcc validation + elif [[ $level == TCC ]]; then + if (( $rstdiff != 0 )); then + printf "\n\tTCP-WRITE : test [${RED}FAIL${NC}]"; fi + tccwrmissrate=$(awk -v mw=$missTccWrs -v wr=$totTccWrs \ + 'BEGIN{printf("%f", mw/wr)}') + + coldmisses=`echo "scale=0; $N/$line" | bc` + + expectedMissTccWrs=$coldmisses + if (( $(echo "$tccwrmissrate > 0.98" | bc -l) )); then + expectedMissTccWrs=$totTcpWrs; + printf "\n\tTCC-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccWrs ".5%" $missTccWrs + else + printf "\n\tTCC-WRITE : expected=%6s±%s, profiled=%6s, " \ + $expectedMissTccWrs $REQ_DIFF $missTccWrs + fi + if (($missTccWrs - $coldmisses < $REQ_DIFF)) \ + && (($missTccWrs - $coldmisses < $REQ_DIFF)); then + printf "test [${GREEN}PASS${NC}]" + elif (( $(echo "$tccwrmissrate > 0.98" | bc -l) )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + fi + + done +} + +for cache in $CACHES +do +{ + cfgname="${cache}_Ns_M" + Ns_M=${!cfgname} + + Ns=`echo $Ns_M | cut -f1 -d'|'` #-- array sizes + M=`echo $Ns_M | cut -f2 -d'|' | cut -f1 -d'@'` #-- array accesses + S="" + if [[ $Ns_M == *@* ]]; then S=`echo $Ns_M | cut -f2 -d'@'`; fi #-- stride + #echo $S + + printf "\n\t=========================================================\n" + printf "\t==================== Test [$cache miss] ====================\n" + printf "\t=========================================================" + + for N in $Ns + do + if [[ x$S == x ]]; then + m_stride=$N + for (( s=1; s<=$m_stride; s*=2 )) + do + one_run $N $s $M $cache + done + else + one_run $N $S $M $cache + fi + done + printf "\n" +} +done diff --git a/test/memory_validation/run_scripts/test_fetchwrite_size.sh b/test/memory_validation/run_scripts/test_fetchwrite_size.sh new file mode 100755 index 0000000000..1d876fb28b --- /dev/null +++ b/test/memory_validation/run_scripts/test_fetchwrite_size.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +############################################################################### +# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +############################################################################### + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $BASE_DIR/global.cfg +initialize + +Ns_M="8192 131072|512" + +headers="Mem32Bwrites FetchSize WriteSize" + +# set up outputs +OUT_DIR="outs"; [ -d $OUT_DIR ] && rm -rf $OUT_DIR; mkdir $OUT_DIR +log_file=$OUT_DIR/"prof.log"; >$log_file + +kerns="cache_test_RO cache_test_WO" +# append '/' if a non-default/empty path is specified +if [[ ! -z $ROCP_PATH ]]; then ROCP_PATH=$ROCP_PATH"/"; fi + +function one_run +{ + local N=$1 + local s=$2 + local M=$3 + local level=$4 + rst_sym="N${N}_s${s}_M${M}k" + rst_file="${rst_sym}.csv" + #echo "$N: $s -- $OUT_DIR/$rst_file" + printf "\n Traverse %5s-int array in %5s-int stride with %4s accesses" \ + $N $s "${M}K" + ${ROCP_PATH}rocprof -i ${BASE_DIR}/../pmc_config_files/cache_pmc.txt -o \ + ${BASE_DIR}/../$OUT_DIR/$rst_file $PATH_CACHE_BENCH/cache $s $N $M \ + >> $log_file + colIds=$(getColIds $OUT_DIR/$rst_file) + + sed -i 's/(.*)/(args)/g' $OUT_DIR/$rst_file + + for kern in $kerns + do + mc32wrs=0; fetchsize=0; writesize=0 + values=`grep $kern $OUT_DIR/$rst_file | sed 's/,/ /g'` + for colIdStr in $colIds + do + colId=`echo $colIdStr | cut -f1 -d'|'` + colStr=`echo $colIdStr | cut -f2 -d'|'` + colVal=`echo $values | cut -f$colId -d' '` + + if [[ $kern == cache_test_RO || $kern == cache_test_WO ]]; then + if [[ $colStr == Mem32Bwrites ]]; then mc32wrs=$colVal + elif [[ $colStr == FetchSize ]]; then fetchsize=$colVal + elif [[ $colStr == WriteSize ]]; then writesize=$colVal; fi + fi + done + + rstdiff=1 # check result (0: pass/no-difference, 1: fail) + + line=$b_tcp; if (( $s > $b_tcp)); then line=$s; fi + coldmisses=`echo "scale=0; $N/$line" | bc` + #-- use kernel 'cache_test_RO' to validate fetch size + if [[ $kern == cache_test_RO ]]; then + # program-level expectation: coldmisses*cacheline_size + expect_fetchKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 64*n/1024)}') + # profiled value + profile_fetchKB=$fetchsize + + printf "\n\tFetch-Size: expected=%4s KB, profiled=%4s KB, " \ + $expect_fetchKB $profile_fetchKB + if (( $profile_fetchKB == $expect_fetchKB )); then + printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + #-- use kernel 'cache_test_WO' to validate write size + elif [[ $kern == cache_test_WO ]]; then + # program-level expectation: coldmisses*req_size + expect0Max_writeKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 64*n/1024)}') + expect0Min_writeKB=$(awk -v n=$coldmisses \ + 'BEGIN{printf("%.0f", 32*n/1024)}') + expect1_writeKB=$(awk -v wr32B=$mc32wrs \ + 'BEGIN{printf("%.0f", (32*wr32B)/1024)}') + profile_writeKB=$writesize + + # stride is less then a line, always write 64B + if (( $s < $b_tcp )); then expect1_writeKB=$expect0Max_writeKB; fi + + rstdiff=1 #different by default + expect_writeKB=$expect1_writeKB #expected size + if (( $profile_writeKB >= $expect0Min_writeKB )) \ + && (( $profile_writeKB <= $expect0Max_writeKB )) \ + && (( $profile_writeKB == $expect1_writeKB )); then + rstdiff=0 + # not fall in expected range (min, max) + elif (( $profile_writeKB == $expect1_writeKB )); then + rstdiff=1; expect_writeKB=-1 + # in the range, but not as desired + else rstdiff=1; fi + + if (( $expect_writeKB == -1 )); then + printf "\n\tWrite-Size: expected>=%3s KB, profiled=%4s KB, " \ + $expect0Min_writeKB $profile_writeKB + printf "test [${RED}FAIL${NC}]" + else + printf "\n\tWrite-Size: expected=%4s KB, profiled=%4s KB, " \ + $expect_writeKB $profile_writeKB + if [[ $rstdiff == 0 ]]; then printf "test [${GREEN}PASS${NC}]" + else printf "test [${RED}FAIL${NC}]"; fi + fi + fi + + done +} + +Ns=`echo $Ns_M | cut -f1 -d'|'` #-- array sizes +M=`echo $Ns_M | cut -f2 -d'|' | cut -f1 -d'@'` #-- array accesses +S="" +if [[ $Ns_M == *@* ]]; then S=`echo $Ns_M | cut -f2 -d'@'`; fi #-- stride +#echo $S + +printf "\n\t=========================================================\n" +printf "\t================ Test [fetch/write size] ================\n" +printf "\t=========================================================" + +for N in $Ns +do + if [[ x$S == x ]]; then + m_stride=$N + for (( s=1; s<=$m_stride/32; s*=2 )) + do + one_run $N $s $M $cache + done + else + one_run $N $S $M $cache + fi +done +printf "\n" diff --git a/test/tool/metrics.xml b/test/tool/metrics.xml index c679b91e7b..e2efdbcadf 100644 --- a/test/tool/metrics.xml +++ b/test/tool/metrics.xml @@ -32,6 +32,7 @@ + @@ -137,6 +138,13 @@ expr=100*SQ_INST_CYCLES_SALU*4/SIMD_NUM/GRBM_GUI_ACTIVE > + # Mem32Bwrites The number of effective 32B write transactions into memory + + # FetchSize The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.