Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:22:40

0001 #!/bin/bash
0002 
0003 ###############################################################################
0004 ##                                    README!                                ##
0005 ##                                                                           ##
0006 ## Stress test script to run on phiN, testing different thread/MEIF combos   ##
0007 ## with different instruction set architecture extensions, using default     ## 
0008 ## settings of benchmarking scripts for clone engine track finding + CMSSW   ##
0009 ## n2-seeding, input sample ttbar PU70.                                      ##
0010 ##                                                                           ##
0011 ## Can vary thread/MEIF combos, input file, seeds, building algo by editing  ##
0012 ## this script manually.                                                     ##
0013 ##                                                                           ##
0014 ## Command line inputs are which platform to stress (ben_arch), enable       ##
0015 ## TurboBoost OFF/ON (no_turbo), the min time per test (min_duration), the   ##   
0016 ## time between each test (sleep_time), and the number of events to process  ## 
0017 ## per physical core (base_nevents).                                         ##
0018 ##                                                                           ##
0019 ## N.B.: base_nevents MUST be a number divisible by 4! This is because the   ##
0020 ## max physical cores on KNL is 64, but the highest nTH/nJOB test is 256.    ##       
0021 ##                                                                           ##
0022 ## Output file lists stress test time per event processed per physical core. ##
0023 ###############################################################################
0024 
########################
## Source Environment ##
########################

## Pull in the environment setup first, then the shared stress-test helpers.
## Both paths are relative, so the script has to be launched from the
## directory that contains xeon_scripts/.
. xeon_scripts/init-env.sh
. xeon_scripts/stress-test-common.sh
0031 
###################
## Configuration ##
###################

## Command line inputs (positional; everything except ben_arch has a default)
ben_arch=${1} # SNB (phi1), KNL (phi2), SKL-SP (phi3)
no_turbo=${2:-1} # Turbo OFF or ON --> default is OFF!
min_duration=${3:-1800} # min time spent for each test [s]
sleep_time=${4:-300} # sleep time between tests [s]
base_nevents=${5:-120} # number of events to process per physical core, must be divisible by 4

## Check that a base_nevents value is usable.
## Arguments: $1 - candidate value
## Returns:   0 if $1 is a positive decimal integer (no leading zeros, so it
##            can never be read as octal in later arithmetic) that is
##            divisible by 4; 1 otherwise
IsValidBaseNEvents ()
{
    local n="${1}"
    [[ "${n}" =~ ^[1-9][0-9]*$ ]] && (( n % 4 == 0 ))
}

## Enforce the documented requirement up front instead of discovering it
## after the tests have already been running.
if ! IsValidBaseNEvents "${base_nevents}"
then
    echo "base_nevents=${base_nevents} must be a positive integer divisible by 4! Exiting..." >&2
    exit 1
fi
0042 
## platform specific settings, selected by ben_arch:
##   mOpt             : parallel-build option handed to make
##   maxcore          : core count handed to GetNCore
##   instruction_sets : instruction set extensions to build and test
##   thread_combo_arr : "nThreads nEventsInFlight" pairs to test
##   njob_arr         : job counts for the single-thread-per-job tests
case "${ben_arch}" in
    SNB)
        mOpt="-j 12"
        maxcore=12
        declare -a instruction_sets=(SSE3 AVX)
        declare -a thread_combo_arr=("1 1" "6 6" "12 6" "12 12" "24 6" "24 12" "24 24")
        declare -a njob_arr=("12" "24")
        ;;
    KNL)
        mOpt="-j 64"
        maxcore=64
        declare -a instruction_sets=(SSE3 AVX AVX2 AVX512)
        declare -a thread_combo_arr=("1 1" "32 32" "64 32" "64 64" "128 32" "128 64" "128 128" "256 32" "256 64" "256 128" "256 256")
        declare -a njob_arr=("32" "64" "128" "256")
        ;;
    SKL-SP)
        mOpt="-j 32"
        maxcore=32
        declare -a instruction_sets=(SSE3 AVX AVX2 AVX512)
        declare -a thread_combo_arr=("1 1" "16 16" "32 16" "32 32" "48 16" "48 32" "64 16" "64 32" "64 64")
        declare -a njob_arr=("32" "64")
        ;;
    *)
        ## unknown (or missing) architecture: report on stderr and fail with a
        ## non-zero status so that callers can detect the error
        echo "${ben_arch} is not a valid architecture! Exiting..." >&2
        echo "Valid choices are: SNB, KNL, SKL-SP" >&2
        exit 1
        ;;
esac
0069 
## Input sample: location pieces of the binary event file given to mkFit
dir=/data2/slava77/samples/
subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
file=memoryFile.fv6.default.211008-c6b7c67.bin

## mkFit options shared by every test: seeding, building algorithm, misc flags
seeds="--cmssw-n2seeds"
algo="--build-ce"
opts="--silent --remove-dup --use-dead-modules --backward-fit"

## Base command line, assembled piece by piece; the per-test thread options
## are appended to it later on
base_exe="./mkFit/mkFit"
base_exe+=" --input-file ${dir}/${subdir}/${file}"
base_exe+=" ${seeds} ${algo} ${opts}"

## Output naming: common prefix for all files, plus the final results file
## (ext is not set in this script; presumably it comes from a sourced file)
base_outname="stress_test"
output_file="${base_outname}_results.${ext}"
0084 
## Set TurboBoost option by writing no_turbo (default 1, i.e. Turbo OFF) into
## the intel_pstate control file. Output of tee/sudo is discarded, but a failed
## write is reported: the value is recorded in the results meta-data, so a
## silent failure would mislabel the whole run. The test still proceeds.
if ! echo "${no_turbo}" | PATH=/bin sudo /usr/bin/tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null 2>&1
then
    echo "WARNING: could not write no_turbo=${no_turbo}; actual TurboBoost state may differ from the recorded one!" >&2
fi
0087 
###############
## Run tests ##
###############

## NOTE(review): GetNCore, MkFitLoop and AppendTmpFile, as well as nth_label,
## nev_label, njob_label, tmp_ext and nloop, are not defined in this script;
## presumably they come from xeon_scripts/stress-test-common.sh -- confirm.

## loop instruction sets (i.e. build minimally)
for instruction_set in "${instruction_sets[@]}"
do
    ## compile once, using settings for the given instruction set.
    ## ${!instruction_set} is an indirect expansion: it yields the value of the
    ## variable whose name is stored in instruction_set. It and ${mOpt} are
    ## deliberately unquoted so they split into separate make arguments.
    make distclean
    if ! make ${mOpt} ${!instruction_set}
    then
        ## without a successful build the timings for this instruction set
        ## would be meaningless: restore Turbo to its default and stop
        echo "Build failed for instruction set ${instruction_set}! Exiting..." >&2
        echo 1 | PATH=/bin sudo /usr/bin/tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null 2>&1
        exit 1
    fi

    ## run thread combo tests (nThreads, nEventsInFlight)
    for thread_combo in "${thread_combo_arr[@]}"
    do
        ## split the "nThreads nEventsInFlight" pair; the here-string keeps the
        ## loop body in the current shell, just like the njob loop below
        read -r nth nev <<< "${thread_combo}"

        ## compute total number of events to process
        ncore=$( GetNCore "${nth}" "${maxcore}" )
        nproc=$(( base_nevents * ncore ))

        ## print out which test is being performed
        test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
        echo "Running stress test for: ${test_label}..."

        ## test executable
        test_exe="${base_exe} --num-thr ${nth} --num-thr-ev ${nev}"

        ## output file
        tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"

        ## execute test and pipe time to output file: https://stackoverflow.com/a/2409214
        ## (output of MkFitLoop itself is discarded; only the report of `time`,
        ## written to the stderr of the brace group, lands in the tmp file)
        { time MkFitLoop "${min_duration}" "${test_exe}" "${nproc}" "1" > /dev/null 2>&1 ; } 2> "${tmp_output_file}"

        ## add other info about test to tmp file (done before the pause so the
        ## record is complete even if the script is stopped while sleeping)
        AppendTmpFile "${tmp_output_file}" "${ncore}" "${nproc}" "${nloop}"

        ## pause to let machine cool down between each test
        sleep "${sleep_time}"
    done # end loop over thread combos

    ## run special test of N jobs, single thread each
    for njob in "${njob_arr[@]}"
    do
        ## compute total number of events to process
        ncore=$( GetNCore "${njob}" "${maxcore}" )
        nproc=$(( base_nevents * ncore ))

        ## print out which test is being performed
        test_label="${instruction_set}_${njob_label}${njob}"
        echo "Running stress test for: ${test_label}..."

        ## test executable
        test_exe="${base_exe} --num-thr 1 --num-thr-ev 1"

        ## output file
        tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"

        ## execute test and pipe time to output file: https://stackoverflow.com/a/2409214
        { time MkFitLoop "${min_duration}" "${test_exe}" "${nproc}" "${njob}" > /dev/null 2>&1 ; } 2> "${tmp_output_file}"

        ## add other info about test to tmp file
        AppendTmpFile "${tmp_output_file}" "${ncore}" "${nproc}" "${nloop}"

        ## pause to let machine cool down between each test
        sleep "${sleep_time}"
    done # end loop over njob for single thread

done # end loop over instruction set
0156 
#######################
## Make Final Output ##
#######################

## (Re)create the results file and write the meta-data header in one go
{
    printf 'Stress test meta-data\n\n'
    printf 'ben_arch: %s\n' "${ben_arch}"
    printf 'no_turbo: %s\n' "${no_turbo}"
    printf 'min_duration [s]: %s\n' "${min_duration}"
    printf 'sleep_time [s]: %s\n' "${sleep_time}"
    printf 'base_exe: %s\n' "${base_exe}"
    printf 'base_nevents: %s\n' "${base_nevents}"
    printf '\nResults\n\n'
} > "${output_file}"

## Walk through the per-test tmp files in the same order the tests were run,
## handing each one to DumpIntoFile together with the results file
for instruction_set in "${instruction_sets[@]}"
do
    ## nThread/MEIF tests
    for thread_combo in "${thread_combo_arr[@]}"
    do
        printf '%s\n' "${thread_combo}" | while read -r nth nev
        do
            ## rebuild the label used when the test ran, and announce it
            test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
            printf '%s\n' "Computing time for: ${test_label}"

            ## tmp file belonging to this test
            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"

            DumpIntoFile "${tmp_output_file}" "${output_file}"
        done # end of reading one thread combo
    done # end of thread combos

    ## single thread njob tests
    for njob in "${njob_arr[@]}"
    do
        ## rebuild the label used when the test ran, and announce it
        test_label="${instruction_set}_${njob_label}${njob}"
        printf '%s\n' "Computing time for: ${test_label}"

        ## tmp file belonging to this test
        tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"

        DumpIntoFile "${tmp_output_file}" "${output_file}"
    done # end of njob array

done # end of instruction sets
0206 
#########################################
## Clean up and Restore Default Status ##
#########################################

## run the distclean target once more now that all tests are done
make distclean

## write 1 back into no_turbo (the default of this script, i.e. Turbo OFF);
## anything printed by sudo/tee is thrown away
printf '1\n' | PATH=/bin sudo /usr/bin/tee /sys/devices/system/cpu/intel_pstate/no_turbo &> /dev/null

###################
## Final Message ##
###################

printf '%s\n' "Finished stress test!"