#!/bin/bash
#
# chkexpjlt.sh - report the job life time (JLT) of the completed "sim" jobs
# of an Autosubmit experiment, followed by a short throughput summary.
#
# Usage:
#   ./chkexpjlt.sh arch expid sesth expvol >& expid.log &
#
# Arguments:
#   arch    platform name; selects the remote log location and, except for
#           ecmwf, is also used as the ssh host
#   expid   experiment identifier
#   sesth   estimated computing time of one sim job, in hours
#   expvol  total number of jobs; pending jobs are reported as
#           expvol minus the number of completed sim jobs
#
# Output (stdout), one row per completed sim job:
#   JNo. JLT(Hours) JET(Hours) JFailed JName Chunk_start_date Chunk_end_date
# "NA" is printed for a value that cannot be determined (missing COMPLETED
# file, or a platform without a known error-log pattern).
#
#set -xuve
set -u

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a message on stderr and carry on.
warn() {
  printf '%s\n' "$*" >&2
}

# Print the value assigned to key $1 in the experiment configuration file,
# with all blanks removed.
conf_value() {
  grep -w -- "$1" "$conf" | cut -d '=' -f 2 | sed 's/ //g'
}

# Read paths on stdin and print field number $1 ("_"-separated) of each
# base name, dropping adjacent duplicates. The directory part is stripped
# first so that an underscore in the path cannot shift the field numbers.
name_field() {
  sed 's|.*/||' | cut -d '_' -f "$1" | uniq
}

if (( $# < 4 )); then
  die "usage: ${0##*/} arch expid sesth expvol"
fi

arch=$1
expid=$2
sesth=$3 # sim (job) estimated computing time (in hours)
expvol=$4

[[ "$expvol" =~ ^[0-9]+$ ]] || die "expvol must be a non-negative integer: $expvol"

src=/esnas/autosubmit/$expid/tmp
conf=/esnas/autosubmit/$expid/conf/expdef_${expid}.conf

# rsrc: remote directory (glob, expanded on the remote side) that holds
#       <expid>/LOG_<expid>.
# rptr: regular expression selecting the error logs in the remote listing.
# Both stay empty where the platform does not define them.
rsrc=
rptr=
case "$arch" in
  mn-*)     rsrc='/gpfs/scratch/*/*';         rptr='\.err' ;;
  ithaca)   rsrc='/scratch/cfu/*';            rptr='\.e' ;;
  ecmwf)    rsrc='c2a:/scratch/ms';           rptr='\.err' ;;
  ht-*)     rsrc='/work/pr1u1011/pr1u1011/*'; rptr='\.e' ;;
  lindgren) rsrc='/cfs/klemming/scratch/*/*' ;;
  jaguar)   ;;
  ar-*)     rsrc='/work/pr1u1011/pr1u1011/*'; rptr='\.e' ;;
  *)
    echo "!!! $arch is not available !!!"
    exit 1
    ;;
esac

# Private, unpredictable temporary file for the remote log listing; it is
# removed on every exit path.
rlst=$(mktemp "${TMPDIR:-/tmp}/${expid}.chkexpjlt.XXXXXX") \
  || die "cannot create temporary file"
trap 'rm -f -- "$rlst"' EXIT

# Fetch the listing of the remote log directory into $rlst.
case "$arch" in
  ecmwf)
    hpcproj=$(conf_value HPCPROJ)
    hpcuser=$(conf_value HPCUSER)
    ecaccess-file-dir "$rsrc/$hpcproj/$hpcuser/$expid/LOG_$expid" > "$rlst" \
      || warn "warning: could not list the remote logs of $expid on $arch"
    ;;
  *)
    if [[ -n "$rsrc" ]]; then
      # The command is passed as one quoted string so that the glob in
      # $rsrc is expanded by the remote shell, never by the local one.
      ssh "$arch" "ls -1 $rsrc/$expid/LOG_$expid" > "$rlst" \
        || warn "warning: could not list the remote logs of $expid on $arch"
    else
      warn "warning: no remote log location known for $arch; JFailed not available"
    fi
    ;;
esac

#compute job life time (jlt) for sim jobs
echo "JNo. JLT(Hours)* JET(Hours) JFailed JName Chunk_start_date Chunk_end_date"

typ=sim
cnt=0

# Modification time (epoch seconds) of the oldest .cmd file.
started=$(stat -c %Y -- "$src"/*.cmd | sort -n | head -n 1)
[[ -n "$started" ]] || die "no .cmd file found in $src"

sdates=$(ls -1 -- "$src/${expid}_"*"_${typ}_COMPLETED" | name_field 2)

# The lists below are deliberately expanded unquoted: one word per item.
for s in $sdates; do
  mems=$(ls -1 -- "$src/${expid}_${s}_"*"_${typ}_COMPLETED" | name_field 3)
  for m in $mems; do
    cnt2=0
    prev_job=
    # Oldest first, so each chunk follows the one completed before it.
    chunks=$(ls -1rt -- "$src/${expid}_${s}_${m}_"*"_${typ}_COMPLETED" | name_field 4)
    for c in $chunks; do
      cnt=$((cnt + 1))
      cnt2=$((cnt2 + 1))
      printf -v ncnt '%04d' "$cnt"

      job=${expid}_${s}_${m}_${c}_${typ}
      csd=$(grep 'Chunk_start_date=' "$src/$job.cmd" | cut -d '=' -f 2)
      ced=$(grep 'Chunk_end_date=' "$src/$job.cmd" | cut -d '=' -f 2)

      # The clock starts at the COMPLETED file of the "ini" job for the
      # first chunk of a member, and at the COMPLETED file of the previous
      # chunk otherwise.
      case $cnt2 in
        1) ref_job=${expid}_${s}_${m}_ini ;;
        #1) ref_job=${expid}_${s}_${m}_1_init ;;
        *) ref_job=$prev_job ;;
      esac
      ts=$(stat -c %Y -- "$src/${ref_job}_COMPLETED")
      te=$(stat -c %Y -- "$src/${job}_COMPLETED")

      if [[ -n "$ts" && -n "$te" ]]; then
        # job life time, and life time in excess of the estimate
        njlt=$(awk -v s="$ts" -v e="$te" \
          'BEGIN { printf "%.2f", (e - s) / 3600 }')
        njwt=$(awk -v s="$ts" -v e="$te" -v est="$sesth" \
          'BEGIN { printf "%.2f", (e - s) / 3600 - est }')
      else
        # Never reuse the values of the previous row.
        njlt=NA
        njwt=NA
      fi

      # no. of failed jobs: error logs listed for this job, minus one
      # (presumably the run that finally completed also leaves one such
      # log - TODO confirm). Never reported below zero.
      if [[ -n "$rptr" ]]; then
        nfj=$(grep -F -- "$job" "$rlst" | grep -c -- "$rptr")
        nfj=$((nfj - 1))
        if (( nfj < 0 )); then
          nfj=0
        fi
        printf -v nnfj '%02d' "$nfj"
      else
        nnfj=NA
      fi

      # Unquoted on purpose: collapses any whitespace inside the values so
      # each job stays on a single row.
      echo $ncnt $njlt $njwt $nnfj $job $csd $ced

      prev_job=$job
    done
  done
done

# Modification time (epoch seconds) of the newest COMPLETED file.
till=$(stat -c %Y -- "$src"/*_COMPLETED | sort -n | tail -n 1)
[[ -n "$till" ]] || die "no COMPLETED file found in $src"

elt=$((till - started))
nelt=$(awk -v s="$elt" 'BEGIN { printf "%.2f", s / 86400 }')
# Use the unrounded elapsed time so a short run that rounds to 0.00 days
# does not cause a division by zero.
najpd=$(awk -v n="$cnt" -v s="$elt" \
  'BEGIN { if (s > 0) printf "%.2f", n * 86400 / s; else printf "0.00" }')

echo
echo "$expid (optimum computing time per job ~${sesth}H)"
echo "started: $(date -d "@$started")"
echo "...till: $(date -d "@$till")"
echo "$cnt jobs completed in ~${nelt} days which implies"
echo "average ~$najpd jobs completed per single day."
echo "Until: $(date)"
echo "no. of pending jobs is $((10#$expvol - cnt)) out of $expvol."
echo
echo "* Job life time (JLT) since Autosubmit started the job first time"
echo "* Job excess time (JET) including queue + machine + autosubmit + human intervention etc"