#!/bin/bash
#
# chkexpjlt.sh - job life time (JLT) report for the completed "sim" jobs of
# an Autosubmit experiment.
#
# Usage:
#   ./chkexpjlt.sh arch expid sesth expvol >& expid.log &
#
#   arch    platform the experiment runs on: mn-*, ithaca, ecmwf, ht-*,
#           lindgren, jaguar or ar-*
#   expid   experiment identifier (its files live under
#           /esnas/autosubmit/<expid>)
#   sesth   estimated computing time of one sim job, in hours
#   expvol  number of jobs the "pending jobs" figure is computed against
#
# Exit status: 0 on success, 1 for an unsupported arch or when the report
# cannot be produced, 2 on wrong usage.
#
# Every error path uses `return`, and `main "$@"` is the last command, so the
# script's exit status is simply main's return status.

# set -x   # uncomment to trace execution
set -u
# Unmatched globs must expand to nothing, not to the literal pattern.
shopt -s nullglob

readonly AUTOSUBMIT_ROOT=/esnas/autosubmit
readonly JOB_TYPE=sim

# Prints a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Prints the command-line synopsis on stderr.
usage() {
  printf 'Usage: %s arch expid sesth expvol\n' "${0##*/}" >&2
}

#######################################
# Selects the per-platform settings.
# Globals:   rsrc (written) - remote scratch root; may contain globs that
#                             are meant to be expanded by the remote shell
#            rptr (written) - regex selecting the error logs in the remote
#                             listing; empty when none is known
# Arguments: $1 - platform name
# Returns:   0 if the platform is known, 1 otherwise
#######################################
set_platform() {
  case "$1" in
    mn-*)        rsrc='/gpfs/scratch/*/*';         rptr='\.err' ;;
    ithaca)      rsrc='/scratch/cfu/*';            rptr='\.e' ;;
    ecmwf)       rsrc='c2a:/scratch/ms';           rptr='\.err' ;;
    ht-* | ar-*) rsrc='/work/pr1u1011/pr1u1011/*'; rptr='\.e' ;;
    # No log pattern was ever defined for these two platforms.
    lindgren)    rsrc='/cfs/klemming/scratch/*/*'; rptr='' ;;
    jaguar)      rsrc='';                          rptr='' ;;
    *) return 1 ;;
  esac
}

# conf_value KEY FILE: prints the right-hand side of the "KEY = value"
# line(s) of FILE, with blanks removed.
conf_value() {
  grep -w -- "$1" "$2" | cut -d '=' -f 2 | sed 's/ //g'
}

# cmd_value KEY FILE: prints the value of the first "KEY=value" line of
# FILE. Only the first match is used so that a report row stays on one line.
cmd_value() {
  grep -m 1 -- "$1=" "$2" | cut -d '=' -f 2
}

# mtime FILE: prints FILE's modification time in seconds since the epoch;
# fails silently when FILE cannot be stat'ed.
mtime() {
  stat -c %Y -- "$1" 2>/dev/null
}

# name_field N: reads file paths on stdin and prints the N-th '_'-separated
# field of each basename, dropping consecutive duplicates. Working on the
# basename keeps underscores in the directory part from shifting the fields.
name_field() {
  sed 's|.*/||' | cut -d '_' -f "$1" | uniq
}

# div2 NUM DEN: prints NUM/DEN with two decimals ("0.00" when DEN is zero).
# Values are passed with -v instead of being pasted into the awk program.
div2() {
  awk -v num="$1" -v den="$2" \
    'BEGIN { printf "%.2f\n", (den + 0 == 0 ? 0 : num / den) }'
}

# hours_over_estimate SECONDS ESTIMATE_HOURS: prints, with two decimals, by
# how many hours SECONDS exceeds the estimate.
hours_over_estimate() {
  awk -v secs="$1" -v est="$2" 'BEGIN { printf "%.2f\n", secs / 3600 - est }'
}

#######################################
# Prints the number of failed runs of a job as two digits: the number of
# LISTING entries that name JOB and match PATTERN, minus one (the original
# accounting of this script), never below zero.
# Prints "NA" when PATTERN is empty, i.e. the platform has no known log
# pattern and nothing can be counted.
# Arguments: $1 - file holding the remote log listing
#            $2 - job name
#            $3 - regex selecting the error logs
#######################################
failed_runs() {
  local listing=$1 job=$2 pattern=$3 hits
  if [[ -z $pattern ]]; then
    printf 'NA\n'
    return 0
  fi
  hits=$(grep -F -- "$job" "$listing" | grep -c -- "$pattern")
  printf '%02d\n' "$(( hits > 0 ? hits - 1 : 0 ))"
}

#######################################
# Writes the listing of the experiment's remote LOG directory to a file.
# Globals:   rsrc (read), AUTOSUBMIT_ROOT (read)
# Arguments: $1 - platform, $2 - experiment id, $3 - output file
# Returns:   status of the listing command
#######################################
fetch_log_listing() {
  local arch=$1 expid=$2 out=$3 conf hpcproj hpcuser
  case "$arch" in
    ecmwf)
      conf="$AUTOSUBMIT_ROOT/$expid/conf/expdef_${expid}.conf"
      hpcproj=$(conf_value HPCPROJ "$conf")
      hpcuser=$(conf_value HPCUSER "$conf")
      ecaccess-file-dir "$rsrc/$hpcproj/$hpcuser/$expid/LOG_$expid" > "$out"
      ;;
    *)
      # A single quoted string: the globs in rsrc must be expanded by the
      # remote shell, never by the local one.
      ssh "$arch" "ls -1 $rsrc/$expid/LOG_$expid" > "$out"
      ;;
  esac
}

#######################################
# Prints one report row per completed sim job, grouped by start date and
# member, chunks in order of completion. The life time of a chunk runs from
# the completion of the previous chunk (of the member's "ini" job for the
# first chunk) to its own completion.
# Globals:   cnt (written) - number of rows printed
# Arguments: $1 - experiment tmp directory, $2 - experiment id,
#            $3 - estimated hours per job, $4 - remote log listing file,
#            $5 - regex selecting the error logs
#######################################
print_job_rows() {
  local src=$1 expid=$2 sesth=$3 listing=$4 pattern=$5
  local typ=$JOB_TYPE
  local s m c job prev ts te secs ncnt njlt njet nfail csd ced
  local -a all per_date per_member

  cnt=0
  all=( "$src/${expid}_"*"_${typ}_COMPLETED" )
  (( ${#all[@]} )) || return 0

  while IFS= read -r s; do
    per_date=( "$src/${expid}_${s}_"*"_${typ}_COMPLETED" )
    (( ${#per_date[@]} )) || continue
    while IFS= read -r m; do
      per_member=( "$src/${expid}_${s}_${m}_"*"_${typ}_COMPLETED" )
      (( ${#per_member[@]} )) || continue
      prev=${expid}_${s}_${m}_ini
      while IFS= read -r c; do
        cnt=$((cnt + 1))
        printf -v ncnt '%04d' "$cnt"
        job=${expid}_${s}_${m}_${c}_${typ}
        csd=$(cmd_value Chunk_start_date "$src/$job.cmd")
        ced=$(cmd_value Chunk_end_date "$src/$job.cmd")
        if ts=$(mtime "$src/${prev}_COMPLETED") \
          && te=$(mtime "$src/${job}_COMPLETED"); then
          secs=$((te - ts))
          njlt=$(div2 "$secs" 3600)                    # job life time
          njet=$(hours_over_estimate "$secs" "$sesth") # job excess time
        else
          warn "cannot time $job: ${prev}_COMPLETED or ${job}_COMPLETED is missing"
          njlt=NA
          njet=NA
        fi
        nfail=$(failed_runs "$listing" "$job" "$pattern")
        # Empty dates are left out altogether (no dangling blanks).
        printf '%s %s %s %s %s%s%s\n' "$ncnt" "$njlt" "$njet" "$nfail" \
          "$job" "${csd:+ $csd}" "${ced:+ $ced}"
        prev=$job
      # ls is used on purpose here: it is the portable way to order the
      # markers by modification time (oldest first); the names are
      # machine-generated and contain no whitespace.
      done < <(ls -1rt -- "${per_member[@]}" | name_field 4)
    done < <(printf '%s\n' "${per_date[@]}" | name_field 3)
  done < <(printf '%s\n' "${all[@]}" | name_field 2)
}

#######################################
# Prints the closing summary: period covered (oldest .cmd file to newest
# _COMPLETED marker), completion rate and number of pending jobs.
# Arguments: $1 - experiment tmp directory, $2 - experiment id,
#            $3 - estimated hours per job, $4 - expvol,
#            $5 - number of completed jobs
# Returns:   1 when the period cannot be determined, 0 otherwise
#######################################
print_summary() {
  local src=$1 expid=$2 sesth=$3 expvol=$4 done_jobs=$5
  local started till elt nelt najpd
  local -a cmds markers

  cmds=( "$src"/*.cmd )
  markers=( "$src"/*_COMPLETED )
  if (( ${#cmds[@]} == 0 || ${#markers[@]} == 0 )); then
    warn "no .cmd or _COMPLETED files under $src: summary skipped"
    return 1
  fi
  started=$(stat -c %Y -- "${cmds[@]}" | sort -n | head -n 1)
  till=$(stat -c %Y -- "${markers[@]}" | sort -n | tail -n 1)
  elt=$((till - started))
  nelt=$(div2 "$elt" 86400)
  # Rate from the exact number of seconds, so a very short period can
  # neither distort it nor cause a division by zero.
  najpd=$(div2 "$((done_jobs * 86400))" "$elt")

  echo
  echo "$expid (optimum computing time per job ~${sesth}H)"
  echo "started: $(date -d "@$started")"
  echo "...till: $(date -d "@$till")"
  echo "$done_jobs jobs completed in ~${nelt} days which implies"
  echo "average ~$najpd jobs completed per single day."
  echo "Until: $(date)"
  # 10# keeps a zero-padded expvol from being read as octal.
  echo "no. of pending jobs is $((10#$expvol - done_jobs)) out of $expvol."
  echo
  echo "* Job life time (JLT) since Autosubmit started the job first time"
  echo "* Job excess time (JET) including queue + machine + autosubmit + human intervention etc"
}

#######################################
# Entry point: validates the arguments, fetches the remote log listing and
# prints the report.
# Globals:   rsrc, rptr, rlst, cnt (written)
# Arguments: arch expid sesth expvol
# Returns:   see "Exit status" in the file header
#######################################
main() {
  if (( $# != 4 )); then
    usage
    return 2
  fi
  local arch=$1 expid=$2 sesth=$3 expvol=$4
  local num_re='^([0-9]+[.]?[0-9]*|[.][0-9]+)$' int_re='^[0-9]+$'
  if [[ ! $sesth =~ $num_re ]]; then
    warn "sesth must be a number of hours, got '$sesth'"
    return 2
  fi
  if [[ ! $expvol =~ $int_re ]]; then
    warn "expvol must be a non-negative integer, got '$expvol'"
    return 2
  fi

  if ! set_platform "$arch"; then
    warn "!!! $arch is not available !!!"
    return 1
  fi

  local src="$AUTOSUBMIT_ROOT/$expid/tmp"

  # Unpredictable temp file, removed on every exit path.
  rlst=$(mktemp "${TMPDIR:-/tmp}/${expid}.chkexpjlt.XXXXXX") || {
    warn "cannot create a temporary file"
    return 1
  }
  trap 'rm -f -- "$rlst"' EXIT

  fetch_log_listing "$arch" "$expid" "$rlst" \
    || warn "could not list the remote logs of $expid on $arch; failed-job counts are unreliable"

  # compute job life time (jlt) for sim jobs
  echo "JNo. JLT(Hours)* JET(Hours) JFailed JName Chunk_start_date Chunk_end_date"
  print_job_rows "$src" "$expid" "$sesth" "$rlst" "$rptr"
  print_summary "$src" "$expid" "$sesth" "$expvol" "$cnt"
}

main "$@"