#!/bin/bash
# chkexpjlt.sh - job life time (JLT) report for the "sim" jobs of an
# Autosubmit experiment.
#
# Usage:
#   ./chkexpjlt.sh arch expid sesth expvol >& expid.log &
#
# Arguments:
#   arch    platform name; selects the remote log location and the pattern
#           used to recognise error logs (see the first case statement)
#   expid   experiment identifier
#   sesth   estimated computing time of one sim job, in hours
#   expvol  total number of sim jobs expected for the experiment
#
# For every completed sim chunk one line is printed with: running number,
# JLT (hours between the *_COMPLETED marker of the previous job and the one
# of this job), JET (JLT minus sesth), number of failed attempts (matching
# error logs in the remote LOG listing minus one; -1 means no matching log
# was found), job name and the chunk start/end dates read from the job's
# .cmd file.  A summary of the whole experiment follows.
#
# Exit status: 1 on bad arguments, unknown arch, or when no .cmd file exists.
#set -xuve

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# name_field N
# Reads file paths on stdin, drops the directory and the leading "<expid>_"
# from each name and prints underscore-separated field N of what is left,
# collapsing adjacent duplicates.  Working on the base name keeps the field
# positions stable even if the directory or expid contains an underscore.
# Globals: expid (read)
name_field() {
  local path name
  while IFS= read -r path; do
    name=${path##*/}
    printf '%s\n' "${name#"${expid}_"}"
  done | cut -d '_' -f "$1" | uniq
}

if (( $# < 4 )); then
  die "usage: ${0##*/} arch expid sesth expvol"
fi

arch=$1
expid=$2
sesth=$3 # sim (job) estimated computing time (in hours)
expvol=$4
[[ $expvol =~ ^[0-9]+$ ]] || die "expvol must be a non-negative integer, got '$expvol'"

src=/esnas/autosubmit/$expid/tmp

# rsrc: remote directory (may contain globs that must be expanded remotely)
# rptr: regex identifying an error log in the remote listing
case $arch in
  mn-*)     rsrc='/gpfs/scratch/*/*';         rptr='\.err' ;;
  ithaca)   rsrc='/scratch/cfu/*';            rptr='\.e' ;;
  ecmwf)    rsrc='c2a:/scratch/ms';           rptr='\.err' ;;
  ht-*)     rsrc='/work/pr1u1011/pr1u1011/*'; rptr='\.e' ;;
  # NOTE(review): no error-log pattern is known for lindgren/jaguar (and no
  # remote directory for jaguar); JFailed is reported as -1 for them.
  lindgren) rsrc='/cfs/klemming/scratch/*/*'; rptr='' ;;
  jaguar)   rsrc='';                          rptr='' ;;
  ar-*)     rsrc='/work/pr1u1011/pr1u1011/*'; rptr='\.e' ;;
  *) echo "!!! $arch is not available !!!"; exit 1 ;;
esac

# Listing of the remote LOG directory; removed automatically on exit.
rlst=$(mktemp "${TMPDIR:-/tmp}/${expid}.chkexpjlt.XXXXXX") \
  || die "cannot create temporary file"
trap 'rm -f -- "$rlst"' EXIT

case $arch in
  ecmwf)
    conf=/esnas/autosubmit/$expid/conf/expdef_${expid}.conf
    hpcproj=$(grep -w HPCPROJ "$conf" | cut -d '=' -f2 | sed 's/ //g')
    hpcuser=$(grep -w HPCUSER "$conf" | cut -d '=' -f2 | sed 's/ //g')
    ecaccess-file-dir "$rsrc/$hpcproj/$hpcuser/$expid/LOG_$expid" > "$rlst" \
      || echo "warning: could not list remote logs; JFailed is unreliable" >&2
    ;;
  *)
    # The command is passed as one quoted string so the globs in $rsrc are
    # expanded by the remote shell, never locally.  -n keeps ssh away from
    # stdin, which is required when the script runs in the background.
    ssh -n "$arch" "ls -1 $rsrc/$expid/LOG_$expid" > "$rlst" \
      || echo "warning: could not list remote logs; JFailed is unreliable" >&2
    ;;
esac

#compute job life time (jlt) for sim jobs
echo "JNo. JLT(Hours)* JET(Hours) JFailed JName Chunk_start_date Chunk_end_date"
typ=sim
cnt=0

# Unmatched globs expand to nothing so empty directories are detectable.
shopt -s nullglob

cmd_files=("$src"/*.cmd)
(( ${#cmd_files[@]} )) || die "no .cmd files found in $src"
# Oldest .cmd modification time = moment the experiment was started.
started=$(stat -c %Y -- "${cmd_files[@]}" | sort -n | head -n 1)

sim_done=("$src/${expid}"_*_"${typ}"_COMPLETED)
for s in $(printf '%s\n' "${sim_done[@]}" | name_field 1); do
  mem_done=("$src/${expid}_${s}"_*_"${typ}"_COMPLETED)
  for m in $(printf '%s\n' "${mem_done[@]}" | name_field 2); do
    chunk_done=("$src/${expid}_${s}_${m}"_*_"${typ}"_COMPLETED)
    (( ${#chunk_done[@]} )) || continue
    # The first chunk of a member is measured from the member's ini job;
    # later chunks are measured from the previous chunk.
    #prev_job=${expid}_${s}_${m}_1_init
    prev_job=${expid}_${s}_${m}_ini
    # Chunks are visited in completion order (oldest marker first); ls is
    # used here because it sorts on the full-resolution timestamp.
    for c in $(ls -1rt -- "${chunk_done[@]}" | name_field 3); do
      cnt=$((cnt + 1))
      ncnt=$(printf '%04d' "$cnt")
      job=${expid}_${s}_${m}_${c}_${typ}
      csd=$(grep 'Chunk_start_date=' "$src/$job.cmd" | cut -d '=' -f 2)
      ced=$(grep 'Chunk_end_date=' "$src/$job.cmd" | cut -d '=' -f 2)

      if ts=$(stat -c %Y -- "$src/${prev_job}_COMPLETED" 2>/dev/null) &&
         te=$(stat -c %Y -- "$src/${job}_COMPLETED" 2>/dev/null); then
        secs=$((te - ts))
        # job life time
        njlt=$(awk -v s="$secs" 'BEGIN { printf "%0.2f", s / 3600 }')
        # job wasted time
        njwt=$(awk -v s="$secs" -v est="$sesth" \
          'BEGIN { printf "%0.2f", s / 3600 - est }')
      else
        # A marker file is missing, so no interval can be computed.
        njlt=NA
        njwt=NA
      fi

      # no. of failed jobs: error logs of this job minus the final attempt
      if [[ -n $rptr ]]; then
        nfj=$(grep -F -- "$job" "$rlst" | grep -c -- "$rptr")
      else
        nfj=0
      fi
      nfj=$((nfj - 1))
      nnfj=$(printf '%02d' "$nfj")

      # csd/ced are left unquoted on purpose: multi-line or empty values
      # collapse into a single output line.
      # shellcheck disable=SC2086
      echo "$ncnt" "$njlt" "$njwt" "$nnfj" "$job" $csd $ced
      prev_job=$job
    done
  done
done

done_files=("$src"/*_COMPLETED)
if (( ${#done_files[@]} )); then
  # Newest *_COMPLETED modification time = last recorded progress.
  till=$(stat -c %Y -- "${done_files[@]}" | sort -n | tail -n 1)
else
  till=$started
fi
elt=$((till - started))
nelt=$(awk -v s="$elt" 'BEGIN { printf "%0.2f", s / 86400 }')
# Computed from the exact elapsed seconds; 0 when no time has elapsed.
najpd=$(awk -v n="$cnt" -v s="$elt" \
  'BEGIN { printf "%0.2f", (s > 0) ? n * 86400 / s : 0 }')

echo
echo "$expid (optimum computing time per job ~${sesth}H)"
echo "started: $(date -d "@$started")"
echo "...till: $(date -d "@$till")"
echo "$cnt jobs completed in ~${nelt} days which implies"
echo "average ~$najpd jobs completed per single day."
echo "Until: $(date)"
echo "no. of pending jobs is $((10#$expvol - cnt)) out of $expvol."
echo
echo "* Job life time (JLT) since Autosubmit started the job first time"
echo "* Job excess time (JET) including queue + machine + autosubmit + human intervention etc"