multicore.tmpl 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #!/bin/bash
  2. #
  3. # submit.sh
  4. #
  5. # Portable bash script to run LPJ-GUESS version:
  6. # BINARY
  7. # as a parallel job on a multicore system.
  8. #
  9. # Created automatically on DATE
  10. #
  11. # Usage:
  12. #
  13. # 1. Copy script to the directory where you want output written.
  14. # This will be called the RUN DIRECTORY.
  15. # 2. In an editor, set appropriate values for the variables NPROCESS,
  16. # INSFILE, INPUT_MODULE, GRIDLIST and OUTFILES (NB: no space after the = sign):
  # Job configuration. Edit the values to the right of each = sign
  # (no spaces around the = sign).
  NPROCESS="15"
  INSFILE="guess.ins"
  INPUT_MODULE="cru_ncep"
  GRIDLIST="gridlist.txt"
  OUTFILES="*.out"
  22. # Where:
  23. # NPROCESS = number of processes in parallel job
  24. # INSFILE = path to ins file from run directory
  25. # INPUT_MODULE = input module to use
  26. # GRIDLIST = path to gridlist file from run directory
  27. # OUTFILES = list of LPJ-GUESS output files in single quotes,
  28. # and separated by spaces (filenames only, including
  29. # extension, no directory.) Shell wildcards are allowed.
  30. #
  31. # 3. Run the script using the command:
  32. # ./submit.sh
  33. # or:
  34. # ./submit.sh [-s <file>] [-i <ins-file>]
  35. #
  36. # All arguments are optional and interpreted as:
  37. # file = filename of a file which can override the variables
  38. # above
  39. # ins-file = instruction file to use, overrides the INSFILE
  40. # variable above
  41. #
  42. # Nothing to change past here
  43. ########################################################################
  # Handle the command line arguments.
  #   -s <file>      file whose contents override the submit variables
  #   -i <ins-file>  instruction file, overrides INSFILE
  # The leading ':' in the option string selects getopts' silent mode, in
  # which a missing option argument is reported as ':' and an unknown option
  # as '?'. Both are rejected here instead of being silently ignored.
  # Start from empty values so nothing leaks in from the environment.
  submit_vars_file=
  ins=
  while getopts ":s:i:" opt; do
    case "$opt" in
      s) submit_vars_file=$OPTARG ;;
      i) ins=$OPTARG ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        echo "Usage: $0 [-s <file>] [-i <ins-file>]" >&2
        exit 2
        ;;
      \?)
        echo "Unknown option: -$OPTARG" >&2
        echo "Usage: $0 [-s <file>] [-i <ins-file>]" >&2
        exit 2
        ;;
    esac
  done
  # Override the submit variables with the contents of a file, if given.
  # The file is sourced, i.e. executed as shell code inside this script, so
  # it can reassign any of the variables set above.
  if [ -n "$submit_vars_file" ]; then
    if [ ! -r "$submit_vars_file" ]; then
      echo "Cannot read submit variables file: $submit_vars_file" >&2
      exit 1
    fi
    # Quoted so that a path containing spaces is sourced as one file.
    source "$submit_vars_file"
  fi
  # Override INSFILE with the ins-file parameter, if given
  if [ -n "$ins" ]; then
    INSFILE=$ins
  fi
  # Convert INSFILE to an absolute path since we will be starting the
  # guess instances from different directories.
  # Please note when porting this script: readlink may not be available
  # on non-Linux systems. Also, using absolute path names means the
  # instruction file needs to be in a place accessible from the nodes.
  INSFILE=$(readlink -f -- "$INSFILE")
  if [ -z "$INSFILE" ]; then
    # readlink printed nothing, so the path could not be resolved.
    echo "Warning: could not resolve the instruction file path." >&2
  fi
  # File name of the gridlist without its directory part. Quoted so that a
  # path containing spaces is passed to basename as a single argument.
  GRIDLIST_FILENAME=$(basename -- "$GRIDLIST")
  # This function creates the gridlist files for each run by splitting
  # the original gridlist file into approximately equal parts.
  #
  # Globals read: NPROCESS, GRIDLIST, GRIDLIST_FILENAME
  # Expects the directories run1 .. run$NPROCESS to exist in the current
  # directory. Returns non-zero if the gridlist cannot be read or split.
  function split_gridlist {
    local a i file line_count lines_per_run

    # Give every run a placeholder gridlist first, so that runs which get
    # no chunk below still have a gridlist file.
    for ((a = 1; a <= NPROCESS; a++)); do
      echo > "run$a/$GRIDLIST_FILENAME"
    done

    # Count lines with awk instead of wc -l: wc counts newline characters
    # and therefore misses a last line without a trailing newline, while
    # split still treats it as a line. That mismatch could yield more
    # chunks than there are run directories.
    line_count=$(awk 'END { print NR }' < "$GRIDLIST") || return 1
    if ((line_count == 0)); then
      echo "Warning: gridlist $GRIDLIST is empty, nothing to split." >&2
      return 0
    fi

    # Lines per chunk: line count divided by NPROCESS, rounded up.
    lines_per_run=$(((line_count + NPROCESS - 1) / NPROCESS))

    # Remove chunks left behind by an interrupted earlier run, then split
    # the gridlist into temporary files.
    rm -f tmpSPLITGRID_*
    split -a 4 -l "$lines_per_run" -- "$GRIDLIST" tmpSPLITGRID_ || return 1

    # Move the temporary files into the runX-directories, in name order.
    i=1
    for file in tmpSPLITGRID_*; do
      [ -e "$file" ] || continue
      mv -- "$file" "run$i/$GRIDLIST_FILENAME"
      i=$((i + 1))
    done
  }
  # Start a fresh progress.sh (replacing any earlier one) containing only
  # its header comment followed by an empty line.
  printf '%s\n' \
    "##############################################################" \
    "# PROGRESS.SH" \
    "# Upload current guess.log files from local nodes and check" \
    "# Usage: sh progress.sh" \
    "" > progress.sh
  # Create a run subdirectory for each process and clean up what an earlier
  # run left in it (guess.log and the per-run gridlist). Also append one
  # status section per run to progress.sh.
  for ((a = 1; a <= NPROCESS; a++)); do
    if ! mkdir -p "run$a"; then
      echo "Could not create directory run$a" >&2
      exit 1
    fi
    # Remove by path rather than cd-ing into the directory: after a failed
    # cd, rm would delete guess.log and the gridlist in the current
    # directory, and the following "cd .." would leave it.
    rm -f -- "run$a/guess.log" "run$a/$GRIDLIST_FILENAME"
    echo "echo '********** Last few lines of ./run${a}/guess.log: **********'" >> progress.sh
    echo "tail ./run${a}/guess.log" >> progress.sh
  done
  # Distribute the gridlist over the run directories.
  split_gridlist
  # The failed.txt file lists the processes that didn't finish properly.
  # If it exists, remove it before running again.
  rm -f failed.txt
  # Start all processes in the background, each from inside its own run
  # directory. stdout/stderr of all processes are appended to stdout.txt and
  # stderr.txt in the directory this script runs in.
  # NOTE(review): BINARY looks like a placeholder that is replaced with the
  # model executable when this template is instantiated - confirm.
  for ((a = 1; a <= NPROCESS; a++)); do
    echo "Starting process $a..."
    (
      # The cd happens inside the subshell, so a failure can neither start
      # the model in the wrong directory nor move the main script.
      if ! cd "run$a"; then
        echo -n "$a " >> failed.txt
        exit 1
      fi
      if ! BINARY -input "$INPUT_MODULE" "$INSFILE" 1>> ../stdout.txt 2>> ../stderr.txt; then
        echo -n "$a " >> ../failed.txt
      fi
    ) &
  done
  # On Ctrl-C from the user, signal the whole process group (kill 0), which
  # includes every GUESS instance started by this script, then exit.
  trap 'kill 0; exit 1' INT
  # All processes have been launched; tell the user how to follow them.
  printf '%s\n' \
    "Done!" \
    "" \
    "Check progress with the progress script, either in a different shell, or" \
    "by placing this job in the background." \
    "" \
    "Waiting for all processes to finish..."
  # Block until every background process has ended.
  wait
  printf '%s\n' "Done!" ""
  # failed.txt only exists if at least one process reported failure; in that
  # case list the failed processes and stop before any output is appended.
  [ ! -f failed.txt ] || {
    echo "The following processes failed: " $(cat failed.txt)
    echo "Check the log files for these processes for more information."
    exit 1
  }
  # Append the results
  #
  # append_files <number_of_jobs> <file>
  #   number_of_jobs  highest run number to collect from
  #   file            name of the output file, without directory
  #
  # Builds <file> in the current directory: run1/<file> is copied whole,
  # then run2/<file> .. run<number_of_jobs>/<file> are appended where they
  # exist. From each appended file the first line is dropped when it is
  # non-empty and its first field is not a number.
  function append_files {
    local number_of_jobs=$1
    local file=$2
    local i

    cp -- "run1/$file" "$file"
    for ((i = 2; i <= number_of_jobs; i++)); do
      if [ -f "run$i/$file" ]; then
        # Print every line except a first line whose first field is
        # non-numeric ($1 == $1+0 only holds for numeric fields).
        awk 'NR!=1 || NF==0 || $1 == $1+0 { print $0 }' < "run$i/$file" >> "$file"
      fi
    done
  }
  # Merge the output files of all runs.
  # OUTFILES is a whitespace-separated list of file names or shell wildcard
  # patterns. Each entry is matched against the files present in run1, and
  # every match is merged across the runs with append_files.
  echo "Appending results..."
  # Split OUTFILES into words without expanding wildcards yet. read returns
  # non-zero at end of input, which is expected here.
  read -r -d '' -a outfile_patterns <<< "$OUTFILES" || true
  for pattern in "${outfile_patterns[@]}"; do
    matched=0
    # Unquoted on purpose: the wildcard is expanded here, relative to run1.
    for path in run1/$pattern; do
      # A wildcard without matches stays a literal string; skip it.
      [ -e "$path" ] || continue
      matched=1
      append_files "$NPROCESS" "${path#run1/}"
    done
    if ((matched == 0)); then
      echo "Warning: no output file matching '$pattern' found in run1" >&2
    fi
  done
  echo "Done!"