#!/bin/bash
#
# submit.sh
#
# Portable bash script to run LPJ-GUESS version:
#    BINARY
# as a parallel job using SLURM on Aurora.
#
# Created automatically on DATE
#
# Usage:
#
# 1. Copy this script to the directory where you want output written.
#    This will be called the RUN DIRECTORY.
# 2. In an editor, set appropriate values for the variables NPROCESS,
#    WALLTIME, INSFILE, INPUT_MODULE, GRIDLIST and OUTFILES
#    (NB: no space after the = sign):
# User-editable job settings (NB: no space after the = sign).
NPROCESS=20            # number of parallel processes; NB: Should be multiple of 20 on Aurora!
WALLTIME=150:00:00     # maximum wall (real) time for the job, hh:mm:ss
INSFILE=guess.ins      # path to the ins file from the run directory
INPUT_MODULE=cru_ncep  # input module to use
GRIDLIST=gridlist.txt  # path to the gridlist file from the run directory
OUTFILES='*.out'       # output file names, space separated; quoted so wildcards stay unexpanded
# Where:
#   NPROCESS     = number of processes in parallel job
#   WALLTIME     = maximum wall (real) time for job hh:mm:ss
#   INSFILE      = path to ins file from run directory
#   INPUT_MODULE = input module to use
#   GRIDLIST     = path to gridlist file from run directory
#   OUTFILES     = list of LPJ-GUESS output files in single quotes,
#                  and separated by spaces (filenames only, including
#                  extension, no directory.) Shell wildcards are allowed.
#
# 3. Run the script using the command:
#      ./submit.sh
#    or:
#      ./submit.sh [-n <name>] [-s <file>] [-i <ins-file>]
#
#    All arguments are optional and interpreted as:
#      name     = the name of the job (shown in the SLURM queue)
#      file     = filename of a file which can override the variables
#                 above
#      ins-file = instruction file to use, overrides the INSFILE
#                 variable above
#
# Nothing to change past here
########################################################################
# Exit if any command fails
set -e

#######################################
# Parse the command line options of this script.
# Globals:   name, submit_vars_file, ins (each written only when the
#            corresponding option is given)
# Arguments: the command line to parse, normally "$@"
# Outputs:   a usage error on stderr for a malformed command line
# Returns:   0 on success; exits with status 2 on an unknown option or
#            an option that lacks its argument
#######################################
function parse_options {
  local opt
  # getopts tracks its position in OPTIND; a local copy starts each call
  # at the first argument and leaves the caller's OPTIND alone.
  local OPTIND=1

  # The leading ':' selects silent error reporting, so the two error
  # cases below have to be handled here.
  while getopts ":n:s:i:" opt; do
    case "$opt" in
      n) name=$OPTARG ;;
      s) submit_vars_file=$OPTARG ;;
      i) ins=$OPTARG ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        exit 2
        ;;
      \?)
        echo "Unknown option: -$OPTARG" >&2
        echo "Usage: $0 [-n <name>] [-s <file>] [-i <ins-file>]" >&2
        exit 2
        ;;
    esac
  done
}

# Handle the command line arguments
parse_options "$@"

# Override the submit variables with the contents of a file, if given.
# This is done at top level (not inside a function) so that every
# assignment in the file lands in the script's global scope.
if [[ -n "$submit_vars_file" ]]; then
  source "$submit_vars_file"
fi

# Override INSFILE with the ins-file parameter, if given
if [[ -n "$ins" ]]; then
  INSFILE=$ins
fi
# On Aurora, the recommendation is to submit jobs with the --exclusive
# option, so we get exclusive nodes. Since each node has 20 cores, we
# should set NPROCESS to a multiple of 20 to avoid waste.
# If you really want to, you could remove this check and the --exclusive
# option below, but your jobs might then be disturbed by other jobs
# sharing your nodes.
CORES_PER_NODE=20

# NPROCESS must be a positive decimal integer (no leading zero, which bash
# arithmetic would read as octal) before the modulo test means anything:
# an unset, empty or zero value would otherwise pass it and only fail
# later, when the gridlist is divided among zero processes.
if ! [[ "$NPROCESS" =~ ^[1-9][0-9]*$ ]] \
  || ((NPROCESS % CORES_PER_NODE != 0)); then
  echo "Please set NPROCESS to a multiple of ${CORES_PER_NODE} on Aurora!" >&2
  exit 1
fi
# Convert INSFILE to an absolute path since we will be starting the
# guess instances from different directories.
# Please note when porting this script: readlink may not be available
# on non-Linux systems. Also, using absolute path names means the
# instruction file needs to be in a place accessible from the nodes.
INSFILE=$(readlink -f -- "$INSFILE")

# File name of the gridlist without its directory; this is the name each
# run directory's own gridlist file gets. Quoted so that a path containing
# spaces is passed to basename as a single operand.
GRIDLIST_FILENAME=$(basename -- "$GRIDLIST")
#######################################
# Create the gridlist file for each run by splitting the original
# gridlist file into approximately equal parts.
# Globals:   NPROCESS (read), GRIDLIST (read), GRIDLIST_FILENAME (read)
# Arguments: none
# Outputs:   writes run<N>/$GRIDLIST_FILENAME for N = 1..NPROCESS (the
#            run<N> directories must already exist); uses temporary
#            files named tmpSPLITGRID_* in the current directory
# Returns:   0 on success, 1 if the gridlist has no lines or a step fails
#######################################
function split_gridlist {
  local run

  # Create empty gridlists first to make sure each run gets one
  for ((run = 1; run <= NPROCESS; run++)); do
    echo > "run${run}/${GRIDLIST_FILENAME}"
  done

  # Count the lines with awk rather than "wc -l": wc counts newline
  # characters and therefore misses a last line that has no trailing
  # newline, which made split produce one piece more than NPROCESS.
  local total_lines
  total_lines=$(awk 'END { print NR }' "$GRIDLIST") || return 1
  if ((total_lines == 0)); then
    echo "Gridlist file $GRIDLIST contains no lines!" >&2
    return 1
  fi

  # Suitable number of lines per gridlist: the number of lines in the
  # original gridlist divided by NPROCESS, rounded up.
  local lines_per_run=$(( (total_lines + NPROCESS - 1) / NPROCESS ))

  # Remove pieces left behind by an earlier, interrupted split so they
  # are not mistaken for pieces of this one.
  rm -f tmpSPLITGRID_*

  # Use the split command to split the file into temporary files
  split --suffix-length=4 --lines "$lines_per_run" "$GRIDLIST" tmpSPLITGRID_ \
    || return 1

  # Move the temporary files into the runX-directories. The glob expands
  # in sorted order, which is the order split created the pieces in.
  local piece
  run=1
  for piece in tmpSPLITGRID_*; do
    mv "$piece" "run${run}/${GRIDLIST_FILENAME}" || return 1
    run=$((run + 1))
  done
}
# Create header of progress.sh script (any existing progress.sh is
# overwritten)
{
  echo "##############################################################"
  echo "# PROGRESS.SH"
  echo "# Upload current guess.log files from local nodes and check"
  echo "# Usage: sh progress.sh"
  echo
} > progress.sh

# Create a run subdirectory for each process and clean up what an earlier
# submission may have left there. For every run, two lines are appended
# to progress.sh that print the tail of that run's guess.log.
for ((a = 1; a <= NPROCESS; a++)); do
  mkdir -p "run${a}"
  rm -f "run${a}/guess.log" "run${a}/${GRIDLIST_FILENAME}"
  {
    echo "echo '********** Last few lines of ./run${a}/guess.log: **********'"
    echo "tail ./run${a}/guess.log"
  } >> progress.sh
done

# Give every run directory its share of the gridlist
split_gridlist
# Create SLURM script to request place in queue.
# The here-document is unquoted: plain variables (NPROCESS, WALLTIME,
# INPUT_MODULE, INSFILE) are expanded now, while guess.cmd is written;
# backslash-escaped ones are left for the job itself to expand.
cat <<EOF > guess.cmd
#!/bin/bash
#SBATCH -n $NPROCESS
#SBATCH --time=$WALLTIME
#SBATCH --exclusive

set -e

if ! type -P mpirun &> /dev/null; then
    echo "Didn't find mpirun! Make sure an MPI module is loaded in your" >&2
    echo "login script (~/.bashrc) and recompile LPJ-GUESS with MPI support!" >&2
    exit 1
fi

# If there's a script for setting up files on local disk, run it
if [ -f setup_local.sh ]; then
    srun -n \$SLURM_NNODES -N \$SLURM_NNODES setup_local.sh
fi

# In each run directory, create a symbolic link to the node local storage
for ((a=1; a <= $NPROCESS ; a++))
do
    cd run\$a
    if [ -h local ]; then
        rm local
    fi
    ln -s "\$SNIC_TMP" local
    cd ..
done

mpirun -bind-to core BINARY -parallel -input "$INPUT_MODULE" "$INSFILE"
EOF
# Create the SLURM script that merges the results of all runs.
# The generated append.cmd expands the OUTFILES patterns inside run1 and,
# for each matching file name, copies run1's file to the run directory and
# appends the same-named file of runs 2..NPROCESS to it. While appending,
# awk drops the first line of a file unless that line is blank or starts
# with a number (presumably the column header - confirm against the
# LPJ-GUESS output format). Finally all run*/guess.log are concatenated
# into guess.log.
# As the here-document is unquoted, WALLTIME, OUTFILES and NPROCESS are
# expanded now; backslash-escaped variables are expanded by the job.
cat <<EOF > append.cmd
#!/bin/bash
#SBATCH -n 1
#SBATCH --time=$WALLTIME

set -e

function append_files {
    local number_of_jobs=\$1
    local file=\$2

    cp "run1/\$file" "\$file"

    local i=""
    for ((i=2; i <= number_of_jobs; i++))
    do
        if [ -f "run\$i/\$file" ]; then
            awk 'NR!=1 || NF==0 || \$1 == \$1+0 { print \$0 }' "run\$i/\$file" >> "\$file"
        fi
    done
}

pushd run1 &> /dev/null
outfiles_unexpanded='$OUTFILES'
outfiles_expanded=\$(echo \$outfiles_unexpanded)
popd &> /dev/null

for file in \$outfiles_expanded
do
    append_files $NPROCESS "\$file"
done

cat run*/guess.log > guess.log
EOF
# Name shown in the queue; defaults to "guess" when -n was not given
job_name=${name:-guess}

# Submit guess job. The output of sbatch is captured first instead of
# being piped straight into awk, because in a pipeline only awk's exit
# status counts and a failed submission would go unnoticed.
if ! guess_submission=$(sbatch -J "$job_name" guess.cmd); then
  echo "Failed to submit guess.cmd" >&2
  exit 1
fi
# The job id is taken to be the last word of what sbatch printed
append_dependency=$(awk '{print $NF}' <<< "$guess_submission")

# Submit append job; it may start only after the guess job has finished
# successfully. As before, the last word of sbatch's output is printed.
if ! append_submission=$(sbatch --dependency=afterok:"$append_dependency" \
  -J "${job_name}_append" append.cmd); then
  echo "Failed to submit append.cmd" >&2
  exit 1
fi
awk '{print $NF}' <<< "$append_submission"