#!/bin/bash
#
# submit.sh
#
# Portable bash script to run LPJ-GUESS version:
# BINARY
# as a parallel job using SLURM on Aurora.
#
# Created automatically on DATE
# 
# Usage: 
#
#   1. Copy script to the directory where you want output written.
#      This will be called the RUN DIRECTORY.
#   2. In an editor, set appropriate values for the variables NPROCESS,
#      WALLTIME, INSFILE, INPUT_MODULE, GRIDLIST and OUTFILES
#      (NB: no spaces around the = sign):

NPROCESS="20"             # NB: Should be multiple of 20 on Aurora!
WALLTIME="150:00:00"      # hh:mm:ss
INSFILE="guess.ins"       # relative to the run directory
INPUT_MODULE="cru_ncep"
GRIDLIST="gridlist.txt"   # relative to the run directory
OUTFILES="*.out"          # pattern is kept literal here; expanded later

#      Where:
#      NPROCESS     = number of processes in parallel job
#      WALLTIME     = maximum wall (real) time for job hh:mm:ss
#      INSFILE      = path to ins file from run directory
#      INPUT_MODULE = input module to use
#      GRIDLIST     = path to gridlist file from run directory
#      OUTFILES     = list of LPJ-GUESS output files in single quotes,
#                     and separated by spaces (filenames only, including
#                     extension, no directory.) Shell wildcards are allowed.
#
#   3. Run the script using the command:
#        ./submit.sh
#      or:
#        ./submit.sh [-n <name>] [-s <file>] [-i <ins-file>]
#
#      All arguments are optional and interpreted as:
#      name     = the name of the job (shown in the SLURM queue)
#      file     = filename of a file which can override the variables
#                 above
#      ins-file = instruction file to use, overrides the INSFILE
#                 variable above
#
# Nothing to change past here
########################################################################

# Exit if any command fails
set -e

# Handle the command line arguments.
# The leading ':' in the option string puts getopts in silent mode: a
# missing option argument is reported as ':' and an unknown option as
# '?', in both cases with the offending option letter in OPTARG.
while getopts ":n:s:i:" opt; do
    case "$opt" in
        n ) name=$OPTARG ;;
        s ) submit_vars_file=$OPTARG ;;
        i ) ins=$OPTARG ;;
        : )
            echo "Option -$OPTARG requires an argument" >&2
            exit 2
            ;;
        \? )
            echo "Unknown option: -$OPTARG" >&2
            echo "Usage: $0 [-n <name>] [-s <file>] [-i <ins-file>]" >&2
            exit 2
            ;;
    esac
done

# Override the submit variables with the contents of a file, if given
if [ -n "$submit_vars_file" ]; then
    source "$submit_vars_file"
fi

# Override INSFILE with the ins-file parameter, if given
if [ -n "$ins" ]; then
    INSFILE=$ins
fi

# On Aurora, the recommendation is to submit jobs with the --exclusive
# option, so we get exclusive nodes. Since each node has 20 cores, we
# should set NPROCESS to a multiple of 20 to avoid waste.
# If you really want to, you could remove the multiple-of-20 part of this
# check and the --exclusive option below, but your jobs might then be
# disturbed by other jobs sharing your nodes.
# NPROCESS must in any case be a positive integer: it is used as a loop
# bound and as a divisor when the gridlist is split.
CORES_PER_NODE=20
if ! [[ $NPROCESS =~ ^[1-9][0-9]*$ ]] || (( NPROCESS % CORES_PER_NODE != 0 )); then
    echo "Please set NPROCESS to a positive multiple of $CORES_PER_NODE on Aurora!" >&2
    exit 1
fi

# The gridlist is read by this script itself, so fail early (before any
# run directory is touched) if it is not there.
if [[ ! -f $GRIDLIST || ! -r $GRIDLIST ]]; then
    echo "Cannot read gridlist file: $GRIDLIST" >&2
    exit 1
fi

# Convert INSFILE to an absolute path since we will be starting the
# guess instances from different directories.
# Please note when porting this script: readlink may not be available
# on non-Linux systems. Also, using absolute path names means the
# instruction file needs to be in a place accessible from the nodes.
if ! insfile_abs=$(readlink -f "$INSFILE"); then
    echo "Cannot resolve path of instruction file: $INSFILE" >&2
    exit 1
fi
INSFILE=$insfile_abs

# File name (without directory) under which each run directory gets its
# own part of the gridlist.
GRIDLIST_FILENAME=$(basename -- "$GRIDLIST")

# This function creates the gridlist files for each run by splitting
# the original gridlist file into approximately equal parts.
#
# Globals read:
#   NPROCESS          - number of run directories (run1 .. run$NPROCESS)
#   GRIDLIST          - path to the original gridlist file
#   GRIDLIST_FILENAME - file name to give each per-run gridlist
# Expects the directories run1 .. run$NPROCESS to exist in the current
# directory.
# Returns 0 on success, 1 on failure (with a message on stderr where the
# failing command does not print one itself).
function split_gridlist {
    if (( NPROCESS < 1 )); then
      echo "NPROCESS must be at least 1" >&2
      return 1
    fi

    # Give every run a gridlist first (containing just a blank line), so
    # that runs which get no part of the original still have the file.
    local a
    for ((a=1; a <= NPROCESS; a++)); do
      echo > "run$a/$GRIDLIST_FILENAME" || return 1
    done

    # Count the lines with awk rather than 'wc -l': wc counts newline
    # characters and would miss a last line without a trailing newline,
    # which makes the chunks too small and yields more than NPROCESS parts.
    local total_lines
    total_lines=$(awk 'END { print NR }' < "$GRIDLIST") || return 1
    if (( total_lines == 0 )); then
      echo "Gridlist file $GRIDLIST is empty!" >&2
      return 1
    fi

    # Lines per gridlist: total divided by NPROCESS, rounded up.
    local lines_per_run=$(( (total_lines + NPROCESS - 1) / NPROCESS ))

    # Split into a private scratch directory so that left-overs from an
    # earlier, interrupted run can never be mistaken for fresh parts.
    local split_dir
    split_dir=$(mktemp -d ./tmpSPLITGRID_XXXXXX) || return 1
    if ! split --suffix-length=4 --lines "$lines_per_run" "$GRIDLIST" "$split_dir/part_"; then
      rm -rf -- "$split_dir"
      return 1
    fi

    # Move the parts into the runX-directories; the glob expands in sorted
    # order, which matches the order of the suffixes generated by split.
    local i=1
    local file
    for file in "$split_dir"/part_*; do
      if ! mv -- "$file" "run$i/$GRIDLIST_FILENAME"; then
        rm -rf -- "$split_dir"
        return 1
      fi
      i=$((i+1))
    done

    rmdir -- "$split_dir"
}

# Create header of progress.sh script (any existing progress.sh is
# overwritten).
{
  echo "##############################################################"
  echo "# PROGRESS.SH"
  echo "# Upload current guess.log files from local nodes and check"
  echo "# Usage: sh progress.sh"
  echo
} > progress.sh

# Create a run subdirectory for each process, remove the log and gridlist
# left there by a previous submission, and add the commands showing the
# tail of that run's log to progress.sh.
for ((a=1; a <= NPROCESS; a++)); do
  mkdir -p "run$a"
  # Paths are given relative to the current directory instead of changing
  # into the run directory, so a failure can never leave us in the wrong
  # place; quoting keeps unusual gridlist file names intact.
  rm -f -- "run$a/guess.log" "run$a/$GRIDLIST_FILENAME"
  {
    echo "echo '********** Last few lines of ./run${a}/guess.log: **********'"
    echo "tail ./run${a}/guess.log"
  } >> progress.sh
done

# Distribute the gridlist over the run directories created above.
split_gridlist

# Create SLURM script to request place in queue.
#
# The here-document delimiter is unquoted, so everything written as
# $NAME below is expanded NOW, by this submit script, and ends up as a
# fixed value in guess.cmd. Everything written as \$NAME is copied into
# guess.cmd as $NAME and is expanded later, when the job runs.
#
# The instruction file path is shell-quoted before it is pasted into the
# generated mpirun command line, so that an absolute path containing
# spaces or other special characters still reaches the program as a
# single argument. For ordinary paths the quoted form is identical to
# the path itself.
printf -v insfile_quoted '%q' "$INSFILE"

cat <<EOF > guess.cmd
#!/bin/bash
#SBATCH -n $NPROCESS
#SBATCH --time=$WALLTIME
#SBATCH --exclusive
set -e

if ! type -P mpirun &> /dev/null; then
    echo "Didn't find mpirun! Make sure an MPI module is loaded in your" >&2
    echo "login script (~/.bashrc) and recompile LPJ-GUESS with MPI support!" >&2
    exit 1
fi

# If there's a script for setting up files on local disk, run it
if [ -f setup_local.sh ]; then
    srun -n \$SLURM_NNODES -N \$SLURM_NNODES setup_local.sh
fi

# In each run directory, create a symbolic link to the node local storage
for ((a=1; a <= $NPROCESS ; a++))
do
  cd run\$a
  if [ -h local ]; then
      rm local
  fi
  ln -s \$SNIC_TMP local
  cd ..
done

mpirun -bind-to core BINARY -parallel -input $INPUT_MODULE $insfile_quoted

EOF

# Create SLURM script that merges the per-run results into the directory
# the job is submitted from.
#
# As the here-document delimiter is unquoted, $NAME is expanded now (by
# this submit script) while \$NAME is written to append.cmd as $NAME and
# expanded when the append job runs.
#
# In the generated script:
#   - append_files copies run1/<file> and appends the same file from
#     run2 .. runN, dropping the first line of each of those when its
#     first field is not numeric (NOTE(review): presumably the column
#     header line -- confirm against the LPJ-GUESS output format);
#   - the OUTFILES pattern is expanded inside run1 to get the file names;
#   - the guess.log files are concatenated in numeric run order and only
#     for run1 .. run$NPROCESS, so that logs left in run directories of an
#     earlier, larger submission are not mixed in.
cat <<EOF > append.cmd
#!/bin/bash
#SBATCH -n 1
#SBATCH --time=$WALLTIME
set -e

function append_files {
    local number_of_jobs=\$1
    local file=\$2

    cp "run1/\$file" "\$file"

    local i=""
    for ((i=2; i <= number_of_jobs; i++))
    do
      if [ -f "run\$i/\$file" ]; then
        awk 'NR!=1 || NF==0 || \$1 == \$1+0 { print \$0 }' < "run\$i/\$file" >> "\$file"
      fi
    done
}

pushd run1 &> /dev/null
outfiles_unexpanded='$OUTFILES'
outfiles_expanded=\$(echo \$outfiles_unexpanded)
popd &> /dev/null

for file in \$outfiles_expanded
do
  append_files $NPROCESS "\$file"
done

for ((i=1; i <= $NPROCESS; i++))
do
  if [ -f "run\$i/guess.log" ]; then
    cat "run\$i/guess.log"
  fi
done > guess.log
EOF

# Submit guess job.
# The output of sbatch is captured on its own, not piped straight into
# awk, because in a pipeline only the exit status of the last command is
# seen: a failed submission would otherwise go unnoticed and leave the
# dependency below empty.
job_name=${name:-guess}
if ! submit_output=$(sbatch -J "$job_name" guess.cmd); then
    echo "Failed to submit guess.cmd" >&2
    exit 1
fi

# The job id is taken to be the last word that sbatch printed.
append_dependency=$(awk '{print $NF}' <<< "$submit_output")
if [[ -z $append_dependency ]]; then
    echo "Could not find a job id in the sbatch output: $submit_output" >&2
    exit 1
fi

# Submit append job, to be started only after the guess job has finished
# successfully, and print its job id.
if ! append_output=$(sbatch --dependency="afterok:$append_dependency" -J "${job_name}_append" append.cmd); then
    echo "Failed to submit append.cmd" >&2
    exit 1
fi
awk '{print $NF}' <<< "$append_output"