#! /bin/bash
#
# Coral - collaborative job submission script manager for NEMO
#
# Name of the job submission script written by 'build' and used by 'submit'.
output_script="run.sh"

# Absolute path of the directory containing this script; templates and helper
# files are looked up relative to it.
CORAL_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export CORAL_HOME

HOSTNAME="$(hostname -s)"
export HOSTNAME

FQDN="$(hostname)"
export FQDN

#######################################
# Print the first e-mail address found on standard input.
# Prints nothing when the input contains no address.
# The previous inline pattern used a reversed bracket range ('_-.') and an
# empty domain class, so it could never match.
#######################################
_coral_extract_email() {
  grep -Eo '[a-zA-Z0-9_.+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]+' | head -n 1
}

# E-mail address recorded in the user's passwd entry (empty when there is none).
USER_EMAIL="$(getent passwd "${USER}" | _coral_extract_email)"
export USER_EMAIL

MAINTAINER_EMAIL="damien.francois@uclouvain.be"
export MAINTAINER_EMAIL

# NOTE(review): 'doc' and 'die' are used by every command below but are not
# defined in this file; presumably they come from tools.inc - confirm.
source "${CORAL_HOME}/utils/tools.inc"
#######################################
# Populate the current directory with the files of a template.
# Globals:   CORAL_HOME, HOSTNAME, output_script (read)
# Arguments: $1 - template name (optional; defaults to $HOSTNAME)
# Outputs:   progress messages on stdout; writes ./.coral_template
# Returns:   via die: 1 unknown template, 2 directory already initialised,
#            3 copying the template files failed
#######################################
init() {
  doc "$@" <<EOT
init [<template name>] - Initialize current directory with files from template <template name>
Templates contain configuration files, namelists, and xios configurations.
By default, the template name is the machine short name (as given by hostname -s)
EOT
  local template_name=${1:-${HOSTNAME}}
  local template_path=${CORAL_HOME}/templates/${template_name}
  local copy_error="Error: Could not copy files from template '${template_name}'."

  [[ -d ${template_path} ]] || die 1 "Error: Template '${template_name}' not found."

  # Refuse to overwrite anything left over from a previous init/build.
  [[ ! -f experiment.cfg ]] || die 2 "Error: Directory not empty. Remove all *.cfg files."
  [[ ! -d namelists ]] || die 2 "Error: Directory not empty. Remove directory namelists."
  [[ ! -d xios_config ]] || die 2 "Error: Directory not empty. Remove directory xios_config."
  [[ ! -f ${output_script} ]] || die 2 "Error: Directory not empty. Remove ${output_script}"

  echo "Importing '${template_name}' template in the current directory..."
  # Purely informational listing; a failure here does not abort the init.
  column -t -s\| <"${template_path}/../files.txt"

  # Stop at the first failed copy so that a half-populated directory is never
  # recorded as initialised.
  cp -- "${template_path}"/*cfg . || die 3 "${copy_error}"
  cp -r -- "${template_path}/xios_config" . || die 3 "${copy_error}"
  cp -r -- "${template_path}/namelists" . || die 3 "${copy_error}"

  echo "${template_name}" >.coral_template
  echo "Done. Modify them to your needs and then run 'coral build' to build and"
  echo "review the submission script and 'coral submit' to submit the job."
}
#######################################
# Assemble the job submission script from the *.cfg files in the current
# directory and the skeleton shipped with coral.
# Globals:   CORAL_HOME, USER_EMAIL, output_script (read)
# Arguments: none (forwarded to 'doc' only)
# Outputs:   progress messages on stdout; writes ${output_script}
# Returns:   via die: 1 when experiment.cfg is missing, 3 when any step of the
#            generation fails
#######################################
build() {
  doc "$@" <<EOT
build - Build submission script from the configuraiton files in the current directory.
EOT
  local rc

  [[ -f experiment.cfg ]] || die 1 "Error: configuration files not found. Run 'coral init' first"
  echo "Generating job submission script '${output_script}'..."
  echo "#!/bin/bash" >"${output_script}"

  # The subshell must NOT be the left-hand side of '||': bash ignores 'set -e'
  # inside a compound command executed in such a context, which would let a
  # broken configuration file go unnoticed. Its status is checked afterwards.
  (
    set -eo pipefail

    # Copy stdin to stdout without the lines matching ERE $1. grep -v exits 1
    # when nothing is left, which is not an error here.
    drop_lines() { grep -Ev -- "$1" || [[ $? -eq 1 ]]; }

    source experiment.cfg
    export exp_name
    export nem_config_name
    source programs.cfg
    export nem_numproc
    export xio_numproc
    export total_nb_cpu=$((nem_numproc + xio_numproc))
    export email=${USER_EMAIL}
    if grep -q SBATCH job.cfg; then
      submit_command=sbatch
    else
      submit_command=qsub
    fi
    export submit_command

    printf '\n#\n# Job options\n#\n'
    # The single quotes are intentional: envsubst expects the literal variable
    # names it is allowed to substitute.
    drop_lines '^(#|$)' <job.cfg \
      | sed "s/SBATCH/#SBATCH/" \
      | sed "s/PBS -/#PBS -/" \
      | envsubst '$exp_name,$email,$total_nb_cpu'

    printf '\n#\n# Experiment options\n#\n'
    drop_lines '^(#|$)' <experiment.cfg

    printf '\n#\n# Data configuration\n#\n'
    cat data.cfg

    printf '\n#\n# Program configuration\n#\n'
    drop_lines '^(#|$)' <programs.cfg

    printf '\n#\n# Script logic\n#\n'
    drop_lines '^[[:space:]]*#' <"${CORAL_HOME}/templates/scripts/skeleton.sh" \
      | envsubst '$submit_command'
  ) >>"${output_script}" 2>/dev/null
  rc=$?

  [[ ${rc} -eq 0 ]] || die 3 "Error: Syntax error in configuration files. Please review them."
  echo "Done. Run 'coral submit' to submit the job, or submit it manually."
}
#######################################
# Make the generated script executable and hand it to the job scheduler
# (sbatch if available, otherwise qsub), or run it locally.
# Globals:   output_script, USER (read)
# Arguments: free-form option words (local, preponly, noresubmit, fromscratch,
#            verbose); they are matched as substrings of the joined arguments
# Outputs:   scheduler output on stdout; appends the job id to
#            ${run_dir}/.coral_jobs
# Returns:   via die: 1 when the script is missing, otherwise the non-zero
#            status of the failed submission (4 when no scheduler is found)
#######################################
submit() {
  doc "$@" <<EOT
submit [<options>] - Submit the job based on the submission script present in the current directory.
options can include:
* local: run the script locally rather than submitting it to the job scheduler
* preponly: run only the part of the script that prepares the run (copies files, links data, etc.)
* noresubmit: run only one job and prevent submission of continuation jobs.
* fromscratch: restart computations from scratch ignoring checkpoint files. Use with care.
* verbose: show everything during submission
EOT
  local rc

  [[ -f ${output_script} ]] || die 1 "Error: submission script not found. Run 'coral build' first"
  echo Making script executable and submitting it...
  chmod +x "${output_script}"

  # Runs in a subshell so that 'set -e' and the sourced configuration do not
  # leak into the caller. The status is inspected afterwards instead of being
  # discarded.
  (
    set -e
    source experiment.cfg
    # NOTE(review): evaluates every line of job.cfg mentioning run_dir as shell
    # code; job.cfg is assumed to be a trusted, user-owned file.
    eval "$(grep run_dir job.cfg)"

    # "$*" joins the options into one string; "$@" inside [[ ]] did the same
    # implicitly.
    if [[ "$*" == *fromscratch* ]]; then
      rm -rf "${run_dir:?}"
    fi

    if [[ "$*" == *local* ]]; then
      ./"${output_script}" "$@"
      exit 0
    fi

    if command -v sbatch &>/dev/null; then
      jobid="$(sbatch "${output_script}" "$@")"
      sleep 2
      # sbatch prints "Submitted batch job <id>"; keep the last word.
      jobid=${jobid##* }
      mkdir -p "${run_dir:?}"
      echo "${jobid}" >>"${run_dir}/.coral_jobs"
      #squeue --start -j "${jobid}" # FIXME
      squeue -j "${jobid}"
      exit 0
    fi

    if command -v qsub &>/dev/null; then
      if [[ ${USER} == "vsc"* ]]; then
        # For vsc accounts the arguments are used as the credit account name.
        credits="$*"
        [[ -n ${credits} ]] || credits="laerocloud"
        mkdir -p tmp
        echo "${output_script}" >tmp/script_name
        echo "${PWD}" >tmp/address
        jobid="$(qsub -A "${credits}" "${output_script}")"
        echo "${jobid}"
        sleep 2
        jobid=${jobid%%.*}
        mkdir -p "${run_dir:?}"
        echo "${jobid}" >>"${run_dir}/.coral_jobs"
        qstat
      else
        # "$*" keeps all options inside the single PBS_OPTIONS value; "$@"
        # would split them into separate qsub arguments.
        jobid="$(qsub -v PBS_OPTIONS="$*" "${output_script}")"
        echo "${jobid}"
        sleep 2
        jobid=${jobid%.*}
        mkdir -p "${run_dir:?}"
        echo "${jobid}" >>"${run_dir}/.coral_jobs"
        qstat -J "${jobid}"
      fi
      exit 0
    fi

    echo "Error: neither sbatch nor qsub found; cannot submit the job." >&2
    exit 4
  )
  rc=$?

  [[ ${rc} -eq 0 ]] || die "${rc}" "Error: job submission failed."
  echo "Done."
}
#######################################
# Copy the configuration of the current directory into a template of the
# local coral installation.
# Globals:   CORAL_HOME (read)
# Arguments: $1 - template name (optional; defaults to the name stored in
#            ./.coral_template)
# Outputs:   progress messages on stdout; rewrites ./.coral_template
# Returns:   via die: 1 when the name or the expected files are missing,
#            3 when the template could not be written
#######################################
save() {
  doc "$@" <<EOT
save [<name>] - Save configuration files in the current template or create a new template if <name> is given.
The new template can consequently be used with 'coral init <name>' in another directory'
EOT
  local current_template=""
  local template
  local target

  # The marker file is optional when a name is given explicitly.
  if [[ -f .coral_template ]]; then
    current_template=$(<.coral_template)
  fi
  template=${1-$current_template}
  target=${CORAL_HOME}/templates/${template}

  [[ -n ${template} ]] || die 1 "Error: No template name defined."
  [[ -f experiment.cfg ]] || die 1 "Error: No configuration files found."
  [[ -d namelists ]] || die 1 "Error: namelist directory not found."
  [[ -d xios_config ]] || die 1 "Error: xios_config directory not found."

  echo "Saving template ${template} to ${target}..."
  mkdir -p "${target}" || die 3 "Error: Could not create '${target}'."
  rsync -q -va -- *.cfg namelists xios_config "${target}" \
    || die 3 "Error: Could not copy files to '${target}'."

  # Only record the new name once the copy has succeeded.
  echo "${template}" >.coral_template
  echo Done.
}
#######################################
# Make a template readable by other users of the machine and print the
# command they need to import it.
# Globals:   CORAL_HOME, HOME (read)
# Arguments: $1 - template name (optional; defaults to the name stored in
#            ./.coral_template)
# Outputs:   progress messages and import instructions on stdout
# Returns:   via die: 1 when ./.coral_template is missing
#######################################
share() {
  doc "$@" <<EOT
share [<name>] - Make template files for template '<name>' world readable and display
instructions on how to import the template.'
EOT
  local curr
  local target
  local current_template
  local template

  [[ -f .coral_template ]] || die 1 "No coral template found in current directory."
  current_template=$(<.coral_template)
  template=${1-$current_template}
  target=${CORAL_HOME}/templates/${template}
  [[ -d ${target} ]] || save "${template}"

  echo "Making sure the template is world readable..."
  # Recursive: the files and sub-directories must be readable too, not only
  # the top-level directory. 'X' adds search permission to directories without
  # making plain files executable.
  chmod -R o+rX "${target}"

  # Other users also need search permission on every parent directory, up to
  # and including $HOME (or the filesystem root when the template lives
  # outside $HOME). The '.' test stops the walk for a relative path, where
  # dirname would otherwise keep returning '.' forever.
  curr=${target}
  while curr=$(dirname "${curr}"); do
    chmod o+x "${curr}"
    [[ ${curr} == "${HOME}" || ${curr} == / || ${curr} == . ]] && break
  done

  echo -e "Done. Other users can now use template '${template}' by issuing \n coral import $target"
}
#######################################
# Copy a template directory into the local coral installation.
# Globals:   CORAL_HOME (read)
# Arguments: $1 - path of the template directory to import
# Outputs:   progress messages on stdout
# Returns:   via die: 1 when no path is given, 3 when the copy fails
#######################################
import() {
  doc "$@" <<EOT
import <path> - Import template located at <path> into the local coral installation.
EOT
  [[ -n $1 ]] || die 1 "Usage: $(basename "$0") import <path>"
  local template_name
  local source_path=$1

  # rsync treats "dir/" as "the contents of dir"; with a trailing slash the
  # template files would land directly in templates/ instead of in
  # templates/<name>. Strip trailing slashes so the directory itself is copied.
  while [[ ${source_path} == */ && ${source_path} != / ]]; do
    source_path=${source_path%/}
  done
  template_name=$(basename "${source_path}")

  echo "Importing template in $1..."
  rsync -q -va -- "${source_path}" "${CORAL_HOME}/templates" \
    || die 3 "Error: Could not import template from '$1'."
  echo "Done. You can test it with 'coral init ${template_name}'"
}
#######################################
# Commit a template to the local Mercurial repository and e-mail the
# maintainer a request to pull it.
# Globals:   CORAL_HOME, MAINTAINER_EMAIL (read); HOSTNAME, FQDN, USER,
#            CORAL_HOME are substituted into the e-mail body
# Arguments: $1 - template name (optional; defaults to the name stored in
#            ./.coral_template)
# Outputs:   progress messages on stdout
# Returns:   via die: 1 when ./.coral_template is missing, 3 when the e-mail
#            could not be sent
#######################################
publish() {
  doc "$@" <<EOT
publish [<template>] - Submits template to the central repository manager for inclusion
in the main coral repository.
EOT
  [[ -f .coral_template ]] || die 1 "Error: No template name defined."
  local current_template
  local template
  local target
  current_template=$(<.coral_template)
  template=${1-$current_template}
  target=${CORAL_HOME}/templates/${template}
  [[ -d ${target} ]] || save "${template}"

  echo "Commiting template to local Hg repository and contacting repository master..."
  # The directory change stays inside the subshell so the caller's working
  # directory is left untouched. Both steps are best effort: nothing is added
  # or committed when hg reports no matching change.
  (
    cd "${CORAL_HOME}" || exit 1
    hg status | grep -q "^?.*${template}" && hg -q add "${target}"
    hg status | grep -q "${template}" \
      && hg -q commit "${target}" -m"Commit template ${template}"
  )

  # The single quotes are intentional: envsubst expects the literal variable
  # names it is allowed to substitute.
  envsubst '$HOSTNAME,$FQDN,$USER,$CORAL_HOME' \
    <"${CORAL_HOME}/templates/scripts/repomaster.txt" \
    | mail -s "Coral template publish request" "${MAINTAINER_EMAIL}" \
    || die 3 "Error: Could not send email to ${MAINTAINER_EMAIL}."
  echo "Email to $MAINTAINER_EMAIL sent."
}
#######################################
# Show the state of the simulation whose run directory is the current
# directory.
# Arguments: none (forwarded to 'doc' only)
# Outputs:   the last lines of nemo.info, the queue entry of the most recent
#            job and, while time.step exists, its content refreshed every
#            second
# Returns:   0; via die: 2 when ./.coral_jobs is missing
#######################################
status() {
  doc "$@" <<EOT
status - Displays the current status of the simulation.
EOT
  [[ -f .coral_jobs ]] || die 2 "Error: no job information found in current directory. Make sure you run 'coral status' in the run directory."
  local jobid
  # The most recently submitted job is the last line of the file.
  jobid=$(tail -n 1 .coral_jobs)

  if [[ -f nemo.info ]]; then
    tail -n 4 nemo.info
  fi

  # Only print the queue entry when squeue exists and still knows the job.
  if command -v squeue &>/dev/null && squeue -j "${jobid}" &>/dev/null; then
    echo "Current job:"
    squeue -j "${jobid}"
  fi

  # Explicit 'if' blocks (rather than '[[ ... ]] && { ... }') keep the exit
  # status at 0 when an optional file is simply absent.
  if [[ -f time.step ]]; then
    echo "Current timestep: (hit CTRL-C to stop)"
    (
      while [[ -f time.step ]]; do
        printf '%s' "$(<time.step)"
        sleep 1
        # Erase to end of line, then return to column 0 for the next refresh.
        printf '\e[0K\r'
      done
    )
  fi
  return 0
}
#######################################
# Pull and apply the latest changes of the coral Mercurial repository.
# Globals:   CORAL_HOME (read)
# Returns:   the status of 'hg pull --update', or of 'cd' when CORAL_HOME
#            cannot be entered
#######################################
update() {
  doc "$@" <<EOT
update - undocumented. Do not use.
EOT
  (
    # Subshell: the caller's working directory is not affected.
    cd "${CORAL_HOME}" || exit "$?"
    hg pull --update
  )
}
#######################################
# List the templates of the local coral installation.
# Globals:   CORAL_HOME (read)
# Outputs:   'ls -l' listing of the templates directory without the
#            bookkeeping entries 'files.txt' and 'scripts'
#######################################
list() {
  doc "$@" <<EOT
list - List all templates available in local coral install
EOT
  # Anchor on the entry name at the end of the line: an unanchored
  # 'grep -v scripts' also hides templates (or owners) whose name merely
  # contains that word.
  ls -l "${CORAL_HOME}/templates" \
    | grep -Ev '[[:space:]](files\.txt|scripts)$'
}
#######################################
# Print the general usage message.
# Outputs:   the usage text on stdout, one line per documented command
#######################################
help() {
  local -a usage_lines=(
    "Usage: $0 <command> [<arguments>], where command is one of:"
    "init - Initialize template in current directory. Default template is the machine name."
    "build - Build submission script from template in current directory."
    "submit - Submit job from submission script in current directory."
    "status - Displays the current status of the simulation."
    "save - Save modifications to template in current directory"
    "share - Share template with other users on the same machine"
    "import - Import template from other user on the same machine"
    "publish - Submit template for inclusing in central coral repository"
    "list - List all available templates"
    "Use 'coral <command> -h' for more information about a specific command."
  )
  printf '%s\n' "${usage_lines[@]}"
}
#######################################
# Print a bash completion snippet for the 'coral' command.
# Outputs:   the definition of the '_coral' completion function and the
#            'complete' call that registers it, on stdout
#######################################
completion() { #FIXME
  # Single-quoted strings: the snippet is emitted verbatim, nothing is
  # expanded here.
  printf '%s\n' \
    '_coral () # By convention, the function name' \
    '{ #+ starts with an underscore.' \
    '_get_comp_words_by_ref cur prev words cword' \
    '_split_long_opt' \
    'COMPREPLY=( $(compgen -W "init commit submit build edit share save import publish update list status" -- $cur) )' \
    '}' \
    'complete -F _coral coral'
}
# Dispatch: the first argument selects the command function, the remaining
# arguments are passed to it. Anything else prints the usage on stderr and
# exits with status 1.
case "$1" in
  init | commit | submit | build | edit | share | save | import | publish | update | list | status | completion)
    "$@"
    ;;
  *)
    help >&2
    exit 1
    ;;
esac