# Platform dependent configuration functions for the 'rhino' machine
# (KNMI, NL)
function configure()
{
    # Configure all settings/modules needed to later prepare the EC-Earth
    # run directory and set the variables used in the run script.
    # Reads:  start_dir, SLURM_JOB_NAME (from the submitting job)
    # Writes: exported build/run paths, grib paths, module environment,
    #         stdout_file, resubmit_job/resubmit_opt, proc_per_node

    # Configure paths for building/running EC-Earth
    export ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    export run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    export ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure GRIBEX paths
    export LOCAL_DEFINITION_TEMPLATES=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]
    #export ECMWF_LOCAL_TABLE_PATH=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        # Module tools often reference unset variables; relax nounset here
        set +u
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        # FIX: ${module_list} must stay UNQUOTED so the list word-splits into
        # individual module names; the original quoted form ("${module_list}")
        # ran the loop once with the whole list as a single argument.
        for m in ${module_list}
        do
            eval $(/usr/libexec/cmod sh add $m)
        done
        set -u
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    # Resource limits needed by the model executables
    ulimit -s unlimited
    ulimit -n 2048
    ulimit -c unlimited
    #ulimit -a
}
function launch_atos()
{
    # Launch a coupled run with srun using a pre-built --multi-prog file.
    # $1 - multi-prog configuration file
    # $2 - nodeset to run on (clustershell nodeset syntax)
    # $3 - total number of MPI tasks
    # $4 - cpu binding, passed to srun's --cpu_bind
    # $5 - number of OpenMP threads per task
    cmd="srun --kill-on-bad-exit=1"

    # Intel MPI / OpenMP runtime tuning
    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    # FIX: dropped the dead 'verbose,compact,...' KMP_AFFINITY export that
    # was immediately overwritten by this one.
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=$1
    NODESET=$2
    NBTASKS=$3
    BINDING=$4
    export OMP_NUM_THREADS=$5

    # Format string for /usr/bin/time
    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=$(nodeset -c $NODESET)
    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODESET --ntasks=$NBTASKS --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}
function launch()
{
    # Build an srun --multi-prog configuration from the argument list and
    # launch it. Arguments are groups of "nranks executable [exe-args ...]"
    # separated by '--', e.g.: launch 128 ifsmaster -v ecmwf -- 96 nemo.exe

    # Core layout of the node: enumerate physical cores on both sockets
    NB_CORES_PER_SOCKET_ACTUAL=14
    NB_CORES_PER_SOCKET_TARGET=14
    FIRST_CORE_SOCKET0=0
    LAST_CORE_SOCKET0=$((NB_CORES_PER_SOCKET_TARGET-1))
    FIRST_CORE_SOCKET1=$NB_CORES_PER_SOCKET_ACTUAL
    LAST_CORE_SOCKET1=$((FIRST_CORE_SOCKET1+NB_CORES_PER_SOCKET_TARGET-1))
    LIST_CORES_SOCKET0=$(seq -s',' $FIRST_CORE_SOCKET0 $LAST_CORE_SOCKET0)
    LIST_CORES_SOCKET1=$(seq -s',' $FIRST_CORE_SOCKET1 $LAST_CORE_SOCKET1)
    # FIX: direct assignment instead of the original useless 'echo' subshell
    LIST_CORES=${LIST_CORES_SOCKET0},${LIST_CORES_SOCKET1}

    # hack for one node case
    nb_nodes=$(nodeset -c $SLURM_NODELIST)
    NODES_ECE_IFS_NEMO=$(nodeset -f $SLURM_NODELIST)

    # Build the multi-prog configuration: one line per executable, giving its
    # global task-rank range followed by the executable and its arguments.
    rm -f conf.txt
    _task1=-1
    NBTASKS=0
    while (( "$#" ))
    do
        nranks=$1
        executable=./$(basename $2)
        shift
        shift
        _task0=$((_task1+1))
        _task1=$((_task0+nranks-1))
        cmd="${_task0}-${_task1} ${executable}"
        NBTASKS=$((NBTASKS+nranks))

        # Collect this executable's arguments up to the next '--' separator
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        echo ${cmd} >>conf.txt
        shift || true
    done

    # NOTE: OpenMP thread count is currently forced to 1 here
    export OMP_NUM_THREADS=1
    case $OMP_NUM_THREADS in
        1) BINDING=map_cpu:$LIST_CORES ;;
        2) MASK=$(~/KNMI/mytools/build_mask.14cores.sh 28x2)
           BINDING=mask_cpu:$MASK ;;
        *) echo "$OMP_NUM_THREADS OpenMP not yet implemented." ; exit ;;
    esac

    cmd="srun --kill-on-bad-exit=1"

    # Intel MPI / OpenMP runtime tuning
    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    # FIX: dropped the dead 'verbose,compact,...' KMP_AFFINITY export that
    # was immediately overwritten by this one.
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=conf.txt
    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=$(nodeset -c $NODES_ECE_IFS_NEMO)
    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODES_ECE_IFS_NEMO --ntasks=$NBTASKS --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}
function finalise()
{
    # Execute any post-run functionality, e.g. platform dependent cleaning
    # or a resubmit. A resubmit happens when resubmit_job is true and the
    # current leg ends before the overall run end date.
    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # FIX: corrected the log message typo ("No implemented yet!")
        info "Not implemented yet!"

        # # Need to go to start_dir to find the run script
        # cd ${start_dir}
        # # Submit command
        # # Note: This does not work if you specify a job name with sbatch -J jobname!
        # sbatch -N ${SLURM_JOB_NUM_NODES} \
        #        -n $((ifs_numproc + nem_numproc + tm5_numproc)) \
        #        --exclusive \
        #        --ntasks-per-node=${proc_per_node} \
        #        -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -d ${SLURM_JOB_ID} \
        #        ${resubmit_opt} \
        #        ./${SLURM_JOB_NAME}
    fi
}
function postprocess()
{
    # Submit a batch job that post-processes the IFS output of one leg.
    # $1 - leg output directory, relative to the current working directory
    # Reads: start_dir, stdout_file, leg_number
    # FIX: quoted all path expansions (data_dir, sbatch -o/-e arguments) so
    # directories containing spaces no longer break the submission.
    data_dir="$(pwd)/$1"
    script_file="$1/postprocess"
    # Generate the post-processing script; variables are expanded NOW, so the
    # script is self-contained when sbatch runs it later.
    cat >"${script_file}" <<EOF
#! /bin/bash
cd "${start_dir}/../../../postprocessing"
./postprocess_leg "${data_dir}" "${data_dir}/postprocessed"
EOF
    sbatch -N 1 --exclusive \
           -o "${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1)))" \
           -e "${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1)))" \
           "${script_file}"
}