# Platform dependent configuration functions for the 'rhino' machine
# (KNMI, NL)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # Configure paths for building/running EC-Earth
    export ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    export run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    export ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure GRIB API paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure GRIBEX paths
    export LOCAL_DEFINITION_TEMPLATES=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]
    #export ECMWF_LOCAL_TABLE_PATH=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +u
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        for m in "${module_list}"
        do
            eval $(/usr/libexec/cmod sh add $m)
        done
        set -u
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    ulimit -s unlimited
    ulimit -n 2048
    ulimit -c unlimited
    #ulimit -a
}

function launch_atos()
{
    # version using srun
    # banner launch

    cmd="srun --kill-on-bad-exit=1"

    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    export KMP_AFFINITY=verbose,compact,granularity=fine
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=$1
    NODESET=$2
    NBTASKS=$3
    BINDING=$4
    export OMP_NUM_THREADS=$5

    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=`nodeset -c $NODESET`

    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODESET --ntasks=$NBTASKS \
        --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}

function launch()
{
    # version using srun

    NB_CORES_PER_SOCKET_ACTUAL=14
    NB_CORES_PER_SOCKET_TARGET=14

    FIRST_CORE_SOCKET0=0
    LAST_CORE_SOCKET0=$((NB_CORES_PER_SOCKET_TARGET-1))
    FIRST_CORE_SOCKET1=$NB_CORES_PER_SOCKET_ACTUAL
    LAST_CORE_SOCKET1=$((FIRST_CORE_SOCKET1+NB_CORES_PER_SOCKET_TARGET-1))

    LIST_CORES_SOCKET0=`seq -s',' $FIRST_CORE_SOCKET0 $LAST_CORE_SOCKET0`
    LIST_CORES_SOCKET1=`seq -s',' $FIRST_CORE_SOCKET1 $LAST_CORE_SOCKET1`
    LIST_CORES=`echo $LIST_CORES_SOCKET0,$LIST_CORES_SOCKET1`

    # hack for one node case
    nb_nodes=$(nodeset -c $SLURM_NODELIST)

    NODES_ECE_IFS_NEMO=`nodeset -f $SLURM_NODELIST`

    rm -f conf.txt

    _task1=-1
    NBTASKS=0
    while (( "$#" ))
    do
        nranks=$1
        executable=./$(basename $2)
        shift
        shift

        _task0=$((_task1+1))
        _task1=$((_task0+nranks-1))

        cmd="${_task0}-${_task1} ${executable}"

        NBTASKS=$((NBTASKS+nranks))

        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done

        echo ${cmd} >>conf.txt

        shift || true
    done

    export OMP_NUM_THREADS=1

    case $OMP_NUM_THREADS in
        1)  BINDING=map_cpu:$LIST_CORES
            ;;
        2)  MASK=`~/KNMI/mytools/build_mask.14cores.sh 28x2`
            BINDING=mask_cpu:$MASK
            ;;
        *)  echo "$OMP_NUM_THREADS OpenMP threads not yet implemented."
            exit 1
            ;;
    esac

    cmd="srun --kill-on-bad-exit=1"

    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    export KMP_AFFINITY=verbose,compact,granularity=fine
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=conf.txt

    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=`nodeset -c $NODES_ECE_IFS_NEMO`

    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODES_ECE_IFS_NEMO --ntasks=$NBTASKS \
        --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        info "Not implemented yet!"
        # # Need to go to start_dir to find the run script
        # cd ${start_dir}
        # # Submit command
        # # Note: This does not work if you specify a job name with sbatch -J jobname!
        # sbatch -N ${SLURM_JOB_NUM_NODES} \
        #        -n $((ifs_numproc + nem_numproc + tm5_numproc)) \
        #        --exclusive \
        #        --ntasks-per-node=${proc_per_node} \
        #        -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -d ${SLURM_JOB_ID} \
        #        ${resubmit_opt} \
        #        ./${SLURM_JOB_NAME}
    fi
}

function postprocess()
{
    # This function submits a script to postprocess the IFS output

    data_dir=`pwd`/$1

    script_file="$1/postprocess"
    cat >"${script_file}" <
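
# -----------------------------------------------------------------------------
# Usage sketch (illustrative note, not part of the original platform file):
# launch() reads its arguments as groups "<nranks> <executable> [args...]"
# separated by "--", writes one line per group to conf.txt, and hands that file
# to a single "srun ... --multi-prog conf.txt" call. A run script could
# therefore start a coupled job roughly as below. The executable names, the
# -v/-e options and exp_name are assumed placeholders; ifs_numproc and
# nem_numproc are the rank-count variables already referenced in the
# commented-out sbatch command in finalise().
#
#   launch ${ifs_numproc} master.exe -v ecmwf -e ${exp_name} -- \
#          ${nem_numproc} nemo.exe
#
# With ifs_numproc=128, nem_numproc=24 and exp_name=ECE3 this would produce
#
#   0-127 ./master.exe -v ecmwf -e ECE3
#   128-151 ./nemo.exe
#
# in conf.txt and launch 152 MPI tasks in total.
# -----------------------------------------------------------------------------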