#!/bin/bash

# Platform dependent configuration functions for the 'zenobe' machine
# (zenobe.hpc.cenaero.be)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # SCRATCH is not defined in MN3, define it here
    # and also make sure it is defined when compiling
    export SCRATCH=/gpfs/scratch/acad/ecearth/${USER}

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=${HOME}/models/ecearth_3.3.4.2/sources
    run_dir=/gpfs/scratch/acad/ecearth/${USER}/ecearth/run/${exp_name}
    ini_data_dir=/gpfs/scratch/acad/ecearth/data/bsc32/v3.3.4/inidata
    archive_dir=/gpfs/scratch/acad/ecearth/${USER}/ecearth/archive/${exp_name}

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${SLURM_SUBMIT_DIR-$PWD}/${SLURM_JOB_NAME-"local"}_${SLURM_JOB_ID-"id"}.log

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=true
    resubmit_opt=""

    module load EasyBuild/2023a
    export MODULEPATH=$MODULEPATH:/gpfs/projects/acad/ecearth/softs/easybuild/2023a/modules/all
    module load netCDF-Fortran/4.6.1-iompi-2023a
    module load CDO/2.2.2-iompi-2023a
    module load NCO/5.1.3-iomkl-2023a
    module load ecCodes/2.31.0-iompi-2023a

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=${HOME}/models/ecearth_3.3.4.2/sources/util/grib_table_126:${EBROOTECCODES}/share/eccodes/definitions
    export GRIB_SAMPLES_PATH=${EBROOTECCODES}/share/eccodes/ifs_samples/grib1
    export GRIB_BIN_PATH=${EBROOTECCODES}/bin

    # Configure number of processors per node
    proc_per_node=128

    # Use machinefiles or not
    [[ `echo "$use_machinefile" | tr '[:upper:]' '[:lower:]'` == true ]] && use_machinefile=true || use_machinefile=false

    ulimit -s unlimited

    # Load specific MPI environment configuration
    configure_mpi
}

function configure_python()
{
    # specific for python+eccodes setup - used for OSM pre/post-processing
    # it would be simple to do the following in configure
    # module load eccodes/2.8.0 python/2.7.13
    module load eccodes/2.8.0 python/2.7.13
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/apps/ECCODES/2.8.0/INTEL/bin
}

function configure_mpi()
{
    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1

    #export I_MPI_DEBUG=5
    #export I_MPI_ADJUST_BCAST=3
    #export PSM2_MTU=8196
    #export PSM2_MEMORY=large
    #export PSM2_MQ_RNDV_HFI_THRESH=1
    #export I_MPI_DEBUG=5
    #export I_MPI_FABRIC=tmi
    #unset I_MPI_PMI_LIBRARY
    #export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
    #export I_MPI_FABRICS=shm:ofi
}

function get_hosts()
{
    # This function uses a scheduler command to get the hosts allocated for the current job
    hosts=(`scontrol show hostname | paste -s`)
}
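# Note on get_hosts (an explanatory addition): `scontrol show hostname` expands
# the SLURM node list into one hostname per line and `paste -s` joins those
# lines into a single tab-separated line, so for a hypothetical two-node job
# with SLURM_JOB_NODELIST=node[001-002] the assignment above yields
#
#   hosts=(node001 node002)
#
# which machinefile_init and machinefile_add below use to lay out the machinefile.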
function machinefile_config()
{
    # User configuration starts here
    # Hard-coded c4mip configurations; they must use the proper _numproc settings
    if has_config ifs nemo pisces rnfmapper xios lpjg ; then
        if ! has_config tm5 ; then
            ifs_ppn=48  ; [[ ${ifs_numproc}  != 336 ]] && info "wrong numproc setting for ifs in machinefile_config"  || true
            nem_ppn=43  ; [[ ${nem_numproc}  != 380 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=5   ; [[ ${xio_numproc}  != 5   ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=5  ; [[ ${lpjg_numproc} != 40  ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
        else
            ifs_ppn=48  ; [[ ${ifs_numproc}  != 256 ]] && info "wrong numproc setting for ifs in machinefile_config"  || true
            nem_ppn=46  ; [[ ${nem_numproc}  != 192 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=2   ; [[ ${xio_numproc}  != 2   ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=2  ; [[ ${lpjg_numproc} != 8   ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
            tm5_ppn=4   ; [[ ${tm5_numproc}  != 4   ]] && info "wrong numproc setting for tm5 in machinefile_config"  || true
        fi
    else
        # Add any new exclusive binary here
        ifs_exc=TRUE
        nem_exc=TRUE
        xio_exc=TRUE
        lpjg_exc=TRUE
        tm5_exc=TRUE

        # Modify the allocation to each binary using more than one process here
        ifs_ppn=48
        nem_ppn=48
        xio_ppn=48
        lpjg_ppn=48
        tm5_ppn=45
    fi
}

function machinefile_init()
{
    # Get max processes per node from the platform variable
    max_ppn=$proc_per_node

    components=( ifs nem xio rnf amip lpjg )

    if $(has_config tm5)
    then
        components=( "${components[@]}" "tm5" )
    fi

    for component in ${components[@]}
    do
        eval ${component}_exc=FALSE
        eval ${component}_ppn=1
    done

    # Call the user configuration and get_hosts functions
    machinefile_config
    get_hosts

    # Declare array to store the processes as they are assigned
    declare -a -g processes_hosts
    for n in `seq 0 ${#hosts[@]}`
    do
        processes_hosts[$n]=0
    done

    > machinefile

    current_hostid=0
}

machinefile_find_available_node()
{
    while [ $((${processes_hosts[$current_hostid]} + ${!ppn})) -gt $max_ppn ]
    do
        let "current_hostid += 1"
    done
}

machinefile_add()
{
    total_proc=$2

    # Iterate through all the possible binaries
    for component in ${components[@]}
    do
        binary="${component}_exe_file"
        exclusive="${component}_exc"

        # Check if the current binary matches the input executable
        if [ ./$(basename ${!binary}) = "$1" ]
        then
            ppn="${component}_ppn"

            # Exclusive mode: start allocation at the first empty node
            if [[ ${!exclusive} == "TRUE" ]]
            then
                while [ ${processes_hosts[$current_hostid]} -gt 0 ]
                do
                    let "current_hostid += 1"
                done

            # Shared mode: start allocation in the first node with enough free cores.
            # Note that only the first node is checked, so if a previous binary had
            # "exc=TRUE", allocation space is not ensured in subsequent nodes
            else
                current_hostid=0
                machinefile_find_available_node
            fi

            # Allocate ppn cores in each of the subsequent nodes until there are
            # no more processes to assign
            count=0
            while [ ${total_proc} -gt 0 ]
            do
                if [ ${current_hostid} -ge ${#hosts[@]} ]
                then
                    echo "Not enough computing nodes"
                    exit 1
                fi

                current_hostname=${hosts[$current_hostid]}

                while [[ ${total_proc} -gt 0 && ${count} -lt ${!ppn} ]]
                do
                    echo ${hosts[$current_hostid]} >> machinefile
                    let "count += 1"
                    let "processes_hosts[$current_hostid] += 1"
                    let "total_proc -= 1" || true
                done

                if [ ${count} -eq ${!ppn} ]
                then
                    let "current_hostid += 1"
                    machinefile_find_available_node
                    count=0
                fi
            done
        fi
    done
}
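# Optional debugging aid (an addition to the original platform file, not called
# anywhere in it): prints how many MPI ranks machinefile_add placed on each
# allocated host. It only reads the globals filled in by machinefile_init and
# machinefile_add, so it can be invoked manually after launch when
# use_machinefile=true.
function machinefile_summary()
{
    local i
    for i in "${!hosts[@]}"
    do
        # processes_hosts[i] stays 0 for nodes that received no ranks
        info "${hosts[$i]}: ${processes_hosts[$i]:-0} process(es) assigned"
    done
}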
function launch()
{
    # Compute and check the node distribution
    info "======================="
    info "Node/proc distribution:"
    info "-----------------------"
    info "IFS:    ${ifs_numproc}"
    info "NEMO:   ${nem_numproc}"
    info "XIOS:   ${xio_numproc}"
    info "RUNOFF: ${rnf_numproc}"
    info "======================="

    cmd="mpirun"

    cat /dev/null > prog.conf

    proc_id=0

    if [ "$use_machinefile" = "true" ]
    then
        cmd="mpirun -machinefile machinefile"
        machinefile_init
    fi

    while (( "$#" ))
    do
        # Get number of MPI ranks and executable name
        nranks=$1
        executable=./$(basename $2)

        if [ "$use_machinefile" = "true" ]
        then
            machinefile_add $executable $nranks
        fi

        shift
        shift

        cmd+=" -n $nranks $executable"

        # Add any arguments to the executable
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        for i in $(eval echo "{1..${nranks}}")
        do
            echo "$proc_id ${executable}" >> prog.conf
            proc_id=$(($proc_id+1))
        done

        # Add colon if more executables follow
        (( "$#" )) && cmd+=" :"
    done

    #cmd="srun --kill-on-bad-exit=1 --multi-prog prog.conf"
    pwd
    echo $cmd
    #exit
    $cmd
}

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"

        # Need to go to start_dir to find the run script
        cd ${start_dir}

        # Submit command
        # Note: This does not work if you specify a job name with sbatch -J jobname!
        sbatch -N ${SLURM_JOB_NUM_NODES} \
               -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -d ${SLURM_JOB_ID} \
               ${resubmit_opt} \
               ./${SLURM_JOB_NAME}
    fi
}
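# Usage sketch (illustrative only, not executed): the EC-Earth run script is
# expected to call launch with groups of "<nranks> <executable> [args]"
# separated by "--". The executable names and argument lists below are
# assumptions based on typical EC-Earth 3 run scripts, not definitions made in
# this file:
#
#   launch ${ifs_numproc} ifsmaster-intel -v ecmwf -e ${exp_name} -- \
#          ${nem_numproc} nemo.exe -- \
#          ${xio_numproc} xios_server.exe
#
# With use_machinefile=true and the 336/380/5 c4mip layout above, launch would
# build and run a command along the lines of:
#
#   mpirun -machinefile machinefile -n 336 ./ifsmaster-intel -v ecmwf -e ${exp_name} \
#          : -n 380 ./nemo.exe : -n 5 ./xios_server.exe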