ecconf.cfg

#!/bin/bash

# Platform dependent configuration functions for the 'zenobe' machine
# (zenobe.hpc.cenaero.be)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # SCRATCH is not defined in MN3, define it here
    # and also make sure it is defined when compiling
    export SCRATCH=/gpfs/scratch/acad/ecearth/${USER}

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=${HOME}/models/ecearth_3.3.3.2/sources
    run_dir=/gpfs/scratch/acad/ecearth/${USER}/ecearth/run/${exp_name}
    ini_data_dir=/gpfs/scratch/acad/ecearth/data/bsc32/v3.3.3.2/inidata
    archive_dir=/gpfs/scratch/acad/ecearth/${USER}/ecearth/archive/${exp_name}

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${SLURM_SUBMIT_DIR-$PWD}/${SLURM_JOB_NAME-"local"}_${SLURM_JOB_ID-"id"}.log
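    # For example, outside a Slurm batch job the ${VAR-default} fallbacks above
    # resolve to ${PWD}/local_id.log.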
    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=true
    resubmit_opt=""

    module purge
    module load EasyBuild/2023a
    export MODULEPATH=$MODULEPATH:/gpfs/projects/acad/ecearth/softs/easybuild/2023a/modules/all
    module load netCDF-Fortran/4.6.1-iompi-2023a
    module load CDO/2.2.2-iompi-2023a
    module load NCO/5.1.3-iomkl-2023a
    module load ecCodes/2.31.0-iompi-2023a

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=${HOME}/models/ecearth_3.3.3.2/sources/util/grib_table_126:${EBROOTECCODES}/share/eccodes/definitions
    export GRIB_SAMPLES_PATH=${EBROOTECCODES}/share/eccodes/ifs_samples/grib1
    export GRIB_BIN_PATH=${EBROOTECCODES}/bin

    # Configure number of processors per node
    proc_per_node=128

    # Use machinefiles or not
    [[ $(echo "$use_machinefile" | tr '[:upper:]' '[:lower:]') == true ]] && use_machinefile=true || use_machinefile=false

    ulimit -s unlimited

    # Load specific MPI environment configuration
    configure_mpi
}
function configure_python()
{
    # Specific for the python+eccodes setup - used for OSM pre/post-processing.
    # It would be simple to do the following module load in configure() instead:
    # module load eccodes/2.8.0 python/2.7.13
    module load eccodes/2.8.0 python/2.7.13
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/apps/ECCODES/2.8.0/INTEL/bin
}
function configure_mpi()
{
    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1

    #export I_MPI_DEBUG=5
    #export I_MPI_ADJUST_BCAST=3
    #export PSM2_MTU=8196
    #export PSM2_MEMORY=large
    #export PSM2_MQ_RNDV_HFI_THRESH=1
    #export I_MPI_FABRIC=ofi
    #unset I_MPI_PMI_LIBRARY
    #export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
    #export I_MPI_FABRICS=shm:ofi
}
function get_hosts()
{
    # This function uses a scheduler command to get the hosts allocated for the current job
    hosts=( $(scontrol show hostname | paste -s) )
}
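# Illustrative example (node names hypothetical): for a two-node allocation,
# `scontrol show hostname` prints one hostname per line, e.g.
#   node001
#   node002
# so get_hosts yields hosts=(node001 node002) and ${#hosts[@]} is 2.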
function machinefile_config()
{
    # User configuration starts here
    # Hard-coded c4mip configurations, must use the proper _numproc settings
    if has_config ifs nemo pisces rnfmapper xios lpjg ; then
        if ! has_config tm5 ; then
            ifs_ppn=48  ; [[ ${ifs_numproc}  != 336 ]] && info "wrong numproc setting for ifs in machinefile_config"  || true
            nem_ppn=43  ; [[ ${nem_numproc}  != 380 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=5   ; [[ ${xio_numproc}  != 5   ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=5  ; [[ ${lpjg_numproc} != 40  ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
        else
            ifs_ppn=48  ; [[ ${ifs_numproc}  != 256 ]] && info "wrong numproc setting for ifs in machinefile_config"  || true
            nem_ppn=46  ; [[ ${nem_numproc}  != 192 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=2   ; [[ ${xio_numproc}  != 2   ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=2  ; [[ ${lpjg_numproc} != 8   ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
            tm5_ppn=4   ; [[ ${tm5_numproc}  != 4   ]] && info "wrong numproc setting for tm5 in machinefile_config"  || true
        fi
    else
        # Add any new exclusive binary here
        ifs_exc=TRUE
        nem_exc=TRUE
        xio_exc=TRUE
        lpjg_exc=TRUE
        tm5_exc=TRUE

        # Modify the allocation to each binary using more than one process here
        ifs_ppn=48
        nem_ppn=48
        xio_ppn=48
        lpjg_ppn=48
        tm5_ppn=45
    fi
}
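# Worked example of the arithmetic for the first (no-tm5) case, assuming IFS
# ranks are placed first: with ifs_numproc=336 and ifs_ppn=48, IFS occupies
# 48 cores on each of 7 nodes; NEMO (nem_ppn=43) then shares those nodes,
# adding 43 ranks per node (48+43=91 of the 128 cores) before spilling onto
# additional nodes until its 380 ranks are placed. The exact packing depends
# on the order in which the run script passes the binaries to launch.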
function machinefile_init()
{
    # Get max processes per node from the platform variable
    max_ppn=$proc_per_node

    components=( ifs nem xio rnf amip lpjg )

    if has_config tm5
    then
        components=( "${components[@]}" "tm5" )
    fi

    for component in ${components[@]}
    do
        eval ${component}_exc=FALSE
        eval ${component}_ppn=1
    done

    # Call user configuration and get_hosts functions
    machinefile_config
    get_hosts

    # Declare array to store the processes as they are assigned
    declare -a -g processes_hosts
    for n in $(seq 0 ${#hosts[@]})
    do
        processes_hosts[$n]=0
    done

    > machinefile

    current_hostid=0
}
function machinefile_find_available_node()
{
    while [ $((${processes_hosts[$current_hostid]} + ${!ppn})) -gt $max_ppn ]
    do
        let "current_hostid += 1"
    done
}
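# For example, with max_ppn=128 and ppn pointing at a component that uses 48
# ranks per node, a node already holding 96 ranks is skipped (96+48=144 > 128)
# and current_hostid moves on to the next host.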
function machinefile_add()
{
    total_proc=$2

    # Iterate through all the possible binaries
    for component in ${components[@]}
    do
        binary="${component}_exe_file"
        exclusive="${component}_exc"

        # Check if the current binary matches the input executable
        if [ ./$(basename ${!binary}) = "$1" ]
        then
            ppn="${component}_ppn"

            # Exclusive mode: start allocation at the first empty node
            if [[ ${!exclusive} == "TRUE" ]]
            then
                while [ ${processes_hosts[$current_hostid]} -gt 0 ]
                do
                    let "current_hostid += 1"
                done
            # Shared mode: start allocation at the first node with enough free cores.
            # Note that only this first node is checked, so if a previous binary had
            # "exc=TRUE", free space is not guaranteed on subsequent nodes.
            else
                current_hostid=0
                machinefile_find_available_node
            fi

            # Allocate ppn cores on each subsequent node until there are no more processes to assign
            count=0
            while [ ${total_proc} -gt 0 ]
            do
                if [ ${current_hostid} -ge ${#hosts[@]} ]
                then
                    echo "Not enough computing nodes"
                    exit 1
                fi

                current_hostname=${hosts[$current_hostid]}

                while [[ ${total_proc} -gt 0 && ${count} -lt ${!ppn} ]]
                do
                    echo ${hosts[$current_hostid]} >> machinefile
                    let "count += 1"
                    let "processes_hosts[$current_hostid] += 1"
                    let "total_proc -= 1" || true
                done

                if [ ${count} -eq ${!ppn} ]
                then
                    let "current_hostid += 1"
                    machinefile_find_available_node
                    count=0
                fi
            done
        fi
    done
}
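# Illustrative call (executable name hypothetical): if ifs_exe_file ends in
# "ifsmaster" and ifs_ppn=48, then `machinefile_add ./ifsmaster 336` appends
# 336 hostname lines to 'machinefile', 48 per node, advancing through the
# hosts returned by get_hosts.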
function launch()
{
    # Compute and check the node distribution
    info "======================="
    info "Node/proc distribution:"
    info "-----------------------"
    info "IFS: ${ifs_numproc}"
    info "NEMO: ${nem_numproc}"
    info "XIOS: ${xio_numproc}"
    info "RUNOFF: ${rnf_numproc}"
    info "======================="

    cmd="mpirun"

    cat /dev/null > prog.conf
    proc_id=0

    if [ "$use_machinefile" = "true" ]
    then
        cmd="mpirun -machinefile machinefile"
        machinefile_init
    fi

    while (( "$#" ))
    do
        # Get number of MPI ranks and executable name
        nranks=$1
        executable=./$(basename $2)

        if [ "$use_machinefile" = "true" ]
        then
            machinefile_add $executable $nranks
        fi

        shift
        shift

        cmd+=" -n $nranks $executable"

        # Add any arguments to executable
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        for i in $(eval echo "{1..${nranks}}")
        do
            echo "$proc_id ${executable}" >> prog.conf
            proc_id=$(($proc_id+1))
        done

        # Add colon if more executables follow
        (( "$#" )) && cmd+=" :"
    done

    #cmd="srun --kill-on-bad-exit=1 --multi-prog prog.conf"
    pwd
    echo $cmd
    #exit
    $cmd
}
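# Illustrative invocation (executable names hypothetical): the run script is
# expected to call launch with groups of "nranks executable [args...] --", e.g.
#   launch 336 ifsmaster ECE3 -- 380 nemo.exe -- 5 xios_server.exe
# which builds and runs a command of the form
#   mpirun -n 336 ./ifsmaster ECE3 : -n 380 ./nemo.exe : -n 5 ./xios_server.exe
# (with "-machinefile machinefile" inserted when use_machinefile=true).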
function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"

        # Need to go to start_dir to find the run script
        cd ${start_dir}

        # Submit command
        echo "sbatch -N ${SLURM_JOB_NUM_NODES-"1"} -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
            -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) -d ${SLURM_JOB_ID-"id"} \
            ./${SLURM_JOB_NAME-"run"}.sh"

        # Note: This does not work if you specify a job name with sbatch -J jobname!
        sbatch -N ${SLURM_JOB_NUM_NODES-"1"} \
               -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -d ${SLURM_JOB_ID-"id"} \
               ./${SLURM_JOB_NAME-"run"}.sh
    fi
}
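# For example, with leg_number=1 and a Slurm job name of "ece" (hypothetical),
# the next leg is submitted as ./ece.sh, made dependent on the current job via
# -d ${SLURM_JOB_ID}, and its output goes to ${run_dir}/ece_<jobid>.log.002.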