nsc-tetralith.cfg.tmpl

# Platform dependent configuration functions for the 'tetralith' machine
# (tetralith.nsc.liu.se)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"
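
    # Illustrative only: resubmit_opt usually carries extra sbatch options,
    # e.g. a compute project and wall-time limit (values below are made up):
    #   resubmit_opt="--account=snic20xx-x-xx --time=24:00:00"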

    # In order to use a specific number of cores per node for LPJG:
    # - Set use_machinefile=true
    # - Set lpjg_ppn to the required value
    # - Adapt the number of nodes for your job!
    # Note that executables are not mixed on a node when use_machinefile is
    # true. This means that every executable uses at least one node!
    use_machinefile=false
    lpjg_ppn=4
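
    # Illustrative example (assuming 32 cores per Tetralith node): with
    # use_machinefile=true, proc_per_node=32 and lpjg_ppn=4, a run with
    # 128 IFS ranks and 8 LPJG ranks needs 4 nodes for IFS plus 2 nodes
    # for LPJG, i.e. at least 6 nodes in total.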

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
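
    # Illustrative only (module names/versions are hypothetical): module_list
    # expands to a space-separated list of environment modules, e.g.
    #   module_list="buildenv-intel/2018a-eb CDO/1.9.5-nsc1"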

    module () {
        eval $($LMOD_CMD bash "$@") && eval $(${LMOD_SETTARG_CMD:-:} -s sh)
    }

    if [ -n "${module_list}" ]
    then
        set +u
        module purge
        for m in "${module_list}"
        do
            module add $m
        done
        set -u
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    # Set tetralith specific environment
    unset I_MPI_PMI_LIBRARY
    export I_MPI_FABRICS=shm:tmi
    export TMI_CONFIG=${I_MPI_ROOT}/intel64/etc/tmi.conf
    # Note: this resets LD_LIBRARY_PATH to the Intel MPI libraries only,
    # overriding any directories added above
    export LD_LIBRARY_PATH=${I_MPI_ROOT}/intel64/lib
    ulimit -s unlimited
}

function configure_python()
{
    # specific for python+eccodes setup - used for OSM pre- and post-processing
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/software/sse/manual/eccodes/2.10.0/nsc1-ENABLE-AEC-intel-2018a-eb/bin
    export GRIB_DEFINITION_PATH=${HOME}/esm/osm/sources/util/grib_table_126:/software/sse/manual/eccodes/2.10.0/nsc1-ENABLE-AEC-intel-2018a-eb/share/eccodes/definitions
    export GRIB_SAMPLES_PATH=/software/sse/manual/eccodes/2.10.0/nsc1-ENABLE-AEC-intel-2018a-eb/share/eccodes/ifs_samples/grib1
}
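
# Illustrative only: the run script is expected to call launch() with one
# "<nranks> <executable> [extra args]" group per binary, groups separated by
# "--" (rank counts and binary names below are made up):
#   launch 288 ifsmaster.exe -v ecmwf -e ECE3 -- 96 nemo.exe -- 1 xios_server.exe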

function launch()
{
    cmd="mpiexec.hydra -bootstrap slurm -genvall -prepend-rank -ordered-output"

    if $use_machinefile
    then
        cmd+=" -machinefile mfile"
        rm -f mfile
        nodelist=($(hostlist -e $SLURM_NODELIST))
        assigned_nodes=0
    fi

    while (( "$#" ))
    do
        nranks=$1
        executable=./$(basename $2)
        shift
        shift

        cmd+=" -n $nranks $executable"

        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        (( "$#" )) && cmd+=" :"

        if $use_machinefile
        then
            case "$executable" in
                ./guess_*) ppn=$lpjg_ppn ;;
                *)         ppn=$proc_per_node ;;
            esac
            while (( $nranks > 0 ))
            do
                (( n = ppn<nranks?ppn:nranks ))
                echo "${nodelist[assigned_nodes]}:${n}" >> mfile
                (( assigned_nodes += 1 ))
                (( nranks -= n )) || true
            done
        fi
    done

    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1

    $cmd
}
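
# Illustrative only: with use_machinefile=true, mfile ends up with one
# "<node>:<ranks>" line per allocated node, e.g. (hypothetical node names)
#   n101:32
#   n102:4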

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # Need to go to start_dir to find the run script
        cd ${start_dir}
        # Submit command
        # Note: This does not work if you specify a job name with sbatch -J jobname!
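        # Illustrative only: with the stdout_file set in configure(), leg 3 of
        # a job named "ece3" would write to ${run_dir}/ece3.out.003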
        sbatch -N ${SLURM_JOB_NUM_NODES} \
               -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -d ${SLURM_JOB_ID} \
               ${resubmit_opt} \
               ./${SLURM_JOB_NAME}
    fi
}