# Platform dependent configuration functions for MareNostrum
# (mnX.bsc.es)

function configure() {
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # SCRATCH is not defined in MN3, define it here
    # and also make sure it is defined when compiling
    export SCRATCH=/gpfs/scratch/`id -gn`/${USER}

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +eu
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        for m in "${module_list}"
        do
            module add $m
        done
        set -eu
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    # Use machinefiles or not
    [[ `echo "$use_machinefile" | tr '[:upper:]' '[:lower:]'` == true ]] && use_machinefile=true || use_machinefile=false

    ulimit -s unlimited

    # Load specific IMPI environment configuration
    configure_impi
}

function configure_python() {
    # specific for python+eccodes setup - used for OSM pre/post-processing
    # it would be simple to do the following in configure
    # module load eccodes/2.8.0 python/2.7.13
    module load eccodes/2.8.0 python/2.7.13
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/apps/ECCODES/2.8.0/INTEL/bin
}

function configure_impi() {
    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1
    export I_MPI_DEBUG=5
}

function get_hosts() {
    # This function uses a scheduler command to get the hosts allocated for the current job
    hosts=(`scontrol show hostname | paste -s`)
}
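
# Illustrative sketch (comments only, not executed): on a hypothetical
# 3-node allocation, get_hosts above would leave something like
#     hosts=(s01r1b01 s01r1b02 s01r1b03)
# The node names are made up; the machinefile_* helpers below distribute
# the MPI ranks of each binary over exactly these hosts.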

function machinefile_config() {
    # User configuration starts here
    # hard-coded c4mip configurations, must use the proper _numproc settings
    if has_config ifs nemo pisces rnfmapper xios lpjg ; then
        if ! has_config tm5 ; then
            ifs_ppn=48 ; [[ ${ifs_numproc} != 336 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=43 ; [[ ${nem_numproc} != 380 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=5 ; [[ ${xio_numproc} != 5 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=5 ; [[ ${lpjg_numproc} != 40 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
        else
            ifs_ppn=48 ; [[ ${ifs_numproc} != 256 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=46 ; [[ ${nem_numproc} != 192 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=2 ; [[ ${xio_numproc} != 2 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=2 ; [[ ${lpjg_numproc} != 8 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
            tm5_ppn=4 ; [[ ${tm5_numproc} != 4 ]] && info "wrong numproc setting for tm5 in machinefile_config" || true
        fi
    else
        # Add any new exclusive binary here
        ifs_exc=TRUE
        nem_exc=TRUE
        xio_exc=TRUE
        lpjg_exc=TRUE
        tm5_exc=TRUE

        # Modify the allocation to each binary using more than one process here
        ifs_ppn=48
        nem_ppn=48
        xio_ppn=48
        lpjg_ppn=48
        tm5_ppn=45
    fi
}

function machinefile_init() {
    # Get max processes per node from the platform variable
    max_ppn=$proc_per_node

    components=( ifs nem xio rnf amip lpjg )

    if $(has_config tm5)
    then
        components=( "${components[@]}" "tm5" )
    fi

    for component in ${components[@]}
    do
        eval ${component}_exc=FALSE
        eval ${component}_ppn=1
    done

    # Call user configuration and get_hosts functions
    machinefile_config
    get_hosts

    # Declare array to store the processes as they are assigned
    declare -a -g processes_hosts
    for n in `seq 0 ${#hosts[@]}`
    do
        processes_hosts[$n]=0
    done

    # Create an empty machinefile
    > machinefile

    current_hostid=0
}

machinefile_find_available_node() {
    while [ $((${processes_hosts[$current_hostid]} + ${!ppn})) -gt $max_ppn ]
    do
        let "current_hostid += 1"
    done
}

machinefile_add() {
    total_proc=$2

    # Iterate through all the possible binaries
    for component in ${components[@]}
    do
        binary="${component}_exe_file"
        exclusive="${component}_exc"

        # Check if the current binary matches the input executable
        if [ ./$(basename ${!binary}) = "$1" ]
        then
            ppn="${component}_ppn"

            # Exclusive mode: start allocation at the first empty node
            if [[ ${!exclusive} == "TRUE" ]]
            then
                while [ ${processes_hosts[$current_hostid]} -gt 0 ]
                do
                    let "current_hostid += 1"
                done
            # Shared mode: start allocation in the first node with enough free cores.
            # Notice that only the first node is checked; if a previous binary had
            # "exc=TRUE", allocation space is not ensured in subsequent nodes.
            else
                current_hostid=0
                machinefile_find_available_node
            fi

            # Allocate ppn cores in each of the subsequent nodes till there are no more processes to assign
            count=0
            while [ ${total_proc} -gt 0 ]
            do
                if [ ${current_hostid} -ge ${#hosts[@]} ]
                then
                    echo "Not enough computing nodes"
                    exit 1
                fi

                current_hostname=${hosts[$current_hostid]}

                while [[ ${total_proc} -gt 0 && ${count} -lt ${!ppn} ]]
                do
                    echo ${hosts[$current_hostid]} >> machinefile
                    let "count += 1"
                    let "processes_hosts[$current_hostid] += 1"
                    let "total_proc -= 1" || true
                done

                if [ ${count} -eq ${!ppn} ]
                then
                    let "current_hostid += 1"
                    machinefile_find_available_node
                    count=0
                fi
            done
        fi
    done
}
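
# Illustrative sketch of the resulting machinefile (comments only, not
# executed): with max_ppn=48, four hosts and every *_exc=TRUE (the default
# branch of machinefile_config above), a call sequence such as
#     machinefile_add ./ifsmaster 96        -> host1 x48, host2 x48
#     machinefile_add ./nemo.exe 24         -> host3 x24
#     machinefile_add ./xios_server.exe 1   -> host4 x1  (exclusive: skips host3)
# writes one hostname per MPI rank to ./machinefile. The executable names and
# rank counts here are hypothetical examples, not fixed values.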

function launch() {
    cmd="mpirun"

    if [ "$use_machinefile" = "true" ]
    then
        cmd="mpirun -machinefile machinefile"
        machinefile_init
    fi

    while (( "$#" ))
    do
        # Get number of MPI ranks and executable name
        nranks=$1
        executable=./$(basename $2)

        if [ "$use_machinefile" = "true" ]
        then
            machinefile_add $executable $nranks
        fi
        shift
        shift

        cmd+=" -np $nranks $executable"

        # Add any arguments to the executable
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        # Add colon if more executables follow
        (( "$#" )) && cmd+=" :"
    done

    $cmd
}

function finalise() {
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # Need to go to start_dir to find the run script
        cd ${start_dir}
        # Submit command
        # Note: This does not work if you explicitly specify a job name!
        # bsub -n ${SLURM_JOB_NUM_NODES} \
        #      -w ${SLURM_JOB_ID} \
        #      -oo ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #      ${resubmit_opt} \
        #      ${SLURM_JOB_NAME}
    fi
}

function run_node() {
    # This function launches a command once on each node used in the job,
    # currently only used in the ece-lsm.sh runscript
    mpirun -n $SLURM_JOB_NUM_NODES -ppn 1 -hosts `scontrol show hostname | paste -d, -s` bash -c "$1"
}
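
# Illustrative usage (comments only, a sketch of how the run script calls these
# functions): launch takes "<nranks> <executable> [args...]" groups separated
# by "--", and run_node takes a single shell command string, e.g.
#     launch 336 ${ifs_exe_file} -v ecmwf -e ${exp_name} -- \
#            380 ${nem_exe_file} -- 5 ${xio_exe_file}
#     run_node "mkdir -p ${run_dir}/tmp"
# The rank counts match the c4mip example in machinefile_config; the IFS
# command-line options and the run_node command are hypothetical.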