# knmi-rhino.cfg.tmpl

# Platform dependent configuration functions for the 'rhino' machine
# (KNMI, NL)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # Configure paths for building/running EC-Earth
    export ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    export run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    export ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]
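
    # Illustration (hypothetical resolved paths, not part of the original template):
    # GRIB_DEFINITION_PATH is a colon-separated search list, so the EC-Earth
    # table-126 override is consulted before the grib_api installation's own
    # definitions. After template expansion it would look something like:
    #   GRIB_DEFINITION_PATH=/path/to/ec-earth/sources/util/grib_table_126:/opt/grib_api/share/grib_api/definitions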

    # Configure GRIBEX paths
    export LOCAL_DEFINITION_TEMPLATES=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]
    #export ECMWF_LOCAL_TABLE_PATH=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +u
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        # Word splitting is intended here: add each module of the
        # space-separated list individually
        for m in ${module_list}
        do
            eval $(/usr/libexec/cmod sh add $m)
        done
        set -u
    fi
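
    # Illustration (hypothetical module names, not part of the original template):
    # with
    #   module_list="intel/2018 netcdf4/4.4 grib_api/1.24"
    # the loop above calls 'cmod sh add <module>' once per entry and eval's the
    # shell code it prints, which is how the module's environment changes reach
    # this non-interactive batch shell.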

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    ulimit -s unlimited
    ulimit -n 2048
    ulimit -c unlimited
    #ulimit -a
}

function launch_atos()
{
    # version using srun
    # banner launch
    cmd="srun --kill-on-bad-exit=1"

    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    # NOTE: the second KMP_AFFINITY assignment overrides the first (drops 'verbose')
    #export KMP_AFFINITY=verbose,compact,granularity=fine
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=$1
    NODESET=$2
    NBTASKS=$3
    BINDING=$4
    export OMP_NUM_THREADS=$5

    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=`nodeset -c $NODESET`
    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODESET --ntasks=$NBTASKS --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}
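
# Illustration (hypothetical call, not part of the original template): launch_atos
# expects a multi-prog configuration file, a SLURM nodeset, the total number of
# MPI tasks, a --cpu_bind specification and the OpenMP thread count, e.g.
#   launch_atos conf.txt "rhino001" 4 "map_cpu:0,1,2,3" 1
# which runs the programs listed in conf.txt with a single srun call on that node.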

function launch()
{
    # version using srun
    NB_CORES_PER_SOCKET_ACTUAL=14
    NB_CORES_PER_SOCKET_TARGET=14
    FIRST_CORE_SOCKET0=0
    LAST_CORE_SOCKET0=$((NB_CORES_PER_SOCKET_TARGET-1))
    FIRST_CORE_SOCKET1=$NB_CORES_PER_SOCKET_ACTUAL
    LAST_CORE_SOCKET1=$((FIRST_CORE_SOCKET1+NB_CORES_PER_SOCKET_TARGET-1))
    LIST_CORES_SOCKET0=`seq -s',' $FIRST_CORE_SOCKET0 $LAST_CORE_SOCKET0`
    LIST_CORES_SOCKET1=`seq -s',' $FIRST_CORE_SOCKET1 $LAST_CORE_SOCKET1`
    LIST_CORES=`echo $LIST_CORES_SOCKET0,$LIST_CORES_SOCKET1`
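    # Illustration (not part of the original template): with 14 actual and 14
    # target cores per socket, LIST_CORES evaluates to the comma-separated core
    # IDs 0 through 27, i.e. all 28 physical cores of a two-socket node; it is
    # used below as the map_cpu binding list for srun.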

    # hack for one node case
    nb_nodes=$(nodeset -c $SLURM_NODELIST)
    NODES_ECE_IFS_NEMO=`nodeset -f $SLURM_NODELIST`

    rm -f conf.txt
    _task1=-1
    NBTASKS=0
    while (( "$#" ))
    do
        nranks=$1
        executable=./$(basename $2)
        shift
        shift
        _task0=$((_task1+1))
        _task1=$((_task0+nranks-1))
        cmd="${_task0}-${_task1} ${executable}"
        NBTASKS=$((NBTASKS+nranks))
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        echo ${cmd} >>conf.txt
        shift || true
    done
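
    # Illustration (hypothetical rank counts and binaries, not part of the original
    # template): the loop above turns an argument list such as
    #   launch 4 ifsmaster -v ecmwf -e ECE3 -- 2 nemo.exe -- 1 xios_server.exe
    # into an srun --multi-prog configuration file conf.txt containing
    #   0-3 ./ifsmaster -v ecmwf -e ECE3
    #   4-5 ./nemo.exe
    #   6-6 ./xios_server.exe
    # with NBTASKS set to the total number of MPI ranks (7 in this example).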

    export OMP_NUM_THREADS=1
    case $OMP_NUM_THREADS in
        1) BINDING=map_cpu:$LIST_CORES ;;
        2) MASK=`~/KNMI/mytools/build_mask.14cores.sh 28x2`
           BINDING=mask_cpu:$MASK ;;
        *) echo "OMP_NUM_THREADS=$OMP_NUM_THREADS is not yet implemented." ; exit 1 ;;
    esac

    cmd="srun --kill-on-bad-exit=1"

    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    # NOTE: the second KMP_AFFINITY assignment overrides the first (drops 'verbose')
    #export KMP_AFFINITY=verbose,compact,granularity=fine
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=conf.txt
    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=`nodeset -c $NODES_ECE_IFS_NEMO`
    /usr/bin/time $cmd --nodes=$NBNODES --nodelist=$NODES_ECE_IFS_NEMO --ntasks=$NBTASKS --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}
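
# Illustration (hypothetical call, not part of the original template): the run
# script would typically invoke launch with per-executable rank counts, e.g.
#   launch ${ifs_numproc} ifsmaster -- ${nem_numproc} nemo.exe
# All ranks are then started with a single srun --multi-prog call on the nodes
# in $SLURM_NODELIST, bound to cores according to $BINDING.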

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        info "Not implemented yet!"
        # # Need to go to start_dir to find the run script
        # cd ${start_dir}
        # # Submit command
        # # Note: This does not work if you specify a job name with sbatch -J jobname!
        # sbatch -N ${SLURM_JOB_NUM_NODES} \
        #        -n $((ifs_numproc + nem_numproc + tm5_numproc)) \
        #        --exclusive \
        #        --ntasks-per-node=${proc_per_node} \
        #        -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -d ${SLURM_JOB_ID} \
        #        ${resubmit_opt} \
        #        ./${SLURM_JOB_NAME}
    fi
}

function postprocess()
{
    # This function submits a script to postprocess the IFS output
    data_dir=`pwd`/$1
    script_file="$1/postprocess"

    cat >"${script_file}" <<EOF
#! /bin/bash
cd "${start_dir}/../../../postprocessing"
./postprocess_leg "${data_dir}" "${data_dir}/postprocessed"
EOF

    sbatch -N 1 --exclusive \
           -o ${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
           -e ${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
           "${script_file}"
}