bsc-marenostrum4.cfg.tmpl

# Platform dependent configuration functions for MareNostrum
# (mnX.bsc.es)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # SCRATCH is not defined in MN3, define it here
    # and also make sure it is defined when compiling
    export SCRATCH=/gpfs/scratch/`id -gn`/${USER}

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +eu
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        for m in "${module_list}"
        do
            module add $m
        done
        set -eu
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    # Use machinefiles or not
    [[ `echo "$use_machinefile" | tr '[:upper:]' '[:lower:]'` == true ]] && use_machinefile=true || use_machinefile=false

    ulimit -s unlimited

    # Load specific IMPI environment configuration
    configure_impi
}
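
# NOTE: the [[[PLT:ACTIVE:*]]] placeholders in this file are meant to be
# substituted when the actual runscript is generated from this template, and
# variables such as start_dir, use_machinefile, leg_number and the per-component
# *_numproc / *_exe_file settings are expected to be provided by the calling
# EC-Earth runscript rather than defined here.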

function configure_python()
{
    # Specific for the python + eccodes setup, used for OSM pre/post-processing.
    # It would be simpler to do the following directly in configure():
    # module load eccodes/2.8.0 python/2.7.13
    module load eccodes/2.8.0 python/2.7.13
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/apps/ECCODES/2.8.0/INTEL/bin
}
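
# NOTE: the GRIB_* paths exported in configure() refer to the grib_api build
# used by the model itself; they are unset here, presumably so that the
# eccodes/python tools loaded above rely on their own table definitions, and
# only GRIB_BIN_PATH is re-exported to point at the ecCodes tools.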

function configure_impi()
{
    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1
    export I_MPI_DEBUG=5
}

function get_hosts()
{
    # This function uses a scheduler command to get the hosts allocated for the current job
    hosts=(`scontrol show hostname | paste -s`)
}
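
# "scontrol show hostname" expands the node list of the current SLURM
# allocation to one hostname per line, so hosts ends up with one array entry
# per allocated node, e.g. hosts=(s01r1b01 s01r1b02 ...) (hostnames here are
# purely illustrative).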

function machinefile_config()
{
    # User configuration starts here
    # hard-coded c4mip configurations, must use the proper _numproc settings
    if has_config ifs nemo pisces rnfmapper xios lpjg ; then
        if ! has_config tm5 ; then
            ifs_ppn=48 ; [[ ${ifs_numproc} != 336 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=43 ; [[ ${nem_numproc} != 380 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=5 ; [[ ${xio_numproc} != 5 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=5 ; [[ ${lpjg_numproc} != 40 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
        else
            ifs_ppn=48 ; [[ ${ifs_numproc} != 256 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=46 ; [[ ${nem_numproc} != 192 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=2 ; [[ ${xio_numproc} != 2 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=2 ; [[ ${lpjg_numproc} != 8 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
            tm5_ppn=4 ; [[ ${tm5_numproc} != 4 ]] && info "wrong numproc setting for tm5 in machinefile_config" || true
        fi
    else
        # Add any new exclusive binary here
        ifs_exc=TRUE
        nem_exc=TRUE
        xio_exc=TRUE
        lpjg_exc=TRUE
        tm5_exc=TRUE

        # Modify the allocation to each binary using more than one process here
        ifs_ppn=48
        nem_ppn=48
        xio_ppn=48
        lpjg_ppn=48
        tm5_ppn=45
    fi
}
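
# For each component, <comp>_ppn caps how many of its MPI tasks are placed on
# a single node and <comp>_exc=TRUE requests that the component start on an
# empty node; the total task counts (<comp>_numproc) checked above are assumed
# to be set by the runscript (see machinefile_add below for how both are used).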

function machinefile_init()
{
    # Get max processes per node from the platform variable
    max_ppn=$proc_per_node

    components=( ifs nem xio rnf amip lpjg )

    if $(has_config tm5)
    then
        components=( "${components[@]}" "tm5" )
    fi

    for component in ${components[@]}
    do
        eval ${component}_exc=FALSE
        eval ${component}_ppn=1
    done

    # Call user configuration and get_hosts functions
    machinefile_config
    get_hosts

    # Declare array to store the processes as they are assigned
    declare -a -g processes_hosts
    for n in `seq 0 ${#hosts[@]}`
    do
        processes_hosts[$n]=0
    done

    > machinefile

    current_hostid=0
}
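
# After machinefile_init the per-node process counters are all zero, the
# machinefile has been truncated to an empty file and allocation starts at the
# first node; machinefile_add below then appends one hostname line per MPI
# rank, a layout accepted by "mpirun -machinefile machinefile" in launch().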

machinefile_find_available_node()
{
    while [ $((${processes_hosts[$current_hostid]} + ${!ppn})) -gt $max_ppn ]
    do
        let "current_hostid += 1"
    done
}
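
# Note that ppn holds the *name* of the current component's _ppn variable, so
# ${!ppn} above is an indirect expansion; if no node has enough free cores,
# current_hostid runs past the end of the hosts array, which is meant to be
# caught by the "Not enough computing nodes" check in machinefile_add.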

machinefile_add()
{
    total_proc=$2

    # Iterate through all the possible binaries
    for component in ${components[@]}
    do
        binary="${component}_exe_file"
        exclusive="${component}_exc"

        # Check if the current binary matches the input executable
        if [ ./$(basename ${!binary}) = "$1" ]
        then
            ppn="${component}_ppn"

            # Exclusive mode: start allocation at the first empty node
            if [[ ${!exclusive} == "TRUE" ]]
            then
                while [ ${processes_hosts[$current_hostid]} -gt 0 ]
                do
                    let "current_hostid += 1"
                done

            # Shared mode: start allocation in the first node with enough free cores.
            # Only that first node is checked for free space, so if a previous binary
            # had "exc=TRUE", free space in subsequent nodes is not ensured.
            else
                current_hostid=0
                machinefile_find_available_node
            fi

            # Allocate ppn cores in each of the subsequent nodes until there are
            # no more processes to assign
            count=0
            while [ ${total_proc} -gt 0 ]
            do
                if [ ${current_hostid} -ge ${#hosts[@]} ]
                then
                    echo "Not enough computing nodes"
                    exit 1
                fi
                current_hostname=${hosts[$current_hostid]}
                while [[ ${total_proc} -gt 0 && ${count} -lt ${!ppn} ]]
                do
                    echo ${hosts[$current_hostid]} >> machinefile
                    let "count += 1"
                    let "processes_hosts[$current_hostid] += 1"
                    let "total_proc -= 1" || true
                done
                if [ ${count} -eq ${!ppn} ]
                then
                    let "current_hostid += 1"
                    machinefile_find_available_node
                    count=0
                fi
            done
        fi
    done
}
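
# Illustrative call (assumes nem_exe_file points at an executable named
# nemo.exe; the rank count is the 380-task example from machinefile_config):
#   machinefile_add ./nemo.exe 380
# appends 380 hostname lines to ./machinefile, at most ${nem_ppn} of them per
# node, starting either at the first empty node (nem_exc=TRUE) or at the first
# node with enough free cores (nem_exc=FALSE).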

function launch()
{
    cmd="mpirun"

    if [ "$use_machinefile" = "true" ]
    then
        cmd="mpirun -machinefile machinefile"
        machinefile_init
    fi

    while (( "$#" ))
    do
        # Get number of MPI ranks and executable name
        nranks=$1
        executable=./$(basename $2)

        if [ "$use_machinefile" = "true" ]
        then
            machinefile_add $executable $nranks
        fi

        shift
        shift

        cmd+=" -np $nranks $executable"

        # Add any arguments to executable
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        # Add colon if more executables follow
        (( "$#" )) && cmd+=" :"
    done

    $cmd
}
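
# launch expects groups of "<nranks> <executable> [args...]" separated by "--",
# e.g. (hypothetical executable names, rank counts taken from machinefile_config):
#   launch 336 ifs_master.exe -- 380 nemo.exe -- 5 xios_server.exe
# which builds and runs an MPMD command of the form
#   mpirun [-machinefile machinefile] -np 336 ./ifs_master.exe : -np 380 ./nemo.exe : -np 5 ./xios_server.exe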

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # Need to go to start_dir to find the run script
        cd ${start_dir}
        # Submit command
        # Note: This does not work if you explicitly specify a job name!
        # bsub -n ${SLURM_JOB_NUM_NODES} \
        #      -w ${SLURM_JOB_ID} \
        #      -oo ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #      ${resubmit_opt} \
        #      ${SLURM_JOB_NAME}
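        #
        # A SLURM equivalent would look roughly like the following (untested
        # sketch; it assumes ${resubmit_opt} holds only sbatch options):
        # sbatch -N ${SLURM_JOB_NUM_NODES} \
        #        --dependency=afterany:${SLURM_JOB_ID} \
        #        -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        ${resubmit_opt} \
        #        ${SLURM_JOB_NAME}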
    fi
}

function run_node()
{
    # This function launches a command once on each node used in the job;
    # currently it is only used in the ece-lsm.sh runscript
    mpirun -n $SLURM_JOB_NUM_NODES -ppn 1 -hosts `scontrol show hostname | paste -d, -s` bash -c "$1"
}
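
# Example (hypothetical command): run_node "mkdir -p ${run_dir}/tmp" creates
# the directory once on every node of the allocation, since mpirun is started
# with one process per node (-ppn 1) over the comma-separated host list.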