bsc-marenostrum4.cfg.tmpl

# Platform dependent configuration functions for MareNostrum
# (mnX.bsc.es)

function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # SCRATCH is not defined in MN3, define it here
    # and also make sure it is defined when compiling
    export SCRATCH=/gpfs/scratch/`id -gn`/${USER}

    # Configure paths for building/running EC-Earth
    ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure GRIB API paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +eu
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        for m in "${module_list}"
        do
            module add $m
        done
        set -eu
    fi

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    # Use machinefiles or not (normalise the flag to lowercase true/false)
    [[ `echo "$use_machinefile" | tr '[:upper:]' '[:lower:]'` == true ]] && use_machinefile=true || use_machinefile=false

    ulimit -s unlimited

    # Load specific IMPI environment configuration
    configure_impi
}
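
# Illustrative note: the [[[PLT:ACTIVE:...]]] markers above are placeholders that
# the EC-Earth configuration step expands with the values defined for this
# platform. A purely hypothetical expansion could look like
#   proc_per_node=48
#   module_list="intel impi netcdf hdf5"
# The actual values come from the platform configuration, not from this file.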

function configure_python()
{
    # Specific setup for python + eccodes - used for OSM pre/post-processing.
    # It would be simple to do the following module load in configure() as well:
    # module load eccodes/2.8.0 python/2.7.13
    module load eccodes/2.8.0 python/2.7.13
    unset GRIB_DEFINITION_PATH
    unset GRIB_SAMPLES_PATH
    unset GRIB_BIN_PATH
    export GRIB_BIN_PATH=/apps/ECCODES/2.8.0/INTEL/bin
}

function configure_impi()
{
    [ -z "${OMP_NUM_THREADS-}" ] && export OMP_NUM_THREADS=1
    export I_MPI_DEBUG=5
}

function get_hosts()
{
    # This function uses a scheduler command to get the hosts allocated for the current job
    hosts=(`scontrol show hostname | paste -s`)
}
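
# Illustrative example (hypothetical 3-node SLURM allocation): after get_hosts,
# the array holds one entry per node name reported by "scontrol show hostname", e.g.
#   hosts=(s01r1b01 s01r1b02 s01r1b03)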

function machinefile_config()
{
    # User configuration starts here
    # Hard-coded C4MIP configurations, must use the proper _numproc settings
    if has_config ifs nemo pisces rnfmapper xios lpjg ; then
        if ! has_config tm5 ; then
            ifs_ppn=48 ; [[ ${ifs_numproc} != 336 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=43 ; [[ ${nem_numproc} != 380 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=5  ; [[ ${xio_numproc} != 5 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=5 ; [[ ${lpjg_numproc} != 40 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
        else
            ifs_ppn=48 ; [[ ${ifs_numproc} != 256 ]] && info "wrong numproc setting for ifs in machinefile_config" || true
            nem_ppn=46 ; [[ ${nem_numproc} != 192 ]] && info "wrong numproc setting for nemo in machinefile_config" || true
            xio_ppn=2  ; [[ ${xio_numproc} != 2 ]] && info "wrong numproc setting for xios in machinefile_config" || true
            lpjg_ppn=2 ; [[ ${lpjg_numproc} != 8 ]] && info "wrong numproc setting for lpjg in machinefile_config" || true
            tm5_ppn=4  ; [[ ${tm5_numproc} != 4 ]] && info "wrong numproc setting for tm5 in machinefile_config" || true
        fi
    else
        # Add any new exclusive binary here
        ifs_exc=TRUE
        lpjg_exc=TRUE
        tm5_exc=TRUE

        # Modify the allocation to each binary using more than one process here
        ifs_ppn=48
        if has_config nemo && [ "$nem_grid" == "ORCA025L75" ] ; then
            nem_ppn=47
            xio_ppn=1
        else
            nem_exc=TRUE
            xio_exc=TRUE
            nem_ppn=48
            xio_ppn=48
        fi
        lpjg_ppn=48
        tm5_ppn=45
    fi
}
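
# Illustrative note (arithmetic on the first hard-coded layout above, assuming a
# 16-node job): IFS places 336 ranks at 48 per node on 7 full nodes, NEMO places
# 380 ranks at 43 per node on the next 9 nodes (8 x 43 + 36), and the XIOS and
# LPJ-GUESS ranks fill spare cores on those NEMO nodes via machinefile_add below.
# This is only meant to show how the _ppn settings translate into a node layout.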

function machinefile_init()
{
    # Get max processes per node from the platform variable
    max_ppn=$proc_per_node

    components=( ifs nem xio rnf amip lpjg )
    if $(has_config tm5)
    then
        components=( "${components[@]}" "tm5" )
    fi

    # Default: not exclusive and one process per node for every component
    for component in ${components[@]}
    do
        eval ${component}_exc=FALSE
        eval ${component}_ppn=1
    done

    # Call user configuration and get_hosts functions
    machinefile_config
    get_hosts

    # Declare array to store the processes as they are assigned
    declare -a -g processes_hosts
    for n in `seq 0 ${#hosts[@]}`
    do
        processes_hosts[$n]=0
    done

    # Start with an empty machinefile
    > machinefile

    current_hostid=0
}
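
# Illustrative note: after machinefile_init every component defaults to
# <component>_exc=FALSE and <component>_ppn=1 (unless machinefile_config
# overrides them), processes_hosts holds one zeroed counter per allocated host,
# and an empty ./machinefile exists in the current working directory.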

machinefile_find_available_node()
{
    # Advance current_hostid until a node has enough free cores for ${!ppn} more processes
    while [ $((${processes_hosts[$current_hostid]} + ${!ppn})) -gt $max_ppn ]
    do
        let "current_hostid += 1"
    done
}
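
# Illustrative example (hypothetical state): with max_ppn=48, ${!ppn}=5 and
# processes_hosts=(48 48 43 0), starting from current_hostid=0 the loop above
# stops at current_hostid=2, because 43 + 5 does not exceed 48.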

machinefile_add()
{
    total_proc=$2

    # Iterate through all the possible binaries
    for component in ${components[@]}
    do
        binary="${component}_exe_file"
        exclusive="${component}_exc"

        # Check if the current binary matches the input executable
        if [ ./$(basename ${!binary}) = "$1" ]
        then
            ppn="${component}_ppn"

            # Exclusive mode: start allocation at the first empty node
            if [[ ${!exclusive} == "TRUE" ]]
            then
                while [ ${processes_hosts[$current_hostid]} -gt 0 ]
                do
                    let "current_hostid += 1"
                done
            # Shared mode: start allocation at the first node with enough free cores.
            # Note that only the first node is checked, so if a previous binary had
            # "exc=TRUE", allocation space is not ensured in subsequent nodes.
            else
                current_hostid=0
                machinefile_find_available_node
            fi

            # Allocate ppn cores on each of the subsequent nodes until there are
            # no more processes to assign
            count=0
            while [ ${total_proc} -gt 0 ]
            do
                if [ ${current_hostid} -ge ${#hosts[@]} ]
                then
                    echo "Not enough computing nodes"
                    exit 1
                fi
                current_hostname=${hosts[$current_hostid]}
                while [[ ${total_proc} -gt 0 && ${count} -lt ${!ppn} ]]
                do
                    echo ${hosts[$current_hostid]} >> machinefile
                    let "count += 1"
                    let "processes_hosts[$current_hostid] += 1"
                    let "total_proc -= 1" || true
                done
                if [ ${count} -eq ${!ppn} ]
                then
                    let "current_hostid += 1"
                    machinefile_find_available_node
                    count=0
                fi
            done
        fi
    done
}
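
# Illustrative example (hypothetical executables and settings, assuming
# ifs_exe_file and nem_exe_file point at ifs.exe and nemo.exe): with
# hosts=(nodeA nodeB nodeC), max_ppn=48, ifs_ppn=2 (shared) and nem_ppn=2 with
# nem_exc=TRUE, the calls
#   machinefile_add ./ifs.exe 4
#   machinefile_add ./nemo.exe 2
# append nodeA nodeA nodeB nodeB for IFS and then nodeC nodeC for NEMO, since
# the exclusive NEMO allocation starts on the first node with no processes
# assigned yet.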

function launch()
{
    cmd="mpirun"
    if [ "$use_machinefile" = "true" ]
    then
        cmd="mpirun -machinefile machinefile"
        machinefile_init
    fi

    while (( "$#" ))
    do
        # Get number of MPI ranks and executable name
        nranks=$1
        executable=./$(basename $2)

        if [ "$use_machinefile" = "true" ]
        then
            machinefile_add $executable $nranks
        fi

        shift
        shift

        cmd+=" -np $nranks $executable"

        # Add any arguments to the executable
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        shift || true

        # Add a colon if more executables follow
        (( "$#" )) && cmd+=" :"
    done

    $cmd
}
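
# Illustrative usage (hypothetical executable names and arguments): launch takes
# "<nranks> <executable> [args]" groups separated by "--", one per binary, e.g.
#   launch ${ifs_numproc} ifsmaster.exe -v ecmwf -e ${exp_name} -- \
#          ${nem_numproc} nemo.exe -- \
#          ${xio_numproc} xios_server.exe
# and turns them into a single MPMD mpirun command, optionally driven by the
# machinefile built above.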

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # Need to go to start_dir to find the run script
        cd ${start_dir}
        # Submit command
        # Note: This does not work if you explicitly specify a job name!
        # bsub -n ${SLURM_JOB_NUM_NODES} \
        #      -w ${SLURM_JOB_ID} \
        #      -oo ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #      ${resubmit_opt} \
        #      ${SLURM_JOB_NAME}
    fi
}
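
# Sketch only (untested, flags to be adapted to the site): on a SLURM-only setup
# the commented-out bsub above could be replaced by an sbatch call such as
#   sbatch -N ${SLURM_JOB_NUM_NODES} \
#          --dependency=afterok:${SLURM_JOB_ID} \
#          -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
#          ${resubmit_opt} \
#          ${SLURM_JOB_NAME}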

function run_node()
{
    # This function launches a command once on each node used in the job,
    # currently only used in the ece-lsm.sh run script
    mpirun -n $SLURM_JOB_NUM_NODES -ppn 1 -hosts `scontrol show hostname | paste -d, -s` bash -c "$1"
}
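
# Illustrative usage (hypothetical command): run_node can be used to prepare
# node-local scratch directories before the model starts, e.g.
#   run_node "mkdir -p /tmp/${USER}/${SLURM_JOB_ID}"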