csc-puhti-intel-intelmpi.cfg.tmpl

# Platform dependent configuration functions for the 'puhti' machine
# (CSC, FI)
function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    module purge
    module load intel/18.0.5
    module load intel-mpi/18.0.5
    module load intel-mkl/2018.0.5
    module load hdf/4.2.13
    module load hdf5/1.10.4-mpi
    module load netcdf/4.7.0
    module load netcdf-fortran/4.4.4
    module load grib-api/1.24.0
    module load cdo

    # Configure paths for building/running EC-Earth
    export ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    export run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    export ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]

    # Configure GRIBEX paths
    export LOCAL_DEFINITION_TEMPLATES=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]
    #export ECMWF_LOCAL_TABLE_PATH=[[[PLT:ACTIVE:GRIBEX_DEFINITION_PATH]]]

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +u
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        # Note: ${module_list} must not be quoted here, otherwise the
        # whole list is passed to modulecmd as one module name.
        for m in ${module_list}
        do
            eval $(/usr/bin/modulecmd sh add $m)
        done
        set -u
    fi
    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    echo $LD_LIBRARY_PATH

    ulimit -s unlimited
    ulimit -n 2048
    ulimit -c unlimited
    #ulimit -a
}
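
# For reference, a minimal sketch of what the [[[PLT:ACTIVE:...]]]
# placeholders above might expand to after template substitution. The
# paths are hypothetical, and the core count assumes a standard Puhti
# CPU node; adjust both to your installation:
#
#     export ecearth_src_dir=/projappl/project_XXXXXXX/ec-earth/sources
#     export run_dir=/scratch/project_XXXXXXX/ec-earth/run
#     export ini_data_dir=/scratch/project_XXXXXXX/ec-earth/inidata
#     proc_per_node=40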
function launch_atos()
{
    # Version using srun
    # banner launch
    cmd="srun --kill-on-bad-exit=1"

    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FAST_COLLECTIVES=1
    export I_MPI_EXTRA_FILESYSTEM=on
    export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
    export OMP_PROC_BIND=true
    # Only the second KMP_AFFINITY assignment takes effect; the verbose
    # variant is kept commented out for debugging thread pinning.
    #export KMP_AFFINITY=verbose,compact,granularity=fine
    export KMP_AFFINITY=compact,granularity=fine
    export PMI_TIME=10
    export MKL_NUM_THREADS=1
    export OMP_STACKSIZE=256m
    export MXM_LOG_LEVEL=ERROR
    export OMPI_MCA_hwloc_base_binding_policy=none

    CONF_FILE=$1
    NODESET=$2
    NBTASKS=$3
    BINDING=$4
    export OMP_NUM_THREADS=$5

    export TIME="launch timing : %e elapsed %U user %S system"
    NBNODES=$(nodeset -c $NODESET)
    # Debug leftovers, kept for reference:
    #ls /usr/bin
    #/usr/bin/time
    $cmd --nodes=$NBNODES --nodelist=$NODESET --ntasks=$NBTASKS --distribution=block --cpu_bind=$BINDING -l --multi-prog $CONF_FILE
}
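
# Example invocation (hypothetical values): run 128 tasks from a
# prepared multi-prog file on a 4-node nodeset, binding tasks to cores
# with 2 OpenMP threads each. The positional arguments are
# CONF_FILE NODESET NBTASKS BINDING OMP_NUM_THREADS:
#
#     launch_atos conf.txt "nid00[01-04]" 128 cores 2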
function launch()
{
    # Version using srun: builds a Slurm multi-prog configuration file
    # from '--'-separated "nranks executable [args]" argument groups
    # and launches it.
    #echo $#  # debug: number of arguments
    #cmd="srun "

    _task1=-1
    NBTASKS=0
    rm -f conf.txt
    while (( "$#" ))
    do
        # Each group starts with a rank count and an executable
        nranks=$1
        executable=./$(basename $2)
        shift
        shift

        # Assign this executable the next contiguous range of task ids
        _task0=$((_task1+1))
        _task1=$((_task0+nranks-1))
        cmd="${_task0}-${_task1} ${executable}"
        NBTASKS=$((NBTASKS+nranks))

        # Collect the executable's own arguments up to the next '--'
        while (( "$#" )) && [ "$1" != "--" ]
        do
            cmd+=" $1"
            shift
        done
        echo ${cmd} >>conf.txt
        shift || true
    done
    export CONF_FILE=conf.txt
    #cat $CONF_FILE

    export OMP_NUM_THREADS=1
    srun --multi-prog $CONF_FILE
}
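
# Example (hypothetical executables, rank counts and arguments): a run
# script might call
#
#     launch 256 ifsmaster -e ECE3 -- 128 nemo.exe -- 1 xios_server.exe
#
# which would write a conf.txt of the form
#
#     0-255 ./ifsmaster -e ECE3
#     256-383 ./nemo.exe
#     384-384 ./xios_server.exe
#
# and hand it to srun --multi-prog as a single heterogeneous job step.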
function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit

    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        cd ${start_dir}
        sbatch ${SLURM_JOB_NAME}
        # # Need to go to start_dir to find the run script
        # cd ${start_dir}
        # # Submit command
        # # Note: This does not work if you specify a job name with sbatch -J jobname!
        # sbatch -N ${SLURM_JOB_NUM_NODES} \
        #        -n $((ifs_numproc + nem_numproc + tm5_numproc)) \
        #        --exclusive \
        #        --ntasks-per-node=${proc_per_node} \
        #        -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
        #        -d ${SLURM_JOB_ID} \
        #        ${resubmit_opt} \
        #        ./${SLURM_JOB_NAME}
    fi
}
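
# How the resubmit condition behaves, with hypothetical dates: if
# leg_end_date="2001-01-01" and run_end_date="2010-01-01", the epoch
# comparison is true and the job resubmits itself for the next leg;
# once leg_end_date reaches run_end_date, the comparison fails and the
# resubmission chain stops.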
function postprocess()
{
    # This function submits a script to postprocess the IFS output
    data_dir=$(pwd)/$1
    script_file="$1/postprocess"
    cat >"${script_file}" <<EOF
#!/bin/bash
cd "${start_dir}/../../../postprocessing"
./postprocess_leg "${data_dir}" "${data_dir}/postprocessed"
EOF
    sbatch -N 1 --exclusive \
           -o ${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
           -e ${data_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
           "${script_file}"
}
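
# Example (hypothetical leg directory): called from the run directory as
#
#     postprocess output/ifs/001
#
# this writes output/ifs/001/postprocess and submits it on one exclusive
# node, postprocessing that leg's IFS output into
# output/ifs/001/postprocessed.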