surfsara-cartesius.cfg.tmpl

# Platform dependent configuration functions for the 'cartesius' machine
# (SURFsara, NL)

#hostlist="${HOME}/bin/ExpandNodeList -r -p 1"
function configure()
{
    # This function should configure all settings/modules needed to
    # later prepare the EC-Earth run directory and set variables used
    # in the run script

    # Configure paths for building/running EC-Earth
    export ecearth_src_dir=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]
    export run_dir=[[[PLT:ACTIVE:RUN_DIR]]]
    export ini_data_dir=[[[PLT:ACTIVE:INI_DATA_DIR]]]

    # File for standard output.
    # NOTE: This will be modified for restart jobs!
    stdout_file=${start_dir}/out/$(basename ${SLURM_JOB_NAME}).out

    # Resubmit this job for automatic restarts? [true/false]
    # Also, add options for the resubmit command here.
    resubmit_job=[[[PLT:ACTIVE:RESUBMIT_JOB]]]
    resubmit_opt="[[[PLT:ACTIVE:RESUBMIT_OPT]]]"
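    # For illustration only: resubmit_opt may carry extra sbatch options that
    # finalise() appends to the resubmission command, e.g. a hypothetical
    # project account and wall-clock limit:
    #   resubmit_opt="--account=myproject --time=24:00:00"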

    # Configure grib api paths
    export GRIB_DEFINITION_PATH=[[[PLT:ACTIVE:ECEARTH_SRC_DIR]]]/util/grib_table_126:[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_DEFINITION_SUBDIR]]]
    export GRIB_SAMPLES_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_SAMPLES_SUBDIR]]]
    export GRIB_BIN_PATH=[[[PLT:ACTIVE:GRIBAPI_BASE_DIR]]]/[[[PLT:ACTIVE:GRIBAPI_BIN_SUBDIR]]]
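    # Note: GRIB_DEFINITION_PATH is a colon-separated search path scanned in
    # order, so the EC-Earth grib_table_126 directory listed first takes
    # precedence over the stock GRIB API definitions.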

    # Configure number of processors per node
    proc_per_node=[[[PLT:ACTIVE:PROC_PER_NODE]]]

    # Configure and load modules
    pre_load_modules_cmd="[[[PLT:ACTIVE:PRE_LOAD_MODULES_CMD]]]"
    module_list="[[[PLT:ACTIVE:MODULE_LIST]]]"
    if [ -n "${module_list}" ]
    then
        set +u
        if [ -n "${pre_load_modules_cmd}" ]
        then
            ${pre_load_modules_cmd}
        fi
        for m in "${module_list}"
        do
            eval $(/usr/libexec/cmod sh add $m)
        done
        set -u
    fi
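    # For illustration only: MODULE_LIST expands to a space-separated list of
    # environment modules (hypothetical names), e.g.
    #   module_list="fortran mpi netcdf hdf5 grib_api"
    # Each "cmod sh add <module>" call prints shell code that the eval above
    # applies to the current environment.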

    # Add directories to the shared library search path
    if [ -n "[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]" ]
    then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"[[[PLT:ACTIVE:ADD_TO_LD_LIBRARY_PATH]]]"
    fi

    #hostname
    ulimit -s unlimited
    ulimit -n 2048
    #ulimit -a
}

function launch()
{
    # Version using an MPMD MPI job script as described on the SURFsara/Cartesius website
    set -x
    cmd="srun -l --multi-prog mfile" # command executed at the end of launch
    # MYARG="strace -ostrace-out.$SLURM_PROCID -k -eexit_group ./appl-tm5.x --multi-prog mfile"
    # cmd="srun bash -c \"$MYARG\"" # command executed at the end of launch
    rm -f mfile # remove any old mfile

    # Arguments come in groups of the form <nranks> <executable> [args ...],
    # separated by "--"; each group becomes one rank-range line in the mfile.
    proc_begin="0"
    while (( "$#" ))
    do
        nranks=$1
        if [ "$2" = "strace" ]; then
            # Special case: keep 'strace' and its three options in front of the executable
            executable="$2 $3 $4 $5 ./$(basename $6)"
            shift
            shift
            shift
            shift
        else
            executable=./$(basename $2)
        fi
        shift
        shift
        proc_end="$(($proc_begin+$nranks-1))"
        mfile_line="$proc_begin-$proc_end $executable"
        proc_begin="$(($proc_end+1))"
        while (( "$#" )) && [ "$1" != "--" ]
        do
            mfile_line+=" $1"
            shift
        done
        shift || true
        echo "$mfile_line" >> mfile
    done
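
    # For illustration only (hypothetical rank counts and binary names):
    # a call with executables ifsmaster, nemo.exe and appl-tm5.x and 256, 128
    # and 16 ranks would make the loop above write an mfile like
    #   0-255 ./ifsmaster
    #   256-383 ./nemo.exe
    #   384-399 ./appl-tm5.x
    # srun --multi-prog then starts each rank range with its own binary.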

    # echo 'strace -k -eexit_group -ostrace.out ./appl-tm5.x' >> mfile
    # echo 'pretty_print_strace_out.py --tree strace.out' >> mfile
    # strace -k -eexit_group -ostrace.out ./appl-tm5.x
    # pretty_print_strace_out.py --tree strace.out

    $cmd
    set +x

    # set -x
    # # Version using the hydra process manager
    # cmd="mpiexec.hydra -bootstrap slurm -machinefile mfile"
    #
    # rm -f mfile
    #
    # nodes="$(scontrol show hostname $SLURM_NODELIST)"
    # #"$(nodeset -e $SLURM_NODELIST)"
    #
    # while (( "$#" ))
    # do
    #     nranks=$1
    #     executable=./$(basename $2)
    #     shift
    #     shift
    #
    #     cmd+=" -n $nranks $executable"
    #
    #     while (( "$#" )) && [ "$1" != "--" ]
    #     do
    #         cmd+=" $1"
    #         shift
    #     done
    #     shift || true
    #
    #     for node in $nodes
    #     do
    #         (( n = proc_per_node<nranks?proc_per_node:nranks ))
    #         #echo "$node:$n" >> mfile
    #         for ((i = 1; i<=n ; i++)); do echo "$node" >> mfile; done
    #         (( nranks -= n )) || break
    #     done
    #
    #     nodes=$(echo $nodes | sed "s/.*$node[[:space:]]*//")
    #     (( "$#" )) && cmd+=" :"
    # done
    #
    # export OMP_NUM_THREADS=1
    # # MPIEXEC_START_PROTOCOL=pmi
    # # I_MPI_PMI_EXTENSIONS=on
    # # I_MPI_FABRICS=shm:ofa
    # # export I_MPI_DEBUG=3
    # $cmd
    # set +x
}
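
# The run script is expected to call launch() with groups of the form
#   <nranks> <executable> [args ...]
# separated by "--", e.g. (hypothetical rank counts and binary names):
#   launch 256 ifsmaster -- 128 nemo.exe -- 16 appl-tm5.x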

function finalise()
{
    # This function should execute any post-run functionality, e.g.
    # platform dependent cleaning or a resubmit
    if ${resubmit_job} && [ $(date -d "${leg_end_date}" +%s) -lt $(date -d "${run_end_date}" +%s) ]
    then
        info "Resubmitting job for leg $((leg_number+1))"
        # Need to go to start_dir to find the run script
        cd ${start_dir}
        # Submit command
        # Note: This does not work if you specify a job name with sbatch -J jobname!
        sbatch -N ${SLURM_JOB_NUM_NODES} \
               -n $((ifs_numproc + nem_numproc + tm5_numproc)) \
               --exclusive \
               --ntasks-per-node=${proc_per_node} \
               -o ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -e ${run_dir}/$(basename ${stdout_file}).$(printf %03d $((leg_number+1))) \
               -d ${SLURM_JOB_ID} \
               ${resubmit_opt} \
               ./${SLURM_JOB_NAME}
    fi
}