ソースを参照

Adding feature: restart from another job

Barriat 7 年 前
コミット
1e7ae4a409

+ 4 - 3
nemo/ARCH/arch-intel_ELIC.fcm

@@ -9,13 +9,14 @@
 %CPP                 fpp
 
 %FC                  mpiifort
-%FCFLAGS             -O3 -i4 -r8 -no-prec-div
+#%FCFLAGS             -O3 -i4 -r8 -no-prec-div
+%FCFLAGS             -g -traceback -O1 -i4 -r8 -no-prec-div
 %FFLAGS              %FCFLAGS
 
-%FPPFLAGS            -P -C
+%FPPFLAGS            -P -C -traditional
 
 %LD                  mpiifort
-%LDFLAGS             
+%LDFLAGS             -g -traceback
 
 %AR                  ar
 %ARFLAGS             curv

+ 34 - 10
templates/scripts/skeleton.sh

@@ -40,8 +40,7 @@ function leap_days()
 
 [[ $@ == *verbose* ]] && set -x
 
-#module load "${module_list:?}"
-module load ${module_list}
+module load "${module_list:?}"
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"${extralibs_list}"
 
 #
@@ -50,7 +49,34 @@ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"${extralibs_list}
 # Copy XIOS XML files into run directory
 # Copy namelist files into run directory
 #
-mkdir -p "${run_dir:?}"
+if [[ ! -d "${run_dir:?}" ]]
+then
+     mkdir -p "${run_dir}"
+     if [[ "${special_restart}" == true ]]   # compare the flag; do not execute its value as a command
+     then
+          rsync -av --delete "${run_dir}/../${special_restart_from}/" --exclude log --exclude output --exclude restart --exclude="${special_restart_from}_*" --exclude="ocean*" --exclude="restart_*" --exclude="debug.*" --exclude="output.*" "${run_dir}"
+          cp -f "${nem_exe_file}" "${run_dir}"
+          cp -f "${xio_exe_file}" "${run_dir}"
+          special_year=${special_restart_date:0:4}
+          sed -i "/$special_year/q" "${run_dir}/${info_file}"   # truncate the info file after the first line containing the restart year
+          . "${run_dir}/${info_file}"
+          special_restart_leg=$(printf %03d $((leg_number+1)))
+          # Copy the archived restart files of the source experiment, renaming them to this experiment
+          cd "${run_dir}/../../archive/${special_restart_from}/restart/${special_restart_leg}" || exit 1
+          for f in *.nc; do
+              nf=${exp_name}${f#"${special_restart_from}"}   # swap the experiment-name prefix, whatever its length
+              cp "$f" "${run_dir}/$nf"
+          done
+          cd - >/dev/null
+          cd "${run_dir}" || exit 1
+          for f in "${exp_name}"_????????_restart_???_????.nc; do
+              nf=${f#"${exp_name}"_????????_}   # strip "<exp_name>_<8 chars>_" to get the link name
+              ln -s "$f" "$nf"
+          done
+          cd - >/dev/null
+     fi
+fi
+cd "${start_dir}"
 cp -u xios_config/*xml "${run_dir}"
 cp -u namelists/* "${run_dir}"
 cd "${run_dir}"
@@ -68,7 +94,7 @@ cp -u "${xio_exe_file:?}" .
 for file in "${ic_files[@]}"; do 
     [[ ! -e ${file#*> } ]] && ln -sf $(sed 's/ *=> */ /' <<< "${ini_data_dir}/${ic_subdir}/${nem_grid}/$file") 
 done
-for file in "${forcing_files[@]}"; do 
+for file in "${forcing_files[@]}"; do
     [[ ! -e ${file#*> } ||  "$file" == \** ]] && ln -sf $(sed 's/ *=> */ /' <<< "${ini_data_dir}/${forcing_subdir}/${nem_forcing_set}/$file")
 done
 for file in "${shared_files[@]}"; do
@@ -224,6 +250,7 @@ do
       echo "leg_end_date=\"${leg_end_date}\""
     } | tee -a "${info_file}"
 
+    special_restart=false
 done
 #
 # Move back to submission directory
@@ -233,16 +260,13 @@ cd - >/dev/null
 # Check whether there is some work left to do in a further job
 #
 [[ $@ == *noresubmit* ]] && exit 0
-if (( leg_end_epoch < run_end_epoch )) ;
-then
+if (( leg_end_epoch < run_end_epoch )) ; then
     echo "Leg end earlier than end of simulation."
     echo "Submitting another job."
     #[[ $@ == *local* ]] && exec $0 $@ || scontrol requeue $SLURM_JOB_ID #TODO: factorize this
-    if [[ "$@" == *local* ]] 
-    then
+    if [[ "$@" == *local* ]] ; then   # [[ ]] is required: [ ] does no pattern matching
         exec "$0" "$@"
-    elif [[ "${submit_command}" == *sbatch* ]]
-    then
+    elif [[ "${submit_command}" == *sbatch* ]] ; then   # [[ ]] is required: [ ] does no pattern matching
         "${submit_command}" "$0" "$@" | awk '{print $4}' >> "${run_dir}"/.coral_jobs
     else
         "${submit_command}" -v PBS_OPTIONS="$@" "$0" | tee -a coral_jobs

+ 10 - 0
templates/zenobe_xios-1.0/experiment.cfg

@@ -12,6 +12,11 @@ rst_freq="1 year"
 # Number of restart legs to be run in one go
 run_num_legs=29
 
+# Special restart
+special_restart=false
+special_restart_from=EXP0
+special_restart_date="1959-01-01"
+
 # Parameters
 nem_time_step_sec=3600
 lim_time_step_sec=21600
@@ -20,3 +25,8 @@ nem_config_name=ORCA1_LIM3_REF
 
 # This file is used to store information about restarts
 info_file="nemo.info"
+
+# Paths
+start_dir=${PBS_O_WORKDIR-$PWD}
+run_dir="/SCRATCH/acad/limhr/$USER/nemo/run/${exp_name}"
+archive_dir="/SCRATCH/acad/limhr/$USER/nemo/archive/${exp_name}"

+ 2 - 6
templates/zenobe_xios-1.0/job.cfg

@@ -5,7 +5,6 @@ PBS -r y
 PBS -W group_list=limhr
 PBS -l walltime=17:00:00
 PBS -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1
-
 #PBS -M ${email}
 #PBS -m abe
 
@@ -19,12 +18,9 @@ echo "queue : $PBS_O_QUEUE"
 echo "user : $PBS_O_LOGNAME"
 echo "threads : $OMP_NUM_THREADS"
 
-start_dir=${PBS_O_WORKDIR-$PWD}
-run_dir="/SCRATCH/acad/limhr/$USER/nemo/run/${exp_name}"
-archive_dir="/SCRATCH/acad/limhr/$USER/nemo/archive/${exp_name}"
-
 # Cancel job on error
 set -ueo pipefail
 
 # RESET script arguments
-set -- $PBS_OPTIONS 
+NOTHING=""
+set -- ${PBS_OPTIONS-$NOTHING}