"""Restart every member of a PLASIM ensemble experiment on the PBS cluster.

Usage:

    python3 restart_ensemble_experiment.py where experiment ensemble_size number_of_years [where_to_save]

The script
  1. checks that the interpreter is recent enough and that the ``els`` (ECFS)
     and ``ncdump`` (NetCDF) commands can be run,
  2. parses the command line and checks that ``where/experiment/`` exists,
  3. queries ``qstat`` on ``cca`` and ``ccb`` (the "still running" test that
     used the result is currently disabled),
  4. submits ``PBS_plasim_<experiment>_<NN>`` with ``qsub`` for every member
     ``NN`` in ``01 .. ensemble_size``.

The backup of the previous run (local copy, "light" NetCDF extraction and tar
archive pushed to ECFS) is disabled; its code is kept below as comments.

NOTE: no f-strings or other recent syntax on purpose, so that an old
interpreter can still parse the file and display the version message.
"""
import os
import sys
import glob
import subprocess

# List of variable to save for output
PLASIM_VAR = ['tas', 'hfls', 'hfss']
OCEAN_VAR = ['heata', 'fldoa', 'sst']
LSG_VAR = ['fluxhea', 'tbound']

# Prepend the coordinate/mask variables to each list. These lists are only
# consumed by the (currently disabled) "light" output extraction below.
PLASIM_VAR = ['time', 'lat', 'lon'] + PLASIM_VAR
OCEAN_VAR = ['time', 'lat', 'lon', 'ls'] + OCEAN_VAR
LSG_VAR = ['time', 'lat', 'lon', 'lev', 'wet'] + LSG_VAR

# Absolute path of qstat on the remote login nodes.
QSTAT_PATH = '/opt/pbs/13.0.403.161593/bin/qstat'

# check the python version
# Compare the version tuple: parsing sys.version[:3] as a float reads "3.1"
# for Python 3.10+ and would wrongly abort on every recent interpreter.
if sys.version_info < (3, 6):
    print("This script require Python >= 3.6 !")
    print("Try:")
    print("\n\tmodule load python3\n\n")
    print("and restart this script again.")
    print("Aborting...")
    sys.exit(1)

# check if ecfs utils are loaded
user = os.getenv('USER')
if not user:
    # Without a user name neither the ECFS path nor the qstat query can be built.
    print("The USER environment variable is not set !")
    print("Aborting...")
    sys.exit(1)

try:
    subprocess.run(['els', 'ectmp:/' + user + '/'], check=True, stdout=subprocess.PIPE)
except (OSError, subprocess.CalledProcessError):
    # OSError: the command cannot be executed; CalledProcessError: it failed.
    print("This script require the ECFS toolchain to be loaded !")
    print("Try:")
    print("\n\tmodule load ecfs\n\n")
    print("and restart this script again.")
    print("Aborting...")
    sys.exit(1)

# check if netcdf utils are loaded
try:
    subprocess.run(['ncdump'], check=True, stderr=subprocess.PIPE)
except (OSError, subprocess.CalledProcessError):
    print("This script require NetCDF4 toolchain to be loaded !")
    print("Try:")
    print("\n\tmodule load netcdf4\n\n")
    print("and restart this script again.")
    print("Aborting...")
    sys.exit(1)

home_dir = os.getenv("HOME")
scratch_dir = os.getenv("SCRATCH")
perm_dir = os.getenv("PERM")

plasim_dir = home_dir + "/PLASIM/"

if len(sys.argv) < 5:
    print('Bad arguments:', sys.argv)
    print('Usage:')
    print('\n\t python3 restart_ensemble_experiment.py where experiment ensemble_size number_of_years where_to_save\n')
    print('Arguments:\n')
    print('\twhere :\t\t\tWhere the experiment ensemble folders are located.')
    print('\texperiment :\t\tName of the experiment.')
    print('\tensemble_size :\t\tSize of the ensemble.')
    print('\tnumber_of_years :\tNumber of years simulated by one run of the experiment.')
    print('\twhere_to_save :\t\tOptional. Where to backup the previous run. If not provide, uses $PERM.')
    # NOTE(review): exits with status 0 although the arguments are bad; kept
    # as-is in case a caller relies on it.
    sys.exit(0)

where = sys.argv[1]
# Last path component of `where`; trailing slashes are dropped first so that
# "/some/dir/" gives "dir" instead of an empty string.
basedir = os.path.basename(where.rstrip('/'))
experiment = sys.argv[2]
try:
    ensemble_size = int(sys.argv[3])
except ValueError:
    print('Bad arguments: ensemble_size must be an integer, got ' + repr(sys.argv[3]))
    print("Aborting...")
    sys.exit(1)
restart_year_to_save = sys.argv[4]

# Optional fifth argument: backup location (defaults to $PERM).
save = sys.argv[5] if len(sys.argv) > 5 else perm_dir

save_light = save + "/" + experiment + "_light/"
save += "/" + experiment + "/"

# Check if the experiment folder exists
experiment_folder = where + "/" + experiment + "/"

if not os.path.isdir(experiment_folder):
    print("Experiment folder not found!")
    print("Create and start the experiment " + experiment + " first.")
    print("Aborting...")
    sys.exit(1)

# check if the experiment is still running
# The queue of each cluster is still queried, but the test on its content is
# disabled, so `queue` is currently unused.
for cluster in ('cca', 'ccb'):
    queue = subprocess.run(['ssh', cluster, QSTAT_PATH, '-u', user],
                           stdout=subprocess.PIPE, timeout=360)
    #if 'plasim_'+experiment[:3] in str(queue.stdout):
    #    print("Experiment still running on <cluster>, no need to restart!")
    #    print("Aborting...")
    #    sys.exit(1)

# --- Disabled: backup of the previous run (local copy, light copy, ECFS) ---

#print('Saving the results of the previous run...')
#os.system('mkdir -p '+save)
#os.system('mkdir -p '+save_light)
#os.system('mkdir -p '+scratch_dir+'/tmp/'+experiment)
#for i in range(1, ensemble_size+1):
#    member_number = str(i).rjust(2, '0')
#    save_experiment_folder = save + 'run_'+experiment+'_'+member_number
#    save_experiment_folder_light = save_light + 'run_'+experiment+'_'+member_number
#    os.system('mkdir -p ' + save_experiment_folder)
#    # list all the past runs saved
#    past_run_list = os.listdir(save_experiment_folder)
#    if len(past_run_list) == 0:
#        past_run_list = [0]
#    else:
#        past_run_list = list(map(int, past_run_list))
#        past_run_list.sort()
#    last_experiment_index = past_run_list[-1]
#    new_experiment_index = last_experiment_index + 1
#    # temporarily move previous experiment run to scratch
#    if last_experiment_index > 0:
#        to_ecfs = save_experiment_folder+'/'+str(last_experiment_index)
#        to_light = save_experiment_folder_light+'/'+str(last_experiment_index)
#        os.system('mkdir -p '+scratch_dir+'/tmp/'+experiment + '/run_'+experiment+'_'+member_number)
#        os.system('rsync -a '+to_ecfs+' '+scratch_dir+'/tmp/'+experiment + '/run_'+experiment+'_'+member_number)
#        os.system('rm -rf '+to_ecfs)
#        os.system('rm -rf '+to_light)
#    # make the directory to save the result of the last experiment
#    dest = save_experiment_folder+'/'+str(new_experiment_index)
#    os.system('mkdir -p '+dest)
#    os.system('mkdir -p '+dest+'/restart/')
#    os.system('mkdir -p '+dest+'/output/')
#    # save the result of the last experiment
#    ensemble_member_folder = experiment_folder+'run_'+experiment+'_'+member_number
#    os.system('rsync -a '+ensemble_member_folder+'/output/* '+dest+'/output/')
#    os.system('rsync -a '+ensemble_member_folder+'/restart/kleiswi '+dest+'/restart/')
#    #os.system('rsync -a '+ensemble_member_folder+'/restart/*'+restart_year_to_save+' '+dest+'/restart/')
#    os.system('rsync -a '+ensemble_member_folder+'/restart/* '+dest+'/restart/')
#    # generating partial output files
#    dest_light = save_experiment_folder_light+'/'+str(new_experiment_index)
#    os.system('mkdir -p '+dest_light)
#    os.system('mkdir -p '+dest_light+'/restart/')
#    os.system('mkdir -p '+dest_light+'/output/')
#    os.system('rsync -a '+ensemble_member_folder+'/output/*.txt '+dest_light+'/output/')
#    os.system('rsync -a '+ensemble_member_folder+'/restart/kleiswi '+dest_light+'/restart/')
#    #os.system('rsync -a '+ensemble_member_folder+'/restart/*'+restart_year_to_save+' '+dest_light+'/restart/')
#    os.system('rsync -a '+ensemble_member_folder+'/restart/* '+dest_light+'/restart/')
#    nc_list = glob.glob(ensemble_member_folder+'/output/*PLA*.nc')
#    for infile in nc_list:
#        filename = infile.split('/')[-1]
#        os.system('nccopy -V '+','.join(PLASIM_VAR)+' '+infile+' '+dest_light+'/output/'+filename)
#    nc_list = glob.glob(ensemble_member_folder+'/output/*OCE*.nc')
#    for infile in nc_list:
#        filename = infile.split('/')[-1]
#        os.system('nccopy -V '+','.join(OCEAN_VAR)+' '+infile+' '+dest_light+'/output/'+filename)
#    nc_list = glob.glob(ensemble_member_folder+'/output/*LSG*.nc')
#    for infile in nc_list:
#        filename = infile.split('/')[-1]
#        os.system('nccopy -V '+','.join(LSG_VAR)+' '+infile+' '+dest_light+'/output/'+filename)
#
#
#print('Backup of the previous run done !')
#
#if last_experiment_index > 0:
#    print('Creating the tar archive of the experiment run number '+str(last_experiment_index))
#    print('and saving it in ECFS temporary storage...')
#    j = last_experiment_index
#    yts = int(restart_year_to_save)
#
#    ##
#    os.system('cd '+scratch_dir+'/tmp/ && tar -c -f '+scratch_dir+'/tmp/plasim_'+experiment+'_years_'+str(yts*(j-1)+1)+'to'+str(yts*j)+'.tar '+experiment)
#    ##
#
#    #os.system('tar -c -f '+scratch_dir+'/tmp/plasim_'+experiment+'_years_'+str(yts*(j-1)+1)+'to'+str(yts*j)+'.tar '+scratch_dir+'/tmp/'+experiment)
#    os.system('rm -rf '+scratch_dir+'/tmp/'+experiment+'/*')
#    queue = subprocess.run(['emkdir', '-p', 'ectmp:'+'/'+user+'/'+basedir+'/'+experiment], check=True)
#    os.system('ecp -t '+scratch_dir+'/tmp/plasim_'+experiment+'_years_'+str(yts*(j-1)+1)+'to'+str(yts*j)+'.tar '+ 'ectmp:'+'/'+user+'/'+basedir+'/'+experiment+'/')
#    print('Backup and move to ecfs done !')
#

print('Starting the ensemble runs...')

# Submit one PBS job per ensemble member. The argument list is passed to qsub
# directly (no shell), so paths containing spaces or shell metacharacters are
# handled correctly, and failed submissions are recorded instead of ignored.
failed_members = []
for i in range(1, ensemble_size + 1):
    member_number = str(i).rjust(2, '0')
    job_name = 'plasim_' + experiment + '_' + member_number
    ensemble_member_folder = experiment_folder + 'run_' + experiment + '_' + member_number
    job_script = ensemble_member_folder + '/PBS_' + job_name
    try:
        submission = subprocess.run(['qsub', job_script])
    except OSError as err:
        # qsub itself could not be executed (e.g. not on PATH).
        print("Could not run qsub for " + job_name + ": " + str(err))
        failed_members.append(member_number)
        continue
    if submission.returncode != 0:
        failed_members.append(member_number)

if failed_members:
    print("Submission failed for ensemble member(s): " + ', '.join(failed_members))
    print('Check the status with: qstat -u ' + user)
    sys.exit(1)

print("Experiment '" + experiment + "' ensemble restarted.")
print('Check the status with: qstat -u ' + user)