# lucia_lite.py — LUCIA-lite: load-balance analysis of OASIS coupling logs (EC-Earth / BSC)
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib as mpl
  4. import matplotlib.pyplot as plt
  5. import glob
  6. import os
  7. import time
  8. import sys
  9. import math
  10. import argparse
  11. mpl.use('Agg')
def main():
    """Entry point for LUCIA-lite.

    Parses the command-line options, reads the OASIS 'lucia.*' log files found
    in data_path, computes per-component coupling statistics with the selected
    method ('average' or 'minmax'), and writes results to text/CSV files and
    plots.

    Returns:
        0 on success, and also when no input data is found in data_path.
    """
    # Set path to data and folders to save the results of Lucia
    print(os.getcwd())
    data_path = './'
    csv_path = './lucia_lite_results/'
    plots_path = './lucia_lite_plots/'
    parser = argparse.ArgumentParser(description="Balance tool for BSC")
    parser.add_argument('method', type=str, help='method used (average or minmax)')
    parser.add_argument('ignore_init_ts', type=int, help='number of coupling steps to ignore at the beginning of the '
                                                         'simulation')
    parser.add_argument('ignore_end_ts', type=int, help='number of coupling steps to ignore at the end of the '
                                                        'simulation')
    parser.add_argument('num_ts_print', type=int, help='number of coupling steps to print individually (it is '
                                                       'recommended to use values between 32 and 128')
    args = parser.parse_args()
    if not data_path[-1] == '/':
        data_path += '/'
    # Check data_path is correct: the master log of component 01 must exist
    lucia_logs = data_path + 'lucia.01.000000'
    if not os.path.isfile(lucia_logs):
        print("No input data provided. Current data_path: " + data_path)
        return 0
    # Method to use (string): "average" (default) or "minmax"
    print("Method used: %s" % args.method)
    # Number of coupling steps considered as init and end and therefore ignored for the study (natural numbers)
    print("Removing first %i and last %i coupling steps of the execution" % (args.ignore_init_ts, args.ignore_end_ts))
    # Number of coupling steps to print for NEMO and IFS (hint: use between 32 and 128). Natural number
    print("%d timesteps will be analysed" % args.num_ts_print)
    # Components that will not be taken into account for the analysis ( should look like: ["AMIPFORC", "RNFMAP"] )
    # Mapping of the binary names in EC-Earth (runoff mapper -> RNFMAP, AMIP forcing -> AMIPFORC,
    # XIOS -> xios.x, NEMO -> oceanx, IFS -> ATMIFS, LPJG -> lpjg, TM5 -> ctm5mp)
    components_to_ignore = ["RNFMAP", "AMIPFORC"]
    print("Ignoring components: ", components_to_ignore)
    # Create directories where outputs will be stored. Skip if already exist.
    os.makedirs(csv_path, exist_ok=True)
    os.makedirs(plots_path, exist_ok=True)
    # Get the number of components used (one master log 'lucia.NN.000000' per component)
    number_of_components = len(glob.glob(data_path + "lucia.??.000000"))
    # Variables declaration (all keyed by component name unless noted)
    cpl_model_names = []             # components that actually communicate with OASIS
    field_frames = {}                # DataFrame of coupling fields per component
    n_proc = {}                      # number of MPI processes per component
    n_files_read_cpl = {}            # files (ranks) that communicate with OASIS
    final_field_results = {}         # per-field coupling times
    all_component_dependencies = {}  # coupling time owed to each partner component
    total_dependencies = {}
    number_cpl_ts = {}               # coupling steps kept for the study
    number_ts_executed = {}          # coupling steps executed in total
    data = {}                        # raw timing rows per component
    fields = {}                      # field metadata per component
    component_ids = {}               # 'NN' id string per component
    ts_ids = {}                      # coupling-step ids per component
    timesteps = {}                   # per-step breakdown (filled by the computations)
    jitter = {}                      # only filled by the minmax method
    # Get information from all components by reading only the master file
    for component_number in range(1, number_of_components + 1):
        component_id = "%02d" % component_number
        # Get component name and number of ts executed from the master.
        # NOTE: tuple targets are assigned left-to-right, so component_name is
        # already bound when it is used as the n_proc key in this statement.
        component_name, fields_df, number_ts, ts, n_proc[component_name] = get_info_master(component_id, data_path)
        # Skip components that do not communicate with OASIS
        if component_name in components_to_ignore:
            continue
        if number_ts == 0:
            print("Component %s does not communicate with OASIS but it is taken into account for the CHPSY. "
                  "If added to components_to_ignore parameter, it wont be used." % component_name)
            continue
        # Store the number of coupling steps, model name, component_id and coupling steps ids only if the component
        # communicates with OASIS
        component_ids[component_name] = component_id
        cpl_model_names.append(component_name)
        fields[component_name] = fields_df
        number_ts_executed[component_name] = number_ts
        ts_ids[component_name] = ts
    # The component that communicates more frequently with OASIS is used as a base component to check that the
    # parameters to ignore coupling steps (ignore_init_ts and ignore_end_ts) are feasible and to set the first and last
    # coupling steps ids of the coupled execution
    most_frequent_coupled_component = max(number_ts_executed, key=number_ts_executed.get)
    if number_ts_executed[most_frequent_coupled_component] <= (args.ignore_init_ts + args.ignore_end_ts):
        sys.exit("\nERROR: more coupling steps to ignore than simulated! Check ignore_init_ts and ignore_end_ts values.")
    first_ts_id = ts_ids[most_frequent_coupled_component][args.ignore_init_ts]
    last_ts_id = ts_ids[most_frequent_coupled_component][-(args.ignore_end_ts + 1)]
    print("The max number of coupling steps executed is %d by component %s."
          % (number_ts_executed[most_frequent_coupled_component], most_frequent_coupled_component))
    print("Only coupling steps between %d and %d (%s , %s) are taken into account for the study. "
          % (first_ts_id, last_ts_id, pd.to_timedelta(first_ts_id, unit='s'), pd.to_timedelta(last_ts_id, unit='s')))
    if (args.ignore_init_ts + args.num_ts_print) >= number_ts_executed[most_frequent_coupled_component]:
        sys.exit("\nERROR: More coupling steps to analyze than the ones executed. Try reducing 'num_ts_print'.")
    # Get number of coupling steps executed by the Master process and the coupling steps that will be studied in detail
    # and shown as a plot.
    all_possible_ts = []
    for component in cpl_model_names:
        valid_ts = ts_ids[component][(ts_ids[component] >= first_ts_id) & (ts_ids[component] <= last_ts_id)]
        number_cpl_ts[component] = len(valid_ts)
        # Union of the coupling-step ids seen by any component
        all_possible_ts = list(set(all_possible_ts) | set(valid_ts))
    all_possible_ts.sort()
    ts_ids_to_study = all_possible_ts[:args.num_ts_print]
    total_n_proc_cpl_components = 0
    # Read OASIS log files (one file per MPI rank of each component)
    for component in cpl_model_names:
        # Read data form lucia files
        print("\nReading %s: " % component, end='')
        data_df, num_files, num_files_cpl = read_data(component_ids[component], first_ts_id,
                                                      last_ts_id, data_path)
        print("%s has executed %d coupling steps. Only %d are taken into account for the study"
              % (component, number_ts_executed[component], number_cpl_ts[component]))
        print("%s has used %d MPI processes. Only %d are used for the study"
              % (component, n_proc[component], num_files_cpl))
        # Store data read from files. Calculated the number of files read and the number of files that actually
        # communicate with OASIS
        total_n_proc_cpl_components += n_proc[component]
        n_files_read_cpl[component] = num_files_cpl
        data[component] = data_df
    # Store fields information
    for component in cpl_model_names:
        # DataFrame to store the Fields (received 'RC' and sent 'SN'). Rename cols
        fields_received = fields[component][fields[component].Operation == 'RC']
        fields_sent = fields[component][fields[component].Operation == 'SN']
        df = pd.concat([fields_received, fields_sent])
        df.insert(0, "Origin", component, True)
        # Store the component data to main structures
        field_frames[component] = df
    # Since all components are running in parallel, the most periodic coupled component is used to get the
    # coupled execution time.
    # Care! This is after delete_init_end_ts and may not correspond to the total execution time of the experiment if
    # ignore_init_ts and/or ignore_end_ts are != 0
    cpl_execution_time = data[most_frequent_coupled_component].iloc[-1].Time - \
                         data[most_frequent_coupled_component].iloc[0].Time
    ts = data[most_frequent_coupled_component].Timestep.unique()
    simulated_time = ts[-1] - ts[0]
    last_coupling_step_to_analyse = ts[args.num_ts_print]  # NOTE(review): assigned but never used below
    # Find the field interchanges between components. Stored in field_frames
    get_coupling_interchanges(field_frames,cpl_model_names)
    if args.method == "average":
        main_average_computation(cpl_model_names, data, final_field_results, all_possible_ts, ts_ids_to_study, timesteps)
    elif args.method == "minmax":
        main_minmax_computation(cpl_model_names, data, final_field_results, jitter, all_possible_ts, ts_ids_to_study,
                                timesteps)
    # Find dependencies between components
    get_component_dependencies(cpl_model_names, field_frames, final_field_results, all_component_dependencies,
                               total_dependencies)
    # Write to CSV
    sypd, chpsy = write_results(cpl_model_names, field_frames, args.method, final_field_results, jitter,
                                all_component_dependencies, cpl_execution_time, n_proc, total_n_proc_cpl_components,
                                simulated_time, number_cpl_ts, csv_path)
    # Create plots
    show_plots(cpl_model_names, final_field_results, cpl_execution_time, timesteps, args.num_ts_print, n_proc,
               all_component_dependencies, sypd, chpsy, plots_path)
    print("\nLucia-lite results in %s and %s directories" % (plots_path, csv_path))
    return 0
  163. def get_info_master(component_id, data_path):
  164. master_file_name = "lucia." + component_id + ".000000"
  165. file_name = glob.glob1(data_path, master_file_name)[0]
  166. full_name = data_path + file_name
  167. df = pd.read_csv(full_name,
  168. names=["Balance", "Timestep", "Field_id", "Order", "Action", "Time"],
  169. delim_whitespace=True,
  170. dtype={'Timestep': str, 'Field': str, 'Order': str, 'Action': str,
  171. 'Time': float}).iloc[:, 1:]
  172. component_name = df.iloc[1, 1]
  173. nproc = int(df.iloc[2, 1])
  174. fields, data = prepare_dataframe(df)
  175. ts = data.Timestep.unique()
  176. n_ts = len(ts)
  177. return component_name, fields, n_ts, ts, nproc
  178. def read_data(component_id, first_ts_id, last_ts_id, data_path):
  179. list_of_data = []
  180. file_names = "lucia." + component_id + ".*"
  181. input_files = glob.glob1(data_path, file_names)
  182. input_files.sort()
  183. num_files_cpl = 0
  184. num_files_to_read = len(input_files)
  185. perc = 10
  186. for count, file_name in enumerate(input_files):
  187. if count == math.ceil(num_files_to_read * perc / 100):
  188. print(int(math.ceil(perc)), "%..", sep='', end='', flush=True)
  189. perc += 10
  190. full_name = data_path + file_name
  191. df = pd.read_csv(full_name,
  192. names=["Balance", "Timestep", "Field_id", "Order", "Action", "Time"],
  193. delim_whitespace=True,
  194. dtype={'Timestep': str, 'Field': str, 'Order': str, 'Action': str,
  195. 'Time': float}).iloc[:, 1:]
  196. _, data_df = prepare_dataframe(df)
  197. # In case only the master process communicates with Oasis
  198. if not data_df.empty:
  199. data_df, _ = delete_init_end_ts(data_df, first_ts_id, last_ts_id)
  200. num_files_cpl += 1
  201. list_of_data.append(data_df)
  202. print("100%")
  203. # Concat the data from all MPI slave processes.
  204. data_df = pd.concat(list_of_data, axis=0, ignore_index=True)
  205. return data_df, num_files_to_read, num_files_cpl
  206. def prepare_dataframe(data_in):
  207. # Separate header from data
  208. is_head = data_in['Time'].isna()
  209. head = data_in[is_head].copy() # Full copy to avoid warning when changing column datatype
  210. data = data_in[~is_head].copy()
  211. # Get fields
  212. fields = head[pd.notnull(head.Order)].copy()
  213. fields.rename(index=str, columns={"Timestep": "Operation", "Order": "Fields", "Time": "Destination"}, inplace=True)
  214. del fields["Action"]
  215. fields.Field_id = fields.Field_id.astype(int)
  216. data[["Timestep", "Field_id"]] = data[["Timestep", "Field_id"]].astype(int)
  217. return fields, data
  218. # Ignore irregular ts from the initialization and finalization of the execution
  219. def delete_init_end_ts(data, first_ts_id, last_ts_id):
  220. new_data = data[data.Timestep.between(first_ts_id, last_ts_id)]
  221. ts = new_data.Timestep.unique()
  222. number_cpl_ts = len(ts)
  223. return new_data, number_cpl_ts
  224. def get_coupling_interchanges(field_frames,cpl_model_names):
  225. fields_origin_destiny = pd.concat(field_frames)
  226. fields_origin_destiny = fields_origin_destiny.sort_values(by=['Field_id'])
  227. fields_origin_destiny = fields_origin_destiny.reset_index(drop=True)
  228. count = fields_origin_destiny.Field_id.value_counts()
  229. # Fields that are sent (or received) but not received (or sent). This happens when omitting components
  230. unpaired_fields = count[count.values != 2].index.values.astype(int)
  231. fields_origin_destiny = fields_origin_destiny[~fields_origin_destiny.Field_id.isin(unpaired_fields)]
  232. fields_origin_destiny = fields_origin_destiny.reset_index(drop=True)
  233. for i in range(0, len(fields_origin_destiny.Field_id.unique()) * 2, 2):
  234. fields_origin_destiny.loc[i, 'Destination'] = fields_origin_destiny.loc[i + 1, 'Origin']
  235. fields_origin_destiny.loc[i + 1, 'Destination'] = fields_origin_destiny.loc[i, 'Origin']
  236. for component in cpl_model_names:
  237. model = field_frames[component].iloc[0]['Origin']
  238. model_mk = fields_origin_destiny['Origin'] == model
  239. field_frames[component] = fields_origin_destiny[model_mk]
  240. # Find the time spent in the coupling per field
def main_average_computation(cpl_model_names, data, final_field_results, all_possible_ts, ts_ids_to_study, timesteps):
    """Compute per-field coupling times with the 'average' method.

    For every component and field, the duration of each coupling event
    (interpolation, MPI_put sending, MPI_get waiting) is the difference between
    the mean 'After' and mean 'Before' timestamps across MPI processes, per
    coupling step.

    Results are stored in place:
      - final_field_results[component]: one row per field
        (Field_id, Waiting, Interpolation, Sending, Total).
      - timesteps[component]: per coupling step breakdown
        (via get_info_coupling_step).
    """
    for component in cpl_model_names:
        data_c = data[component].reset_index(drop=True)
        field_data_means = []
        # Accumulators indexed by every coupling step seen in the whole run
        acc_time_waiting = pd.Series(0, index=all_possible_ts)
        acc_time_sending = pd.Series(0, index=all_possible_ts)
        acc_time_interpo = pd.Series(0, index=all_possible_ts)
        # Iterate for every field_id
        for field in data_c.Field_id.unique():
            # Get data from a single field and store it on data_field.
            data_field = data_c[data_c.Field_id == field]
            # Masks to filter MPI, interpolation, before and after operations
            is_interpo = data_field.Action == 'interpo'
            is_sending = data_field.Action == 'MPI_put'
            is_waiting = data_field.Action == 'MPI_get'
            is_before = data_field.Order == 'Before'
            is_after = data_field.Order == 'After'
            # Compute time spent, update totals and save to list field_data
            # MEAN: (mean 'After' timestamp) - (mean 'Before' timestamp) per coupling step
            avg_time_interpo = data_field[is_interpo & is_after].groupby(["Timestep"])["Time"].mean() - \
                               data_field[is_interpo & is_before].groupby(["Timestep"])["Time"].mean()
            avg_time_sending = data_field[is_sending & is_after].groupby(["Timestep"])["Time"].mean() - \
                               data_field[is_sending & is_before].groupby(["Timestep"])["Time"].mean()
            avg_time_waiting = data_field[is_waiting & is_after].groupby(["Timestep"])["Time"].mean() - \
                               data_field[is_waiting & is_before].groupby(["Timestep"])["Time"].mean()
            # Fix the index so it corresponds to the list of timesteps executed by the component;
            # fill_value=0 keeps the Series additions below NaN-free.
            avg_time_interpo = avg_time_interpo.reindex(index=all_possible_ts, copy=True, fill_value=0)
            avg_time_sending = avg_time_sending.reindex(index=all_possible_ts, copy=True, fill_value=0)
            avg_time_waiting = avg_time_waiting.reindex(index=all_possible_ts, copy=True, fill_value=0)
            # Save the time accumulated per field
            if not avg_time_interpo.empty:
                acc_time_interpo += avg_time_interpo
            if not avg_time_sending.empty:
                acc_time_sending += avg_time_sending
            if not avg_time_waiting.empty:
                acc_time_waiting += avg_time_waiting
            # Save results on field_data
            field_total_cpl_time = (avg_time_interpo.sum() + avg_time_waiting.sum() + avg_time_sending.sum())
            field_data_means.append(
                (field, avg_time_waiting.sum(), avg_time_interpo.sum(), avg_time_sending.sum(), field_total_cpl_time))
        # Save results to a DataFrame
        final_field_results[component] = pd.DataFrame(field_data_means,
                                                      columns=["Field_id", "Waiting", "Interpolation",
                                                               "Sending", "Total"])
        # Per coupling-step breakdown used by the detailed plots
        get_info_coupling_step(data_c, component, ts_ids_to_study, acc_time_waiting, acc_time_interpo, acc_time_sending,
                               timesteps)
  287. def main_minmax_computation(cpl_model_names, data, final_field_results_minmax, jitter, all_possible_ts, ts_ids_to_study,
  288. timesteps):
  289. for component in cpl_model_names:
  290. data_c = data[component].reset_index(drop=True)
  291. field_data_minmax = []
  292. acc_time_waiting = pd.Series(0, index=all_possible_ts)
  293. acc_time_sending = pd.Series(0, index=all_possible_ts)
  294. acc_time_interpo = pd.Series(0, index=all_possible_ts)
  295. acc_jitter = 0
  296. # Iterate for every field_id
  297. for field in data_c.Field_id.unique():
  298. # Get data from a single field and store it on data_field.
  299. data_field = data_c[data_c.Field_id == field]
  300. # Masks to filter MPI, interpolation, before and after operations
  301. is_interpo = data_field.Action == 'interpo'
  302. is_sending = data_field.Action == 'MPI_put'
  303. is_waiting = data_field.Action == 'MPI_get'
  304. is_before = data_field.Order == 'Before'
  305. is_after = data_field.Order == 'After'
  306. # Min/Max
  307. minmax_time_interpo = data_field[is_interpo & is_after].groupby(["Timestep"])["Time"].max() - \
  308. data_field[is_interpo & is_before].groupby(["Timestep"])["Time"].max()
  309. minmax_time_sending = data_field[is_sending & is_after].groupby(["Timestep"])["Time"].max() - \
  310. data_field[is_sending & is_before].groupby(["Timestep"])["Time"].max()
  311. minmax_time_waiting = data_field[is_waiting & is_after].groupby(["Timestep"])["Time"].max() - \
  312. data_field[is_waiting & is_before].groupby(["Timestep"])["Time"].max()
  313. field_total_cpl_time = (minmax_time_interpo.sum() + minmax_time_waiting.sum() + minmax_time_sending.sum())
  314. jitter_time_interpo = data_field[is_interpo & is_before].groupby(["Timestep"])["Time"].max() - \
  315. data_field[is_interpo & is_before].groupby(["Timestep"])["Time"].min()
  316. jitter_time_sending = data_field[is_sending & is_before].groupby(["Timestep"])["Time"].max() - \
  317. data_field[is_sending & is_before].groupby(["Timestep"])["Time"].min()
  318. jitter_time_waiting = data_field[is_waiting & is_before].groupby(["Timestep"])["Time"].max() - \
  319. data_field[is_waiting & is_before].groupby(["Timestep"])["Time"].min()
  320. acc_jitter += jitter_time_interpo.sum() + jitter_time_waiting.sum() + jitter_time_sending.sum()
  321. field_data_minmax.append((field, minmax_time_waiting.sum(), minmax_time_interpo.sum(),
  322. minmax_time_sending.sum(), field_total_cpl_time))
  323. # Save the time accumulated per filed
  324. if not minmax_time_interpo.empty:
  325. acc_time_interpo += minmax_time_interpo
  326. if not minmax_time_sending.empty:
  327. acc_time_sending += minmax_time_sending
  328. if not minmax_time_waiting.empty:
  329. acc_time_waiting += minmax_time_waiting
  330. jitter[component] = acc_jitter / len(data_c.Field_id.unique())
  331. # Save results to a DataFrame
  332. final_field_results_minmax[component] = pd.DataFrame(field_data_minmax,
  333. columns=["Field_id", "Waiting", "Interpolation",
  334. "Sending", "Total"])
  335. get_info_coupling_step(data_c, component, ts_ids_to_study, acc_time_waiting, acc_time_interpo, acc_time_sending,
  336. timesteps)
def get_info_coupling_step(data_c, component, ts_ids_to_study, acc_time_waiting, acc_time_interpo, acc_time_sending,
                           timesteps):
    """Build the per-coupling-step time breakdown of one component.

    Stores in timesteps[component] a DataFrame indexed by coupling-step id with
    columns ["Component", "Waiting", "Interpolation", "Sending"], aligned on
    the common list of studied steps (ts_ids_to_study). Exits the program if
    fewer than two coupling steps are available for this component.
    """
    # Get info by coupling step: earliest timestamp of each studied step
    start_ts = data_c[data_c.Timestep <= ts_ids_to_study[-1]].groupby(['Timestep']).Time.min()
    if len(start_ts) < 2:
        sys.exit("\nERROR: Component %s does not have enough information to provide the detailed analysis per "
                 "coupling step.\nTry increasing the 'num_ts_print' or adding this component to the list "
                 "'components_to_ignore'" % component)
    # Wall-clock length of each coupling step = difference of consecutive start times
    res = [y - x for x, y in zip(start_ts, start_ts[1:])]
    ts_total_time = pd.Series(res, name='Component', index=start_ts.index[:-1])
    # Useful (component) time = step length minus coupling events inside it.
    # The .loc[:start_ts.index[-2]] label slices drop the last, open-ended step.
    ts_lengths = ts_total_time - (acc_time_waiting.loc[:start_ts.index[-2]] +
                                  acc_time_interpo.loc[:start_ts.index[-2]] +
                                  acc_time_sending.loc[:start_ts.index[-2]])
    timesteps[component] = pd.concat([ts_lengths, acc_time_waiting.loc[:start_ts.index[-2]],
                                      acc_time_interpo.loc[:start_ts.index[-2]],
                                      acc_time_sending.loc[:start_ts.index[-2]]], axis=1)
    timesteps[component].columns = ["Component", "Waiting", "Interpolation", "Sending"]
    # Align on the common list of studied steps, filling missing steps with 0
    common_idx = pd.DataFrame(0, index=ts_ids_to_study[:-1], columns=timesteps[component].columns)
    timesteps[component] = common_idx.add(timesteps[component], fill_value=0)
  356. def get_component_dependencies(cpl_model_names, field_frames, final_field_results, all_component_dependencies,
  357. total_dependencies):
  358. for component in cpl_model_names:
  359. other_components = [x for x in cpl_model_names if x not in component]
  360. frames = []
  361. total_value = {}
  362. for to_component in other_components:
  363. fields = field_frames[component][field_frames[component].Destination == to_component]
  364. fields_id = fields.Field_id
  365. df = final_field_results[component][final_field_results[component].Field_id.isin(fields_id)]
  366. waiting_time = df.Waiting.sum()
  367. interp_time = df.Interpolation.sum()
  368. sending_time = df.Sending.sum()
  369. total_coupling_time = waiting_time + interp_time + sending_time
  370. frame = {'Sending': sending_time, 'Interpolation': interp_time, 'Waiting': waiting_time}
  371. df2 = pd.DataFrame(frame, index=[to_component])
  372. frames.append(df2)
  373. total_value[to_component] = total_coupling_time
  374. all_component_dependencies[component] = pd.concat(frames)
  375. total_dependencies[component] = total_value
def write_results(cpl_model_names, field_frames, method, final_field_results, jitter, all_component_dependencies,
                  cpl_execution_time, n_proc, total_n_proc_cpl_components, simulated_time, number_cpl_ts, csv_path):
    """Write all LUCIA-lite text outputs into csv_path.

    Produces summary.txt, oasis_fields.txt, one <component>.txt per component
    and table.csv, then prints the coupled SYPD/CHPSY/coupling-cost figures.

    Returns:
        (sypd, chpsy) dicts keyed by component name; sypd also gets a
        "Coupled" entry with the whole-run value.
    """
    total_waiting = {}
    total_interpo = {}
    total_sending = {}
    total_waiting_cost = {}
    total_interpo_cost = {}
    total_sending_cost = {}
    component_exe_time = {}
    component_exe_cost = {}
    component_cost = {}
    total_time_in_cpl = {}
    percentual_component_exe_time = {}
    percentual_time_in_cpl = {}
    percentual_cpl_cost = {}
    sypd = {}
    chpsy = {}
    out_summary = open(csv_path + "summary.txt", "w")
    out_fields = open(csv_path + "oasis_fields.txt", "w")
    total_n_proc = sum(n_proc.values())
    # Total cost of the coupled run in core-hours
    cpl_execution_cost = cpl_execution_time / 3600 * total_n_proc
    max_cpl_event_cost = 0  # NOTE(review): never updated nor read below
    for component in cpl_model_names:
        file_out = csv_path + component + ".txt"
        # Write fields used by each component
        out_fields.write("%s fields:\n\n" % component)
        field_frames[component].to_csv(out_fields, float_format='%g', mode="a", index=False)
        out_fields.write("\n--------------------------------\n\n\n")
        # Write the coupling time of each field
        out_file = open(file_out, "w")
        out_file.write("Results for " + component + ":\n\n\n")
        out_file.write("Time in interpolation or MPI per field:\n\n")
        final_field_results[component].to_csv(out_file, float_format='%.3f', mode='a')
        # Totals per coupling event type (seconds) and their core-hour cost
        total_waiting[component] = final_field_results[component].Waiting.sum()
        total_interpo[component] = final_field_results[component].Interpolation.sum()
        total_sending[component] = final_field_results[component].Sending.sum()
        total_waiting_cost[component] = total_waiting[component] / 3600 * n_proc[component]
        total_interpo_cost[component] = total_interpo[component] / 3600 * n_proc[component]
        total_sending_cost[component] = total_sending[component] / 3600 * n_proc[component]
        total_time_in_cpl[component] = total_interpo[component] + total_sending[component] + total_waiting[component]
        percentual_time_in_cpl[component] = (total_time_in_cpl[component] / cpl_execution_time) * 100
        percentual_cpl_cost[component] = (total_time_in_cpl[component] / 3600) * n_proc[component] / cpl_execution_cost * 100
        out_file.write("\nsum:%.3g,%.3g,%.3g,%.3g" %
                       (total_waiting[component], total_interpo[component], total_sending[component],
                        total_time_in_cpl[component]))
        # Useful execution time = coupled time minus time spent in coupling
        component_exe_time[component] = cpl_execution_time - total_time_in_cpl[component]
        percentual_component_exe_time[component] = (component_exe_time[component] / cpl_execution_time) * 100
        # Cost
        component_exe_cost[component] = (component_exe_time[component] / 3600) * n_proc[component]
        component_cost[component] = (cpl_execution_time / 3600) * n_proc[component]
        # Write the component dependencies
        out_file.write("\n\n\nDependencies:\n\n")
        # NOTE(review): 'not x in component' is a substring test, not an
        # equality test — a component whose name is contained in another's
        # (e.g. 'IFS' in 'ATMIFS') would be skipped here. Confirm intent.
        other_components = [x for x in cpl_model_names if not x in component]
        for to_component in other_components:
            waiting = all_component_dependencies[component].loc[to_component, 'Waiting']
            interpo = all_component_dependencies[component].loc[to_component, 'Interpolation']
            sending = all_component_dependencies[component].loc[to_component, "Sending"]
            total_coupling_time_to_component = waiting + interpo + sending
            out_file.write("Component %s has an overhead of %.3f seconds \n"
                           "(%.2f%% of the total execution time) due to coupling with %s\n\n"
                           % (component, total_coupling_time_to_component,
                              (total_coupling_time_to_component / cpl_execution_time) * 100, to_component))
            all_component_dependencies[component].loc[[to_component]].to_csv(out_file, float_format='%.3f', mode='a')
        out_file.close()
        # CMIP6 metrics. NOTE(review): assumes simulated_time is expressed in
        # days (divided by 365 to get years) — confirm against log contents.
        sypd[component] = (simulated_time / 365) / component_exe_time[component]
        chpsy[component] = n_proc[component] * (24 / sypd[component])
        # Find the coupling cost
        out_summary.write(component)
        out_summary.write("\nNumber of processes: %d" % n_proc[component])
        out_summary.write("\nNumber of coupling steps: %d" % number_cpl_ts[component])
        out_summary.write("\nCoupled component execution cost (coupled time (h) * num_proc): %.3f"
                          % component_cost[component])
        out_summary.write("\nComponent useful execution time: %.3f (%.2f%%)"
                          % (component_exe_time[component], percentual_component_exe_time[component]))
        out_summary.write("\nComponent total coupling time: %.3f (%.2f%%)"
                          % (total_time_in_cpl[component], percentual_time_in_cpl[component]))
        out_summary.write("\n ( cpl event: Time(s) | Cost in core-hours (time(h) * num_proc) )")
        out_summary.write("\n - Waiting: %.3f | %.3f " % (total_waiting[component],
                                                          total_waiting_cost[component]))
        out_summary.write("\n - Interpolation: %.3f | %.3f " % (total_interpo[component],
                                                                total_interpo_cost[component]))
        out_summary.write("\n - Sending: %.3f | %.3f " % (total_sending[component],
                                                          total_sending_cost[component]))
        out_summary.write("\n\n%s partial coupling cost: %.3f%%" % (component, percentual_cpl_cost[component]))
        out_summary.write("\n%s SYPD: %.3f" % (component, sypd[component]))
        out_summary.write("\n%s CHPSY: %.3f" % (component, chpsy[component]))
        if method == "minmax":
            # Extra min/max metrics: Cn (computation), En (exchange), jitter
            out_summary.write("\nMin/Max metrics:")
            out_summary.write("\n%s Cn: %.3f" % (component, component_exe_time[component] + total_interpo[component]))
            out_summary.write("\n%s En: %.3f" % (component, total_waiting[component] + total_sending[component]))
            out_summary.write("\n%s Jitter: %.3f" % (component, jitter[component]))
        out_summary.write("\n\n-----------------------------------\n\n")
    out_summary.write("\nHint: try to minimize the coupling event with higher cost among all components if possible\n")
    out_summary.write("\nTotal coupled execution time: %.3f" % cpl_execution_time)
    # Coupling cost: fraction of the resources not spent in useful computation
    cpl_cost = 1 - sum(component_exe_cost.values()) / ((cpl_execution_time / 3600) * total_n_proc_cpl_components)
    out_summary.write("\nTotal coupling cost: %.2f%%" % (cpl_cost * 100))
    cpl_sypd = (simulated_time / 365) / cpl_execution_time
    out_summary.write("\nCoupled SYPD: %.2f" % cpl_sypd)
    cpl_chpsy = total_n_proc * (24 / cpl_sypd)
    out_summary.write("\nCoupled CHPSY: %.2f" % cpl_chpsy)
    out_summary.close()
    # Table of results
    l_frames = []
    for component in cpl_model_names:
        frame = {'nproc': n_proc[component], 'number_ts': number_cpl_ts[component],
                 'exec_time': component_exe_time[component],
                 'waiting_time': total_waiting[component], 'interpo_time': total_interpo[component],
                 'SYPD': sypd[component], 'CHPSY': chpsy[component], 'cpl_cost': percentual_cpl_cost[component]}
        tmp_df = pd.DataFrame(frame, index=[component])
        l_frames.append(tmp_df)
    # Add coupled data
    frame = {'nproc': total_n_proc, 'number_ts': max(number_cpl_ts.values()), 'exec_time': cpl_execution_time,
             'waiting_time': sum(total_waiting.values()), 'interpo_time': sum(total_interpo.values()),
             'SYPD': cpl_sypd, 'CHPSY': cpl_chpsy, 'cpl_cost': (cpl_cost * 100)}
    l_frames.append(pd.DataFrame(frame, index=["Coupled"]))
    table_df = pd.concat(l_frames, axis=0, ignore_index=False)
    table_df[['nproc', 'number_ts']] = table_df[['nproc', 'number_ts']].astype(int)
    table_out = open(csv_path + 'table.csv', mode='w')
    table_df.to_csv(table_out, float_format='%.3f')
    table_out.close()
    print("\nCoupled SYPD: %.3f" % cpl_sypd)
    print("Coupled CHPSY: %.3f" % cpl_chpsy)
    print("Coupling cost: %.3f%%" % (cpl_cost * 100))
    sypd["Coupled"] = cpl_sypd
    return sypd, chpsy
  502. # Attach a text label above each bar displaying its height.
  503. def autolabel(rects, ax):
  504. for rect in rects:
  505. height = int(rect.get_height())
  506. ax.annotate('{}'.format(height),
  507. xy=(rect.get_x() + rect.get_width() / 2, height),
  508. xytext=(0, 3), # 3 points vertical offset
  509. textcoords="offset points",
  510. ha='center', va='bottom')
  511. def autolabelf(rects, ax):
  512. for rect in rects:
  513. height = round(float(rect.get_height()), 2)
  514. ax.annotate('{}'.format(height),
  515. xy=(rect.get_x() + rect.get_width() / 2, height),
  516. xytext=(0, 3), # 3 points vertical offset
  517. textcoords="offset points",
  518. ha='center', va='bottom')
def show_plots(cpl_model_names, final_field_results, cpl_execution_time, timesteps, num_ts_print, n_proc,
               all_component_dependencies, sypd, chpsy, plots_path):
    """Generate and save every LUCIA-lite diagnostic figure as a PNG under *plots_path*.

    Figures written (file names as saved below):
      time_per_event.png        per-component bars: time in each coupling event
      dependencies.png          stacked bars of inter-component dependencies
      stackedevents.png         one stacked bar per component (all events)
      SYPD.png                  simulated years per day, per component (+ extra bar)
      CHPSY.png                 core-hours per simulated year, per component
      oasis.png                 old-lucia Cn (calculation) vs En (communication) bars
      timesteps.png / coupling_steps.png   per-coupling-step detail
                                (only produced when num_ts_print > 0)
      core-hours_cpl_event.png  nested pie of core-hour cost per coupling event

    Parameters
    ----------
    cpl_model_names : sequence of component names; one subplot column per name.
    final_field_results : dict component -> table exposing ``Waiting``,
        ``Interpolation``, ``Sending`` and ``Total`` columns with ``.sum()``
        (presumably pandas DataFrames -- TODO confirm against caller).
    cpl_execution_time : total coupled wall-clock time in seconds.
    timesteps : dict component -> per-step table indexed by elapsed seconds.
        NOTE(review): a 'Time' column is added in place, mutating the caller's data.
    num_ts_print : number of coupling steps shown in the per-step figures; <= 0 skips them.
    n_proc : dict component -> process count, used to weight times into core costs.
    all_component_dependencies : dict component -> table plotted as stacked bars.
    sypd, chpsy : dicts of throughput metrics keyed by component; sypd appears to
        carry one extra entry beyond the components (the final blue bar) -- the
        caller presumably adds a "Coupled" key; verify.
    plots_path : output directory; joined with '+' so it must end in a path separator.

    Returns None; the only effects are the PNG files saved to disk.
    """
    # Old-lucia aggregates, kept in cpl_model_names order for the grouped-bar figure
    calculation_time = list()
    communication_time = list()
    # Per-component elapsed time (s) spent in each coupling event...
    total_waiting = {}
    total_interpo = {}
    total_sending = {}
    # ...and the same times weighted by process count (core-second costs)
    total_waiting_cost = {}
    total_interpo_cost = {}
    total_sending_cost = {}
    total_cpl_cost = {}
    total_component_exe_time = {}
    c = 0  # subplot column index
    # One subplot column per component; shared y-axis for visual comparability.
    # NOTE(review): ax1/ax2 are indexed with [c], which requires
    # len(cpl_model_names) >= 2 (a single subplot returns a bare Axes) -- confirm.
    fig1, ax1 = plt.subplots(1, len(cpl_model_names), figsize=(len(cpl_model_names) * 4, 3), sharey=True)
    fig2, ax2 = plt.subplots(1, len(cpl_model_names), figsize=(len(cpl_model_names) * 2 + 1, 3), sharey=True)
    for component in cpl_model_names:
        # Total component/coupling time
        total_waiting[component] = final_field_results[component].Waiting.sum()
        total_interpo[component] = final_field_results[component].Interpolation.sum()
        total_sending[component] = final_field_results[component].Sending.sum()
        total_waiting_cost[component] = total_waiting[component] * n_proc[component]
        total_interpo_cost[component] = total_interpo[component] * n_proc[component]
        total_sending_cost[component] = total_sending[component] * n_proc[component]
        total_cpl_cost[component] = total_waiting_cost[component] + total_interpo_cost[component] + total_sending_cost[component]
        # Component compute time = total run time minus the time the Total column accounts for
        total_component_exe_time[component] = cpl_execution_time - final_field_results[component].Total.sum()
        # Plot time in each event per component
        names = ['component', 'interp', "waiting", "sending"]
        total_values = [total_component_exe_time[component], total_interpo[component], total_waiting[component],
                        total_sending[component]]
        ax1[c].bar(names, total_values, width=0.5, color=['C0', 'C2', 'C1', 'C8'])
        ax1[0].set_ylabel('time (s)')  # label only the leftmost subplot (y-axis is shared)
        ax1[c].set_title(component)
        # Plot component dependencies
        all_component_dependencies[component].plot.bar(ax=ax2[c], stacked=True, legend=False, title=component,
                                                       color=['C8', 'C2', 'C1'])
        # Old lucia measurements: Cn = calculation time, En = communication time
        # (names match the fig6 legend labels below)
        Cn = total_component_exe_time[component] + total_interpo[component]
        calculation_time.append(Cn)
        En = total_waiting[component] + total_sending[component]
        communication_time.append(En)
        # Increment subplot counter
        c += 1
    fig1.tight_layout()
    fig1.suptitle('Time spent in each event', y=1.05)
    fig1.savefig(plots_path + 'time_per_event.png', bbox_inches='tight')
    # One shared legend taken from the last dependencies subplot, reversed so the
    # legend order matches the visual stacking order
    h, l = ax2[-1].get_legend_handles_labels()
    ax2[-1].legend(reversed(h), reversed(l), bbox_to_anchor=(1.02, 0.3),
                   loc="lower left", prop={'size': 8}, borderaxespad=0)
    ax2[0].set_ylabel("time (s)")
    fig2.tight_layout()
    fig2.suptitle('Dependencies between components', y=1.05)
    fig2.savefig(plots_path + 'dependencies.png', bbox_inches='tight')
    # Stacked plot of all components
    fig3, ax3 = plt.subplots(figsize=(len(cpl_model_names) * 2, 3))
    ax3.set_title("Time spent in each event of OASIS")
    ax3.bar(cpl_model_names, total_component_exe_time.values(), width=0.7, color='C0')
    # 'offset' accumulates the running bottom of each stacked segment
    offset = list(total_component_exe_time.values())
    ax3.bar(cpl_model_names, total_waiting.values(), width=0.7, bottom=offset, color='C1')
    offset = [x + y for x, y in zip(offset, list(total_waiting.values()))]
    ax3.bar(cpl_model_names, total_interpo.values(), width=0.7, bottom=offset, color='C2')
    offset = [x + y for x, y in zip(offset, list(total_interpo.values()))]
    ax3.bar(cpl_model_names, total_sending.values(), width=0.7, bottom=offset, color='C8')
    ax3.legend(labels=['Component', 'Waiting', 'Interpolation', 'Sending'], bbox_to_anchor=(1.05, 1),
               loc="upper left", borderaxespad=0, prop={'size': 8})
    ax3.set_ylabel("Elapsed time (s)")
    fig3.tight_layout()
    fig3.savefig(plots_path + 'stackedevents.png', bbox_inches='tight')
    # Plot SYPD
    # One cyan bar per component plus a final blue bar (presumably a "Coupled"
    # entry the caller appended to sypd -- confirm against caller)
    colors = ['c'] * len(cpl_model_names) + ['b']
    fig4, ax4 = plt.subplots(figsize=(len(cpl_model_names)*2, 3))
    rects1 = ax4.bar(list(sypd.keys()), list(sypd.values()), width=0.5, color=colors)
    ax4.set_ylabel("SYPD")
    ax4.set_title("SYPD per component and coupled")
    autolabelf(rects1, ax4)
    fig4.savefig(plots_path + 'SYPD.png', bbox_inches='tight')
    # Plot CHPSY. Coupled deleted since its not a fair comparison
    fig5, ax5 = plt.subplots(figsize=(len(cpl_model_names)*1.5, 3))
    ax5.bar(list(chpsy.keys()), list(chpsy.values()), width=0.5, color='g')
    ax5.set_ylabel("CHPSY")
    ax5.set_title("CHPSY per component")
    fig5.savefig(plots_path + 'CHPSY.png', bbox_inches='tight')
    # Old lucia plot: grouped Cn/En bars, one group per component
    x = np.arange(len(cpl_model_names))
    width = 0.35  # bar width; the two bars of a group sit at x -/+ width/2
    fig6, ax6 = plt.subplots(figsize=(len(cpl_model_names) * 2, 5))
    rects1 = ax6.bar(x - width / 2, calculation_time, width, label="Calculation time (Cn)", color='g')
    rects2 = ax6.bar(x + width / 2, communication_time, width, label="Communication time (En)", color='r')
    ax6.set_ylabel("Elapsed time (s)")
    ax6.set_title("OASIS coupled model components\n"
                  "Calculation time (green) vs coupling exchange duration\n"
                  "including the time spent waiting slower models (red)")
    ax6.set_xticks(x)
    ax6.set_xticklabels(cpl_model_names)
    ax6.legend(bbox_to_anchor=(0, -0.2), loc="center left", borderaxespad=0)
    autolabel(rects1, ax6)
    autolabel(rects2, ax6)
    fig6.tight_layout()
    fig6.savefig(plots_path + 'oasis.png', bbox_inches='tight')
    if num_ts_print > 0:
        # Plot pattern
        plot_width = math.ceil(num_ts_print / 3)  # figure width scales with steps shown
        fig7, ax7 = plt.subplots(len(cpl_model_names), 1, figsize=(plot_width, len(cpl_model_names)*6), sharey=True)
        fig8, ax8 = plt.subplots(len(cpl_model_names), 1, figsize=(plot_width, len(cpl_model_names)*6), sharey=True)
        c = 0  # subplot row index (one row per component here)
        for component in cpl_model_names:
            data_to_print = timesteps[component]
            # NOTE(review): adds a column to the shared table in place -- the
            # caller's timesteps[component] is mutated
            data_to_print['Time'] = pd.to_timedelta(data_to_print.index, unit='s')
            # Plot component timesteps
            data_to_print.plot(ax=ax7[c], kind='bar', x='Time', y='Component',
                               legend=False, color=["C0"], title=component)
            ax7[c].set_ylabel("time (s)")
            ax7[c].set_xlabel("cpl step")
            # Plot coupling timesteps
            data_to_print.plot.bar(ax=ax8[c], x='Time', stacked=True, legend=False,
                                   color=['C0', 'C1', 'C2', 'C8'], title=component)
            ax8[c].set_ylabel("time (s)")
            ax8[c].set_xlabel("timestep")
            c += 1
        fig7.tight_layout()
        fig7.suptitle('Component time per coupling step', y=1)
        fig7.savefig(plots_path + 'timesteps.png')
        h, l = ax8[0].get_legend_handles_labels()
        ax8[0].legend(reversed(h), reversed(l), bbox_to_anchor=(0, 1.02), loc="lower left", borderaxespad=0)
        fig8.tight_layout()
        fig8.suptitle('Waiting (MPI_get), component (component_ts) and \ninterpolation + send (interpo) time '
                      'per coupling step', y=1)
        fig8.savefig(plots_path + 'coupling_steps.png')
    # Pie plot with computing cost of each coupling event
    fig9, ax9 = plt.subplots(figsize=(16, 12))
    fig9.suptitle('Computing cost of each coupling event')
    width = 0.3  # ring thickness of the donut
    cm = plt.get_cmap("tab20c")
    # Outer ring: one wedge per component (total coupling cost), coloured with
    # every 4th tab20c colour, i.e. the first shade of each 4-shade group
    cout = cm(np.arange(len(cpl_model_names)) * 4)
    pie, _ = ax9.pie(list(total_cpl_cost.values()), radius=1, colors=cout, labels=list(total_cpl_cost.keys()))
    plt.setp(pie, width=width, edgecolor='white')
    # Inner ring: waiting/interpolation/sending cost breakdown per component
    values = list()
    labels = []
    for component in cpl_model_names:
        values.extend([total_waiting_cost[component], total_interpo_cost[component], total_sending_cost[component]])
        labels.extend([component + " waiting", component + " interpo", component + " send"])
    # Inner colours: the remaining 3 shades of each component's 4-shade group
    # (all indices except the multiples of 4 used by the outer ring)
    a = list(np.arange(4*len(cpl_model_names)))
    b = list(np.arange(4*len(cpl_model_names), step=4))
    cin = cm(list(set(a) - set(b)))
    plt.legend()
    pie2, _ = ax9.pie(values, radius=1-width, labeldistance=0.7, colors=cin, labels=labels)
    plt.setp(pie2, width=width, edgecolor='white')
    plt.legend()  # NOTE(review): second legend call replaces the first on the current axes
    fig9.tight_layout()
    fig9.savefig(plots_path + "core-hours_cpl_event.png")
  668. # Run
  669. start = time.time()
  670. main()
  671. end = time.time()
  672. print("Total time spent in LUCIA (s) : ", end - start)