12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792 |
- import pandas as pd
- import numpy as np
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- import glob
- import os
- import time
- import sys
- import math
- import argparse
- mpl.use('Agg')
def main():
    """Run the Lucia-lite balance analysis on the ``lucia.*`` log files found in the current directory.

    Command-line arguments (all positional): ``method`` ("average" or "minmax"), ``ignore_init_ts``,
    ``ignore_end_ts`` and ``num_ts_print``. The per-field coupling times are computed with the chosen
    method, the component dependencies are derived, and the results are written as text/CSV files and
    plots into ``./lucia_lite_results/`` and ``./lucia_lite_plots/``.

    Returns:
        0, both on success and when no master log file is found in the data path.
        The process exits through ``sys.exit`` when the requested coupling-step window is not feasible.
    """
    # Set path to data and folders to save the results of Lucia
    print(os.getcwd())
    data_path = './'
    csv_path = './lucia_lite_results/'
    plots_path = './lucia_lite_plots/'
    parser = argparse.ArgumentParser(description="Balance tool for BSC")

    # `choices` rejects an unknown method up front; previously nothing was computed for it and the
    # run failed later with a KeyError while looking up the (missing) field results.
    parser.add_argument('method', type=str, choices=('average', 'minmax'), help='method used (average or minmax)')
    parser.add_argument('ignore_init_ts', type=int, help='number of coupling steps to ignore at the beginning of the '
                                                         'simulation')
    parser.add_argument('ignore_end_ts', type=int, help='number of coupling steps to ignore at the end of the '
                                                        'simulation')
    parser.add_argument('num_ts_print', type=int, help='number of coupling steps to print individually (it is '
                                                       'recommended to use values between 32 and 128')
    args = parser.parse_args()
    if not data_path.endswith('/'):
        data_path += '/'
    # Check data_path is correct
    lucia_logs = data_path + 'lucia.01.000000'
    if not os.path.isfile(lucia_logs):
        print("No input data provided. Current data_path: " + data_path)
        return 0
    # Method used (string): "average" or "minmax"
    print("Method used: %s" % args.method)
    # Number of coupling steps considered as init and end and therefore ignored for the study
    print("Removing first %i and last %i coupling steps of the execution" % (args.ignore_init_ts, args.ignore_end_ts))
    # Number of coupling steps to print for NEMO and IFS (hint: use between 32 and 128)
    print("%d timesteps will be analysed" % args.num_ts_print)
    # Components that will not be taken into account for the analysis ( should look like: ["AMIPFORC", "RNFMAP"] )
    # Mapping of the binary names in EC-Earth (runoff mapper -> RNFMAP, AMIP forcing -> AMIPFORC,
    # XIOS -> xios.x, NEMO -> oceanx, IFS -> ATMIFS, LPJG -> lpjg, TM5 -> ctm5mp)
    components_to_ignore = ["RNFMAP", "AMIPFORC"]
    print("Ignoring components: ", components_to_ignore)
    # Create directories where outputs will be stored. Skip if already exist.
    os.makedirs(csv_path, exist_ok=True)
    os.makedirs(plots_path, exist_ok=True)
    # Get the number of components used
    number_of_components = len(glob.glob(data_path + "lucia.??.000000"))
    # Variables declaration
    cpl_model_names = []
    field_frames = {}
    n_proc = {}
    n_files_read_cpl = {}
    final_field_results = {}
    all_component_dependencies = {}
    total_dependencies = {}
    number_cpl_ts = {}
    number_ts_executed = {}
    data = {}
    fields = {}
    component_ids = {}
    ts_ids = {}
    timesteps = {}
    jitter = {}
    # Get information from all components by reading only the master file
    for component_number in range(1, number_of_components + 1):
        component_id = "%02d" % component_number
        # Get component name and number of ts executed from the master
        component_name, fields_df, number_ts, ts, nproc = get_info_master(component_id, data_path)
        # The process count is recorded for every component, including the ones skipped below
        # (same as the original chained assignment did).
        n_proc[component_name] = nproc
        # Skip components that are explicitly ignored
        if component_name in components_to_ignore:
            continue
        if number_ts == 0:
            print("Component %s does not communicate with OASIS but it is taken into account for the CHPSY. "
                  "If added to components_to_ignore parameter, it wont be used." % component_name)
            continue
        # Store the number of coupling steps, model name, component_id and coupling steps ids only if the component
        # communicates with OASIS
        component_ids[component_name] = component_id
        cpl_model_names.append(component_name)
        fields[component_name] = fields_df
        number_ts_executed[component_name] = number_ts
        ts_ids[component_name] = ts
    if not number_ts_executed:
        # max() below would otherwise fail with an opaque "empty sequence" ValueError
        sys.exit("\nERROR: no component communicating with OASIS was found in " + data_path)
    # The component that communicates more frequently with OASIS is used as a base component to check that the
    # parameters to ignore coupling steps (ignore_init_ts and ignore_end_ts) are feasible and to set the first and last
    # coupling steps ids of the coupled execution
    most_frequent_coupled_component = max(number_ts_executed, key=number_ts_executed.get)
    if number_ts_executed[most_frequent_coupled_component] <= (args.ignore_init_ts + args.ignore_end_ts):
        sys.exit("\nERROR: more coupling steps to ignore than simulated! Check ignore_init_ts and ignore_end_ts values.")
    first_ts_id = ts_ids[most_frequent_coupled_component][args.ignore_init_ts]
    last_ts_id = ts_ids[most_frequent_coupled_component][-(args.ignore_end_ts + 1)]
    print("The max number of coupling steps executed is %d by component %s."
          % (number_ts_executed[most_frequent_coupled_component], most_frequent_coupled_component))
    print("Only coupling steps between %d and %d (%s , %s) are taken into account for the study. "
          % (first_ts_id, last_ts_id, pd.to_timedelta(first_ts_id, unit='s'), pd.to_timedelta(last_ts_id, unit='s')))
    if (args.ignore_init_ts + args.num_ts_print) >= number_ts_executed[most_frequent_coupled_component]:
        sys.exit("\nERROR: More coupling steps to analyze than the ones executed. Try reducing 'num_ts_print'.")
    # Get number of coupling steps executed by the Master process and the coupling steps that will be studied in detail
    # and shown as a plot.
    possible_ts = set()
    for component in cpl_model_names:
        component_ts = ts_ids[component]
        valid_ts = component_ts[(component_ts >= first_ts_id) & (component_ts <= last_ts_id)]
        number_cpl_ts[component] = len(valid_ts)
        possible_ts |= set(valid_ts)
    all_possible_ts = sorted(possible_ts)
    ts_ids_to_study = all_possible_ts[:args.num_ts_print]
    total_n_proc_cpl_components = 0
    # Read OASIS log files
    for component in cpl_model_names:
        # Read data from lucia files
        print("\nReading %s: " % component, end='')
        data_df, _, num_files_cpl = read_data(component_ids[component], first_ts_id,
                                              last_ts_id, data_path)
        print("%s has executed %d coupling steps. Only %d are taken into account for the study"
              % (component, number_ts_executed[component], number_cpl_ts[component]))
        print("%s has used %d MPI processes. Only %d are used for the study"
              % (component, n_proc[component], num_files_cpl))
        # Store data read from files, plus the number of files that actually communicate with OASIS
        total_n_proc_cpl_components += n_proc[component]
        n_files_read_cpl[component] = num_files_cpl
        data[component] = data_df
    # Store fields information
    for component in cpl_model_names:
        # DataFrame to store the Fields: received ones first, then sent ones
        fields_received = fields[component][fields[component].Operation == 'RC']
        fields_sent = fields[component][fields[component].Operation == 'SN']
        df = pd.concat([fields_received, fields_sent])
        df.insert(0, "Origin", component, True)
        # Store the component data to main structures
        field_frames[component] = df
    # Since all components are running in parallel, the most periodic coupled component is used to get the
    # coupled execution time.
    # Care! This is after delete_init_end_ts and may not correspond to the total execution time of the experiment if
    # ignore_init_ts and/or ignore_end_ts are != 0
    cpl_execution_time = data[most_frequent_coupled_component].iloc[-1].Time - \
        data[most_frequent_coupled_component].iloc[0].Time
    ts = data[most_frequent_coupled_component].Timestep.unique()
    simulated_time = ts[-1] - ts[0]
    # NOTE: the original code also evaluated `ts[args.num_ts_print]` into an unused local here. That lookup
    # raised IndexError whenever num_ts_print was not smaller than the number of steps left after trimming,
    # so it has been removed.
    # Find the field interchanges between components. Stored in field_frames
    get_coupling_interchanges(field_frames, cpl_model_names)
    if args.method == "average":
        main_average_computation(cpl_model_names, data, final_field_results, all_possible_ts, ts_ids_to_study, timesteps)
    elif args.method == "minmax":
        main_minmax_computation(cpl_model_names, data, final_field_results, jitter, all_possible_ts, ts_ids_to_study,
                                timesteps)
    # Find dependencies between components
    get_component_dependencies(cpl_model_names, field_frames, final_field_results, all_component_dependencies,
                               total_dependencies)
    # Write to CSV
    sypd, chpsy = write_results(cpl_model_names, field_frames, args.method, final_field_results, jitter,
                                all_component_dependencies, cpl_execution_time, n_proc, total_n_proc_cpl_components,
                                simulated_time, number_cpl_ts, csv_path)
    # Create plots
    show_plots(cpl_model_names, final_field_results, cpl_execution_time, timesteps, args.num_ts_print, n_proc,
               all_component_dependencies, sypd, chpsy, plots_path)
    print("\nLucia-lite results in %s and %s directories" % (plots_path, csv_path))
    return 0
def get_info_master(component_id, data_path):
    """Read the master log file (``lucia.<component_id>.000000``) of one component.

    Args:
        component_id: two-character component number as used in the file name (e.g. "01").
        data_path: directory prefix, concatenated as-is with the file name.

    Returns:
        Tuple ``(component_name, fields, n_ts, ts, nproc)``: the name and process count read from the
        header, the fields table and the unique timesteps (and their count) produced by
        ``prepare_dataframe`` from the data rows.

    Raises:
        FileNotFoundError: if the master file does not exist.
    """
    master_file_name = "lucia." + component_id + ".000000"
    # The name contains no wildcard, so the path is built directly instead of going through the
    # undocumented (and deprecated) glob.glob1 helper, whose `[0]` failed with a bare IndexError.
    full_name = data_path + master_file_name
    if not os.path.isfile(full_name):
        raise FileNotFoundError("Master lucia file not found: " + full_name)
    # sep=r'\s+' is the documented replacement of the deprecated delim_whitespace=True.
    # NOTE(review): the dtype key 'Field' matches none of the column names; presumably 'Field_id' was meant.
    df = pd.read_csv(full_name,
                     names=["Balance", "Timestep", "Field_id", "Order", "Action", "Time"],
                     sep=r'\s+',
                     dtype={'Timestep': str, 'Field': str, 'Order': str, 'Action': str,
                            'Time': float}).iloc[:, 1:]
    # After dropping the first column, column 1 is "Field_id": header rows 1 and 2 carry the
    # component name and the number of processes there.
    component_name = df.iloc[1, 1]
    nproc = int(df.iloc[2, 1])
    fields, data = prepare_dataframe(df)
    ts = data.Timestep.unique()
    n_ts = len(ts)
    return component_name, fields, n_ts, ts, nproc
def read_data(component_id, first_ts_id, last_ts_id, data_path):
    """Read every ``lucia.<component_id>.*`` file of a component and concatenate its data rows.

    Each file is parsed, split with ``prepare_dataframe`` and, when it has data rows, restricted to
    the timesteps in ``[first_ts_id, last_ts_id]`` with ``delete_init_end_ts``. A progress indicator
    is printed while reading.

    Returns:
        Tuple ``(data_df, num_files_to_read, num_files_cpl)``: the concatenated data, the number of
        files matched, and the number of files that contained data rows.

    Raises:
        ValueError: if no file contributed any data (nothing to concatenate).
    """
    list_of_data = []
    # glob.glob replaces the undocumented, deprecated glob.glob1; the directory part is escaped so
    # that only the file-name part is treated as a pattern, as glob1 did.
    pattern = glob.escape(data_path) + "lucia." + component_id + ".*"
    input_files = sorted(glob.glob(pattern))
    num_files_cpl = 0
    num_files_to_read = len(input_files)
    perc = 10
    for count, full_name in enumerate(input_files):
        # `while ... >=` instead of a single `==` test: with few files several 10% marks map to the
        # same file count, and the equality test then never matched again (progress stalled).
        while perc < 100 and count >= math.ceil(num_files_to_read * perc / 100):
            print(perc, "%..", sep='', end='', flush=True)
            perc += 10
        # sep=r'\s+' is the documented replacement of the deprecated delim_whitespace=True.
        df = pd.read_csv(full_name,
                         names=["Balance", "Timestep", "Field_id", "Order", "Action", "Time"],
                         sep=r'\s+',
                         dtype={'Timestep': str, 'Field': str, 'Order': str, 'Action': str,
                                'Time': float}).iloc[:, 1:]
        _, data_df = prepare_dataframe(df)
        # In case only the master process communicates with Oasis, the other files have no data rows
        if not data_df.empty:
            data_df, _ = delete_init_end_ts(data_df, first_ts_id, last_ts_id)
            num_files_cpl += 1
            list_of_data.append(data_df)
    print("100%")
    if not list_of_data:
        # pd.concat raises ValueError on an empty list as well; this keeps the type with a clearer message
        raise ValueError("No coupling data found in files matching " + data_path + "lucia." + component_id + ".*")
    # Concat the data from all MPI processes.
    data_df = pd.concat(list_of_data, axis=0, ignore_index=True)
    return data_df, num_files_to_read, num_files_cpl
def prepare_dataframe(data_in):
    """Split a raw lucia table into its header-derived fields table and its data rows.

    Rows whose ``Time`` is missing are treated as header rows; all other rows are data rows.

    Returns:
        Tuple ``(fields, data)``. ``fields`` holds the header rows with a non-null ``Order``,
        with string index labels, columns renamed (``Timestep`` -> ``Operation``, ``Order`` ->
        ``Fields``, ``Time`` -> ``Destination``), ``Action`` removed and ``Field_id`` cast to int.
        ``data`` holds the data rows with ``Timestep`` and ``Field_id`` cast to int.
    """
    # Header rows are the ones without a time stamp
    header_mask = data_in['Time'].isna()
    header_rows = data_in.loc[header_mask].copy()  # explicit copies: the frames are modified below
    records = data_in.loc[~header_mask].copy()

    # Only header rows that carry something in "Order" describe a field
    fields = header_rows.loc[header_rows['Order'].notna()].copy()
    fields = fields.rename(index=str,
                           columns={"Timestep": "Operation", "Order": "Fields", "Time": "Destination"})
    fields = fields.drop(columns="Action")
    fields["Field_id"] = fields["Field_id"].astype(int)

    for column in ("Timestep", "Field_id"):
        records[column] = records[column].astype(int)
    return fields, records
# Drop the irregular timesteps from the initialization and finalization of the execution
def delete_init_end_ts(data, first_ts_id, last_ts_id):
    """Keep only the rows whose ``Timestep`` lies in ``[first_ts_id, last_ts_id]`` (both inclusive).

    Returns:
        Tuple ``(new_data, number_cpl_ts)``: the filtered rows and the number of distinct
        timesteps they contain.
    """
    in_window = (data.Timestep >= first_ts_id) & (data.Timestep <= last_ts_id)
    kept_rows = data[in_window]
    distinct_ts = pd.unique(kept_rows.Timestep)
    return kept_rows, len(distinct_ts)
def get_coupling_interchanges(field_frames, cpl_model_names):
    """Fill the ``Destination`` of every field with the component at the other end of the exchange.

    All per-component field tables are stacked and sorted by ``Field_id``. Fields that do not appear
    exactly twice (sent but not received, or the opposite — this happens when components are
    omitted) are dropped. Within each remaining pair, the ``Destination`` of one row is the ``Origin``
    of the other. ``field_frames`` is updated in place with the rows of each component.

    Args:
        field_frames: dict component -> DataFrame with at least ``Origin`` and ``Field_id`` columns.
        cpl_model_names: components whose entry in ``field_frames`` must be rewritten.
    """
    all_fields = pd.concat(field_frames)
    all_fields = all_fields.sort_values(by=['Field_id']).reset_index(drop=True)
    occurrences = all_fields.Field_id.value_counts()
    # Fields that are sent (or received) but not received (or sent)
    unpaired_fields = occurrences[occurrences.values != 2].index.values.astype(int)
    paired = all_fields[~all_fields.Field_id.isin(unpaired_fields)].reset_index(drop=True)
    # Every remaining Field_id occupies two consecutive rows, so viewing the origins as an (n, 2) table
    # and mirroring its columns yields the partner of each row. Assigning the whole column at once also
    # avoids writing strings cell by cell into a float (all-NaN) column, which newer pandas deprecates.
    origins = paired['Origin'].to_numpy(dtype=object)
    paired['Destination'] = origins.reshape(-1, 2)[:, ::-1].ravel()
    for component in cpl_model_names:
        frame = field_frames[component]
        # A component without any field has no row to read its origin from; fall back to its name
        model = frame.iloc[0]['Origin'] if len(frame) else component
        field_frames[component] = paired[paired['Origin'] == model]
# Find the time spent in the coupling per field (mean over the rows of each timestep)
def main_average_computation(cpl_model_names, data, final_field_results, all_possible_ts, ts_ids_to_study, timesteps):
    """Compute the per-field coupling time of every component using per-timestep means.

    For each field and each event (``MPI_get`` = waiting, ``interpo`` = interpolation, ``MPI_put`` =
    sending) the time at a timestep is mean('After' times) - mean('Before' times). The per-field
    totals are stored as a DataFrame in ``final_field_results[component]`` and the per-timestep
    accumulations are handed to ``get_info_coupling_step``, which fills ``timesteps``.
    """
    events = ('MPI_get', 'interpo', 'MPI_put')
    for component in cpl_model_names:
        component_rows = data[component].reset_index(drop=True)
        # One accumulator per event, indexed by every coupling step of the study
        accumulated = {event: pd.Series(0, index=all_possible_ts) for event in events}
        per_field_rows = []
        for field_id in component_rows.Field_id.unique():
            field_rows = component_rows[component_rows.Field_id == field_id]
            before_mask = field_rows.Order == 'Before'
            after_mask = field_rows.Order == 'After'
            event_total = {}
            for event in events:
                event_mask = field_rows.Action == event
                mean_after = field_rows[event_mask & after_mask].groupby(["Timestep"])["Time"].mean()
                mean_before = field_rows[event_mask & before_mask].groupby(["Timestep"])["Time"].mean()
                # Align with the full list of coupling steps; steps without this event count as 0
                elapsed = (mean_after - mean_before).reindex(index=all_possible_ts, copy=True, fill_value=0)
                if not elapsed.empty:
                    accumulated[event] += elapsed
                event_total[event] = elapsed.sum()
            field_total_cpl_time = event_total['interpo'] + event_total['MPI_get'] + event_total['MPI_put']
            per_field_rows.append((field_id, event_total['MPI_get'], event_total['interpo'],
                                   event_total['MPI_put'], field_total_cpl_time))
        # Save results to a DataFrame
        final_field_results[component] = pd.DataFrame(per_field_rows,
                                                      columns=["Field_id", "Waiting", "Interpolation",
                                                               "Sending", "Total"])
        get_info_coupling_step(component_rows, component, ts_ids_to_study, accumulated['MPI_get'],
                               accumulated['interpo'], accumulated['MPI_put'], timesteps)
def main_minmax_computation(cpl_model_names, data, final_field_results_minmax, jitter, all_possible_ts, ts_ids_to_study,
                            timesteps):
    """Compute the per-field coupling time of every component using per-timestep maxima.

    For each field and each event (``MPI_get`` = waiting, ``interpo`` = interpolation, ``MPI_put`` =
    sending) the time at a timestep is max('After' times) - max('Before' times). The jitter of an
    event is max('Before') - min('Before'); ``jitter[component]`` is the accumulated jitter divided
    by the number of fields. Per-field totals go to ``final_field_results_minmax[component]`` and
    the per-timestep accumulations are handed to ``get_info_coupling_step``, which fills ``timesteps``.
    """
    events = ('MPI_get', 'interpo', 'MPI_put')
    for component in cpl_model_names:
        data_c = data[component].reset_index(drop=True)
        field_data_minmax = []
        accumulated = {event: pd.Series(0, index=all_possible_ts) for event in events}
        acc_jitter = 0
        field_ids = data_c.Field_id.unique()
        for field in field_ids:
            # Rows of a single field and the masks that select the 'Before' / 'After' marks
            data_field = data_c[data_c.Field_id == field]
            is_before = data_field.Order == 'Before'
            is_after = data_field.Order == 'After'
            event_total = {}
            event_jitter = {}
            for event in events:
                is_event = data_field.Action == event
                before_times = data_field[is_event & is_before].groupby(["Timestep"])["Time"]
                max_before = before_times.max()
                elapsed = data_field[is_event & is_after].groupby(["Timestep"])["Time"].max() - max_before
                event_total[event] = elapsed.sum()
                event_jitter[event] = (max_before - before_times.min()).sum()
                # Align with the full list of coupling steps before accumulating, as the average method
                # does. Without it the in-place addition left NaN in the accumulator for every step in
                # which this field does not take part.
                elapsed = elapsed.reindex(index=all_possible_ts, fill_value=0)
                if not elapsed.empty:
                    accumulated[event] += elapsed
            field_total_cpl_time = event_total['interpo'] + event_total['MPI_get'] + event_total['MPI_put']
            acc_jitter += event_jitter['interpo'] + event_jitter['MPI_get'] + event_jitter['MPI_put']
            field_data_minmax.append((field, event_total['MPI_get'], event_total['interpo'],
                                      event_total['MPI_put'], field_total_cpl_time))
        # Average jitter per field; a component without fields has no jitter (avoids dividing by zero)
        jitter[component] = acc_jitter / len(field_ids) if len(field_ids) else 0.0
        # Save results to a DataFrame
        final_field_results_minmax[component] = pd.DataFrame(field_data_minmax,
                                                             columns=["Field_id", "Waiting", "Interpolation",
                                                                      "Sending", "Total"])
        get_info_coupling_step(data_c, component, ts_ids_to_study, accumulated['MPI_get'],
                               accumulated['interpo'], accumulated['MPI_put'], timesteps)
def get_info_coupling_step(data_c, component, ts_ids_to_study, acc_time_waiting, acc_time_interpo, acc_time_sending,
                           timesteps):
    """Build the per-coupling-step time breakdown of a component and store it in ``timesteps[component]``.

    The length of a coupling step is the difference between the earliest ``Time`` of consecutive
    timesteps (only timesteps up to ``ts_ids_to_study[-1]`` are used). The "Component" column is
    that length minus the accumulated waiting, interpolation and sending times. The result is
    laid over a zero-filled frame indexed by ``ts_ids_to_study[:-1]``.

    Exits through ``sys.exit`` when fewer than two timesteps are available for the component.
    """
    studied_rows = data_c[data_c.Timestep <= ts_ids_to_study[-1]]
    step_start = studied_rows.groupby(['Timestep']).Time.min()
    if len(step_start) < 2:
        sys.exit("\nERROR: Component %s does not have enough information to provide the detailed analysis per "
                 "coupling step.\nTry increasing the 'num_ts_print' or adding this component to the list "
                 "'components_to_ignore'" % component)
    # Duration of each step = start of the next step - start of this one (the last step has no successor)
    starts = step_start.to_numpy()
    step_duration = pd.Series(starts[1:] - starts[:-1], name='Component', index=step_start.index[:-1])
    last_label = step_start.index[-2]
    waiting = acc_time_waiting.loc[:last_label]
    interpo = acc_time_interpo.loc[:last_label]
    sending = acc_time_sending.loc[:last_label]
    component_time = step_duration - (waiting + interpo + sending)
    column_names = ["Component", "Waiting", "Interpolation", "Sending"]
    breakdown = pd.concat([component_time, waiting, interpo, sending], axis=1)
    breakdown.columns = column_names
    # Zero-filled frame over the studied steps so that every component ends up with those rows
    base = pd.DataFrame(0, index=ts_ids_to_study[:-1], columns=breakdown.columns)
    timesteps[component] = base.add(breakdown, fill_value=0)
def get_component_dependencies(cpl_model_names, field_frames, final_field_results, all_component_dependencies,
                               total_dependencies):
    """Aggregate, for every component, the coupling time spent with each of the other components.

    For a pair (component, to_component) the fields of ``component`` whose ``Destination`` is
    ``to_component`` are looked up in ``final_field_results[component]`` and their waiting,
    interpolation and sending times are summed.

    Results are stored in place:
        all_component_dependencies[component]: DataFrame indexed by the other components with the
            columns "Sending", "Interpolation" and "Waiting".
        total_dependencies[component]: dict other component -> total coupling time.
    """
    for component in cpl_model_names:
        # `!=` and not `not in`: on strings `in` is a substring test, which wrongly dropped every
        # component whose name is contained in the current one (e.g. "oce" inside "ocean").
        other_components = [x for x in cpl_model_names if x != component]
        component_fields = field_frames[component]
        component_results = final_field_results[component]
        frames = []
        total_value = {}
        for to_component in other_components:
            fields_id = component_fields[component_fields.Destination == to_component].Field_id
            df = component_results[component_results.Field_id.isin(fields_id)]
            waiting_time = df.Waiting.sum()
            interp_time = df.Interpolation.sum()
            sending_time = df.Sending.sum()
            frame = {'Sending': sending_time, 'Interpolation': interp_time, 'Waiting': waiting_time}
            frames.append(pd.DataFrame(frame, index=[to_component]))
            total_value[to_component] = waiting_time + interp_time + sending_time
        if frames:
            all_component_dependencies[component] = pd.concat(frames)
        else:
            # A single coupled component has no partner; pd.concat would raise on an empty list
            all_component_dependencies[component] = pd.DataFrame(columns=['Sending', 'Interpolation', 'Waiting'])
        total_dependencies[component] = total_value
def write_results(cpl_model_names, field_frames, method, final_field_results, jitter, all_component_dependencies,
                  cpl_execution_time, n_proc, total_n_proc_cpl_components, simulated_time, number_cpl_ts, csv_path):
    """Write the analysis results to text/CSV files and return the SYPD and CHPSY metrics.

    Files written (``csv_path`` is used as a plain string prefix):
        summary.txt       per-component and coupled summary,
        oasis_fields.txt  fields exchanged by each component,
        <component>.txt   per-field coupling times and dependencies of each component,
        table.csv         one row per component plus a "Coupled" row.
    The coupled SYPD, CHPSY and coupling cost are also printed.

    Args:
        method: "minmax" adds the Cn / En / Jitter lines (reads ``jitter[component]``) to the summary.
        n_proc: dict component -> number of processes; its total is used for the coupled cost and CHPSY.
        total_n_proc_cpl_components: process count used to normalise the total coupling cost.

    Returns:
        Tuple ``(sypd, chpsy)`` of dicts keyed by component; ``sypd`` also has a "Coupled" entry.
    """
    total_waiting = {}
    total_interpo = {}
    total_sending = {}
    total_waiting_cost = {}
    total_interpo_cost = {}
    total_sending_cost = {}
    component_exe_time = {}
    component_exe_cost = {}
    component_cost = {}
    total_time_in_cpl = {}
    percentual_component_exe_time = {}
    percentual_time_in_cpl = {}
    percentual_cpl_cost = {}
    sypd = {}
    chpsy = {}
    total_n_proc = sum(n_proc.values())
    cpl_execution_cost = cpl_execution_time / 3600 * total_n_proc
    # Context managers guarantee that every file is flushed and closed, also on errors
    # (oasis_fields.txt was never closed explicitly before).
    with open(csv_path + "summary.txt", "w") as out_summary, \
            open(csv_path + "oasis_fields.txt", "w") as out_fields:
        for component in cpl_model_names:
            file_out = csv_path + component + ".txt"
            # Write fields used by each component
            out_fields.write("%s fields:\n\n" % component)
            field_frames[component].to_csv(out_fields, float_format='%g', mode="a", index=False)
            out_fields.write("\n--------------------------------\n\n\n")
            with open(file_out, "w") as out_file:
                # Write the coupling time of each field
                out_file.write("Results for " + component + ":\n\n\n")
                out_file.write("Time in interpolation or MPI per field:\n\n")
                final_field_results[component].to_csv(out_file, float_format='%.3f', mode='a')
                total_waiting[component] = final_field_results[component].Waiting.sum()
                total_interpo[component] = final_field_results[component].Interpolation.sum()
                total_sending[component] = final_field_results[component].Sending.sum()
                total_waiting_cost[component] = total_waiting[component] / 3600 * n_proc[component]
                total_interpo_cost[component] = total_interpo[component] / 3600 * n_proc[component]
                total_sending_cost[component] = total_sending[component] / 3600 * n_proc[component]
                total_time_in_cpl[component] = (total_interpo[component] + total_sending[component]
                                                + total_waiting[component])
                percentual_time_in_cpl[component] = (total_time_in_cpl[component] / cpl_execution_time) * 100
                percentual_cpl_cost[component] = ((total_time_in_cpl[component] / 3600) * n_proc[component]
                                                  / cpl_execution_cost * 100)
                out_file.write("\nsum:%.3g,%.3g,%.3g,%.3g" %
                               (total_waiting[component], total_interpo[component], total_sending[component],
                                total_time_in_cpl[component]))
                # Useful time = coupled execution time minus the time spent in coupling events
                component_exe_time[component] = cpl_execution_time - total_time_in_cpl[component]
                percentual_component_exe_time[component] = (component_exe_time[component] / cpl_execution_time) * 100
                # Cost
                component_exe_cost[component] = (component_exe_time[component] / 3600) * n_proc[component]
                component_cost[component] = (cpl_execution_time / 3600) * n_proc[component]
                # Write the component dependencies
                out_file.write("\n\n\nDependencies:\n\n")
                dependencies = all_component_dependencies[component]
                # `!=` and not `not in`: on strings `in` is a substring test, which wrongly skipped every
                # component whose name is contained in the current one.
                other_components = [x for x in cpl_model_names if x != component]
                for to_component in other_components:
                    if to_component not in dependencies.index:
                        # No dependency row was computed for this pair; nothing to report
                        continue
                    waiting = dependencies.loc[to_component, 'Waiting']
                    interpo = dependencies.loc[to_component, 'Interpolation']
                    sending = dependencies.loc[to_component, "Sending"]
                    total_coupling_time_to_component = waiting + interpo + sending
                    out_file.write("Component %s has an overhead of %.3f seconds \n"
                                   "(%.2f%% of the total execution time) due to coupling with %s\n\n"
                                   % (component, total_coupling_time_to_component,
                                      (total_coupling_time_to_component / cpl_execution_time) * 100, to_component))
                    dependencies.loc[[to_component]].to_csv(out_file, float_format='%.3f', mode='a')
            # CMIP6 metrics
            sypd[component] = (simulated_time / 365) / component_exe_time[component]
            chpsy[component] = n_proc[component] * (24 / sypd[component])
            # Find the coupling cost
            out_summary.write(component)
            out_summary.write("\nNumber of processes: %d" % n_proc[component])
            out_summary.write("\nNumber of coupling steps: %d" % number_cpl_ts[component])
            out_summary.write("\nCoupled component execution cost (coupled time (h) * num_proc): %.3f"
                              % component_cost[component])
            out_summary.write("\nComponent useful execution time: %.3f (%.2f%%)"
                              % (component_exe_time[component], percentual_component_exe_time[component]))
            out_summary.write("\nComponent total coupling time: %.3f (%.2f%%)"
                              % (total_time_in_cpl[component], percentual_time_in_cpl[component]))
            out_summary.write("\n ( cpl event: Time(s) | Cost in core-hours (time(h) * num_proc) )")
            out_summary.write("\n - Waiting: %.3f | %.3f " % (total_waiting[component],
                                                               total_waiting_cost[component]))
            out_summary.write("\n - Interpolation: %.3f | %.3f " % (total_interpo[component],
                                                                     total_interpo_cost[component]))
            out_summary.write("\n - Sending: %.3f | %.3f " % (total_sending[component],
                                                               total_sending_cost[component]))
            out_summary.write("\n\n%s partial coupling cost: %.3f%%" % (component, percentual_cpl_cost[component]))
            out_summary.write("\n%s SYPD: %.3f" % (component, sypd[component]))
            out_summary.write("\n%s CHPSY: %.3f" % (component, chpsy[component]))
            if method == "minmax":
                out_summary.write("\nMin/Max metrics:")
                out_summary.write("\n%s Cn: %.3f" % (component, component_exe_time[component] + total_interpo[component]))
                out_summary.write("\n%s En: %.3f" % (component, total_waiting[component] + total_sending[component]))
                out_summary.write("\n%s Jitter: %.3f" % (component, jitter[component]))
            out_summary.write("\n\n-----------------------------------\n\n")
        out_summary.write("\nHint: try to minimize the coupling event with higher cost among all components if possible\n")
        out_summary.write("\nTotal coupled execution time: %.3f" % cpl_execution_time)
        cpl_cost = 1 - sum(component_exe_cost.values()) / ((cpl_execution_time / 3600) * total_n_proc_cpl_components)
        out_summary.write("\nTotal coupling cost: %.2f%%" % (cpl_cost * 100))
        cpl_sypd = (simulated_time / 365) / cpl_execution_time
        out_summary.write("\nCoupled SYPD: %.2f" % cpl_sypd)
        cpl_chpsy = total_n_proc * (24 / cpl_sypd)
        out_summary.write("\nCoupled CHPSY: %.2f" % cpl_chpsy)
    # Table of results: one row per component
    l_frames = []
    for component in cpl_model_names:
        frame = {'nproc': n_proc[component], 'number_ts': number_cpl_ts[component],
                 'exec_time': component_exe_time[component],
                 'waiting_time': total_waiting[component], 'interpo_time': total_interpo[component],
                 'SYPD': sypd[component], 'CHPSY': chpsy[component], 'cpl_cost': percentual_cpl_cost[component]}
        l_frames.append(pd.DataFrame(frame, index=[component]))
    # Add coupled data
    frame = {'nproc': total_n_proc, 'number_ts': max(number_cpl_ts.values()), 'exec_time': cpl_execution_time,
             'waiting_time': sum(total_waiting.values()), 'interpo_time': sum(total_interpo.values()),
             'SYPD': cpl_sypd, 'CHPSY': cpl_chpsy, 'cpl_cost': (cpl_cost * 100)}
    l_frames.append(pd.DataFrame(frame, index=["Coupled"]))
    table_df = pd.concat(l_frames, axis=0, ignore_index=False)
    table_df[['nproc', 'number_ts']] = table_df[['nproc', 'number_ts']].astype(int)
    with open(csv_path + 'table.csv', mode='w') as table_out:
        table_df.to_csv(table_out, float_format='%.3f')
    print("\nCoupled SYPD: %.3f" % cpl_sypd)
    print("Coupled CHPSY: %.3f" % cpl_chpsy)
    print("Coupling cost: %.3f%%" % (cpl_cost * 100))
    sypd["Coupled"] = cpl_sypd
    return sypd, chpsy
# Write, above every bar, a text label with the bar height truncated to an integer.
def autolabel(rects, ax):
    """Annotate each bar in ``rects`` on ``ax`` with its integer-truncated height.

    The label is anchored at the horizontal centre of the bar, at the truncated height, and is
    shifted 3 points upwards.
    """
    for bar in rects:
        label_value = int(bar.get_height())
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(f"{label_value}",
                    xy=(centre_x, label_value),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
def autolabelf(rects, ax):
    """Annotate each bar in ``rects`` on ``ax`` with its height rounded to two decimals.

    The label is anchored at the horizontal centre of the bar, at the rounded height, and is
    shifted 3 points upwards.
    """
    for bar in rects:
        label_value = round(float(bar.get_height()), 2)
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(f"{label_value}",
                    xy=(centre_x, label_value),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
def show_plots(cpl_model_names, final_field_results, cpl_execution_time, timesteps, num_ts_print, n_proc,
               all_component_dependencies, sypd, chpsy, plots_path):
    """Build every summary figure and save each one as a PNG file.

    Files written (names are appended directly to ``plots_path``):
    ``time_per_event.png``, ``dependencies.png``, ``stackedevents.png``,
    ``SYPD.png``, ``CHPSY.png``, ``oasis.png``, ``core-hours_cpl_event.png``
    and, when ``num_ts_print > 0``, ``timesteps.png`` and ``coupling_steps.png``.

    Args:
        cpl_model_names: Sequence of component names; used as dictionary keys
            and as axis/tick labels.
        final_field_results: Mapping component -> table exposing ``Waiting``,
            ``Interpolation``, ``Sending`` and ``Total`` columns with ``.sum()``.
        cpl_execution_time: Total coupled execution time; the per-component
            time is this value minus the sum of the component's ``Total`` column.
        timesteps: Mapping component -> DataFrame with a ``Component`` column,
            plotted per step. Only read when ``num_ts_print > 0``.
            NOTE: a ``Time`` column is added to each of these frames in place.
        num_ts_print: When greater than zero the per-step figures are produced
            and their width is ``ceil(num_ts_print / 3)``.
        n_proc: Mapping component -> multiplier applied to the coupling times
            to obtain a cost (presumably the number of processes; confirm
            against the caller).
        all_component_dependencies: Mapping component -> DataFrame drawn as a
            stacked bar chart.
        sypd: Mapping label -> SYPD value. The colour list assumes one entry
            per component plus one trailing entry (presumably the coupled
            model; confirm against the caller).
        chpsy: Mapping label -> CHPSY value.
        plots_path: String prefix for the output files; it is concatenated
            as-is, so a directory must end with a path separator.
    """
    n_components = len(cpl_model_names)
    calculation_time = []
    communication_time = []
    total_waiting = {}
    total_interpo = {}
    total_sending = {}
    total_waiting_cost = {}
    total_interpo_cost = {}
    total_sending_cost = {}
    total_cpl_cost = {}
    total_component_exe_time = {}

    # squeeze=False always returns a 2-D array of axes, so indexing also works
    # when there is a single component (plt.subplots would otherwise return a
    # bare Axes object that cannot be subscripted).
    fig1, axes1 = plt.subplots(1, n_components, figsize=(n_components * 4, 3), sharey=True, squeeze=False)
    fig2, axes2 = plt.subplots(1, n_components, figsize=(n_components * 2 + 1, 3), sharey=True, squeeze=False)
    ax1 = axes1[0]
    ax2 = axes2[0]

    event_names = ['component', 'interp', "waiting", "sending"]
    for idx, component in enumerate(cpl_model_names):
        results = final_field_results[component]
        procs = n_proc[component]

        # Total component/coupling time
        total_waiting[component] = results.Waiting.sum()
        total_interpo[component] = results.Interpolation.sum()
        total_sending[component] = results.Sending.sum()
        total_waiting_cost[component] = total_waiting[component] * procs
        total_interpo_cost[component] = total_interpo[component] * procs
        total_sending_cost[component] = total_sending[component] * procs
        total_cpl_cost[component] = (total_waiting_cost[component] + total_interpo_cost[component]
                                     + total_sending_cost[component])
        total_component_exe_time[component] = cpl_execution_time - results.Total.sum()

        # Plot time in each event per component
        total_values = [total_component_exe_time[component], total_interpo[component],
                        total_waiting[component], total_sending[component]]
        ax1[idx].bar(event_names, total_values, width=0.5, color=['C0', 'C2', 'C1', 'C8'])
        ax1[idx].set_title(component)

        # Plot component dependencies
        all_component_dependencies[component].plot.bar(ax=ax2[idx], stacked=True, legend=False,
                                                       title=component, color=['C8', 'C2', 'C1'])

        # Old lucia measurements
        calculation_time.append(total_component_exe_time[component] + total_interpo[component])
        communication_time.append(total_waiting[component] + total_sending[component])

    ax1[0].set_ylabel('time (s)')
    fig1.tight_layout()
    fig1.suptitle('Time spent in each event', y=1.05)
    fig1.savefig(plots_path + 'time_per_event.png', bbox_inches='tight')

    handles, labels = ax2[-1].get_legend_handles_labels()
    ax2[-1].legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.02, 0.3),
                   loc="lower left", prop={'size': 8}, borderaxespad=0)
    ax2[0].set_ylabel("time (s)")
    fig2.tight_layout()
    fig2.suptitle('Dependencies between components', y=1.05)
    fig2.savefig(plots_path + 'dependencies.png', bbox_inches='tight')

    # Stacked plot of all components. Values are materialised as lists in
    # component order instead of handing dict views to matplotlib.
    component_times = [total_component_exe_time[name] for name in cpl_model_names]
    waiting_times = [total_waiting[name] for name in cpl_model_names]
    interpo_times = [total_interpo[name] for name in cpl_model_names]
    sending_times = [total_sending[name] for name in cpl_model_names]
    fig3, ax3 = plt.subplots(figsize=(n_components * 2, 3))
    ax3.set_title("Time spent in each event of OASIS")
    ax3.bar(cpl_model_names, component_times, width=0.7, color='C0')
    offset = list(component_times)
    ax3.bar(cpl_model_names, waiting_times, width=0.7, bottom=offset, color='C1')
    offset = [base + extra for base, extra in zip(offset, waiting_times)]
    ax3.bar(cpl_model_names, interpo_times, width=0.7, bottom=offset, color='C2')
    offset = [base + extra for base, extra in zip(offset, interpo_times)]
    ax3.bar(cpl_model_names, sending_times, width=0.7, bottom=offset, color='C8')
    ax3.legend(labels=['Component', 'Waiting', 'Interpolation', 'Sending'], bbox_to_anchor=(1.05, 1),
               loc="upper left", borderaxespad=0, prop={'size': 8})
    ax3.set_ylabel("Elapsed time (s)")
    fig3.tight_layout()
    fig3.savefig(plots_path + 'stackedevents.png', bbox_inches='tight')

    # Plot SYPD: one colour for the components, a different one for the last bar
    colors = ['c'] * n_components + ['b']
    fig4, ax4 = plt.subplots(figsize=(n_components * 2, 3))
    sypd_bars = ax4.bar(list(sypd.keys()), list(sypd.values()), width=0.5, color=colors)
    ax4.set_ylabel("SYPD")
    ax4.set_title("SYPD per component and coupled")
    autolabelf(sypd_bars, ax4)
    fig4.savefig(plots_path + 'SYPD.png', bbox_inches='tight')

    # Plot CHPSY. The coupled value is left out since it is not a fair comparison
    fig5, ax5 = plt.subplots(figsize=(n_components * 1.5, 3))
    ax5.bar(list(chpsy.keys()), list(chpsy.values()), width=0.5, color='g')
    ax5.set_ylabel("CHPSY")
    ax5.set_title("CHPSY per component")
    fig5.savefig(plots_path + 'CHPSY.png', bbox_inches='tight')

    # Old lucia plot: calculation vs communication time, side by side
    x = np.arange(n_components)
    bar_width = 0.35
    fig6, ax6 = plt.subplots(figsize=(n_components * 2, 5))
    calc_bars = ax6.bar(x - bar_width / 2, calculation_time, bar_width, label="Calculation time (Cn)", color='g')
    comm_bars = ax6.bar(x + bar_width / 2, communication_time, bar_width, label="Communication time (En)",
                        color='r')
    ax6.set_ylabel("Elapsed time (s)")
    ax6.set_title("OASIS coupled model components\n"
                  "Calculation time (green) vs coupling exchange duration\n"
                  "including the time spent waiting slower models (red)")
    ax6.set_xticks(x)
    ax6.set_xticklabels(cpl_model_names)
    ax6.legend(bbox_to_anchor=(0, -0.2), loc="center left", borderaxespad=0)
    autolabel(calc_bars, ax6)
    autolabel(comm_bars, ax6)
    fig6.tight_layout()
    fig6.savefig(plots_path + 'oasis.png', bbox_inches='tight')

    if num_ts_print > 0:
        # Plot pattern: one row per component, for both per-step figures
        plot_width = math.ceil(num_ts_print / 3)
        fig7, axes7 = plt.subplots(n_components, 1, figsize=(plot_width, n_components * 6),
                                   sharey=True, squeeze=False)
        fig8, axes8 = plt.subplots(n_components, 1, figsize=(plot_width, n_components * 6),
                                   sharey=True, squeeze=False)
        ax7 = axes7[:, 0]
        ax8 = axes8[:, 0]
        for idx, component in enumerate(cpl_model_names):
            data_to_print = timesteps[component]
            # NOTE: adds the column to the caller's DataFrame in place; kept
            # as-is because code outside this function may rely on it.
            data_to_print['Time'] = pd.to_timedelta(data_to_print.index, unit='s')

            # Plot component timesteps
            data_to_print.plot(ax=ax7[idx], kind='bar', x='Time', y='Component',
                               legend=False, color=["C0"], title=component)
            ax7[idx].set_ylabel("time (s)")
            ax7[idx].set_xlabel("cpl step")

            # Plot coupling timesteps
            data_to_print.plot.bar(ax=ax8[idx], x='Time', stacked=True, legend=False,
                                   color=['C0', 'C1', 'C2', 'C8'], title=component)
            ax8[idx].set_ylabel("time (s)")
            ax8[idx].set_xlabel("timestep")

        fig7.tight_layout()
        fig7.suptitle('Component time per coupling step', y=1)
        fig7.savefig(plots_path + 'timesteps.png')

        handles, labels = ax8[0].get_legend_handles_labels()
        ax8[0].legend(handles[::-1], labels[::-1], bbox_to_anchor=(0, 1.02), loc="lower left",
                      borderaxespad=0)
        fig8.tight_layout()
        fig8.suptitle('Waiting (MPI_get), component (component_ts) and \ninterpolation + send (interpo) time '
                      'per coupling step', y=1)
        fig8.savefig(plots_path + 'coupling_steps.png')

    # Pie plot with computing cost of each coupling event: the outer ring is
    # the total per component, the inner ring splits it into the three events.
    fig9, ax9 = plt.subplots(figsize=(16, 12))
    fig9.suptitle('Computing cost of each coupling event')
    ring_width = 0.3
    cmap = plt.get_cmap("tab20c")
    # Every 4th colormap entry starts a new hue: the first shade colours the
    # component and the next three its events. Indices are built in order
    # (not via set arithmetic) and wrapped so they stay inside the colormap.
    outer_idx = np.arange(n_components) * 4 % cmap.N
    inner_idx = np.array([i for i in range(4 * n_components) if i % 4 != 0], dtype=int) % cmap.N
    outer_wedges, _ = ax9.pie([total_cpl_cost[name] for name in cpl_model_names], radius=1,
                              colors=cmap(outer_idx), labels=list(cpl_model_names))
    plt.setp(outer_wedges, width=ring_width, edgecolor='white')

    values = []
    labels = []
    for component in cpl_model_names:
        values.extend([total_waiting_cost[component], total_interpo_cost[component],
                       total_sending_cost[component]])
        labels.extend([component + " waiting", component + " interpo", component + " send"])
    inner_wedges, _ = ax9.pie(values, radius=1 - ring_width, labeldistance=0.7,
                              colors=cmap(inner_idx), labels=labels)
    plt.setp(inner_wedges, width=ring_width, edgecolor='white')
    ax9.legend()
    fig9.tight_layout()
    fig9.savefig(plots_path + "core-hours_cpl_event.png")
# Run the analysis and report how long LUCIA itself took.
# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments the way time.time() differences can.
start = time.perf_counter()
main()
end = time.perf_counter()
print("Total time spent in LUCIA (s) : ", end - start)
|