diff --git a/video_prediction_tools/main_scripts/main_preprocess_data_step2.py b/video_prediction_tools/main_scripts/main_preprocess_data_step2.py index 8d755d1526a45ceb55aee6b05a4aa0b749f58676..1a16e4e723bda40baafdfada3f9012b51b61016a 100644 --- a/video_prediction_tools/main_scripts/main_preprocess_data_step2.py +++ b/video_prediction_tools/main_scripts/main_preprocess_data_step2.py @@ -16,6 +16,9 @@ import warnings def main(): + + method="main_preprocess_data_step2" + parser = argparse.ArgumentParser() parser.add_argument("-source_dir", type=str) parser.add_argument("-dest_dir", type=str) @@ -25,8 +28,8 @@ def main(): input_dir = args.source_dir ins = ERA5Pkl2Tfrecords(input_dir=input_dir, dest_dir=args.dest_dir, - sequence_length = args.sequence_length, - sequences_per_file=args.sequences_per_file) + sequence_length = args.sequence_length, + sequences_per_file=args.sequences_per_file) years, months,years_months = ins.get_years_months() # ini. MPI @@ -34,19 +37,19 @@ def main(): my_rank = comm.Get_rank() # rank of the node p = comm.Get_size() # number of assigned nodes if p < 2: - raise ValueError("Preprocessing step 2 must be assigned to at least two tasks.") + raise ValueError("%{0}: Preprocessing step 2 must be assigned to at least two tasks.".format(method)) if my_rank == 0: # retrieve final statistics first (not parallelized!) # some preparatory steps stat_dir = os.path.dirname(input_dir) - varnames = ins.vars_in + varnames = ins.vars_in vars_uni, varsind, nvars = get_unique_vars(varnames) stat_obj = Calc_data_stat(nvars) # init statistic-instance # loop over whole data set (training, dev and test set) to collect the intermediate statistics - print("Start collecting statistics from the whole dataset to be processed...") + print("%{0}: Start collecting statistics from the whole dataset to be processed...".format(method)) for year in years: file_dir = os.path.join(input_dir, year) @@ -55,7 +58,7 @@ def main(): # process stat-file: stat_obj.acc_stat_master(file_dir, int(month)) # process monthly statistic-file else: - warnings.warn("The stat file for year {} month {} does not exist".format(year, month)) + warnings.warn("%{0}: The statistic file for year {1}, month {2} does not exist".format(method, year, month)) # finalize statistics and write to json-file stat_obj.finalize_stat_master(vars_uni) stat_obj.write_stat_json(stat_dir) @@ -77,12 +80,11 @@ def main(): while message_counter <= p-1: message_in = comm.recv() message_counter = message_counter + 1 - print("Message in from slave: ", message_in) + print("%{0}: Message in from worker: {1} ".format(method, message_in)) else: message_in = comm.recv() - print("My rank,", my_rank) - print("message_in", message_in) + print("%{0}: Message from master to rank {1}: {2} ".format(method, my_rank, message_in)) years = list(message_in[0]) real_years_months = message_in[1] @@ -97,11 +99,11 @@ def main(): sequences_per_file=args.sequences_per_file) # create the tfrecords-files ins2.read_pkl_and_save_tfrecords(year=year, month=my_rank) - print("Year {} finished", year) + print("%{0}: Year {1} finished".format(method, year)) else: - print(year_rank + " is not in the datasplit_dic, will skip the process") + print("%{0}: {1} is not in the datasplit_dic, will skip the process".format(method, year_rank)) message_out = ("Node:", str(my_rank), "finished", "", "\r\n") - print("Message out for slaves:", message_out) + print("%{0}: Message out for worker: {1}".format(method, message_out)) comm.send(message_out, dest=0) MPI.Finalize()