Skip to content
Snippets Groups Projects
Commit 5c3bb92d authored by Michael Langguth's avatar Michael Langguth
Browse files

Source-code style revisions in main_preprocess_data_step2.py.

parent 5e9eb6e5
No related branches found
No related tags found
No related merge requests found
Pipeline #70694 passed
...@@ -16,6 +16,9 @@ import warnings ...@@ -16,6 +16,9 @@ import warnings
def main(): def main():
method="main_preprocess_data_step2"
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-source_dir", type=str) parser.add_argument("-source_dir", type=str)
parser.add_argument("-dest_dir", type=str) parser.add_argument("-dest_dir", type=str)
...@@ -34,7 +37,7 @@ def main(): ...@@ -34,7 +37,7 @@ def main():
my_rank = comm.Get_rank() # rank of the node my_rank = comm.Get_rank() # rank of the node
p = comm.Get_size() # number of assigned nodes p = comm.Get_size() # number of assigned nodes
if p < 2: if p < 2:
raise ValueError("Preprocessing step 2 must be assigned to at least two tasks.") raise ValueError("%{0}: Preprocessing step 2 must be assigned to at least two tasks.".format(method))
if my_rank == 0: if my_rank == 0:
# retrieve final statistics first (not parallelized!) # retrieve final statistics first (not parallelized!)
...@@ -46,7 +49,7 @@ def main(): ...@@ -46,7 +49,7 @@ def main():
stat_obj = Calc_data_stat(nvars) # init statistic-instance stat_obj = Calc_data_stat(nvars) # init statistic-instance
# loop over whole data set (training, dev and test set) to collect the intermediate statistics # loop over whole data set (training, dev and test set) to collect the intermediate statistics
print("Start collecting statistics from the whole dataset to be processed...") print("%{0}: Start collecting statistics from the whole dataset to be processed...".format(method))
for year in years: for year in years:
file_dir = os.path.join(input_dir, year) file_dir = os.path.join(input_dir, year)
...@@ -55,7 +58,7 @@ def main(): ...@@ -55,7 +58,7 @@ def main():
# process stat-file: # process stat-file:
stat_obj.acc_stat_master(file_dir, int(month)) # process monthly statistic-file stat_obj.acc_stat_master(file_dir, int(month)) # process monthly statistic-file
else: else:
warnings.warn("The stat file for year {} month {} does not exist".format(year, month)) warnings.warn("%{0}: The statistic file for year {1}, month {2} does not exist".format(method, year, month))
# finalize statistics and write to json-file # finalize statistics and write to json-file
stat_obj.finalize_stat_master(vars_uni) stat_obj.finalize_stat_master(vars_uni)
stat_obj.write_stat_json(stat_dir) stat_obj.write_stat_json(stat_dir)
...@@ -77,12 +80,11 @@ def main(): ...@@ -77,12 +80,11 @@ def main():
while message_counter <= p-1: while message_counter <= p-1:
message_in = comm.recv() message_in = comm.recv()
message_counter = message_counter + 1 message_counter = message_counter + 1
print("Message in from slave: ", message_in) print("%{0}: Message in from worker: {1} ".format(method, message_in))
else: else:
message_in = comm.recv() message_in = comm.recv()
print("My rank,", my_rank) print("%{0}: Message from master to rank {1}: {2} ".format(method, my_rank, message_in))
print("message_in", message_in)
years = list(message_in[0]) years = list(message_in[0])
real_years_months = message_in[1] real_years_months = message_in[1]
...@@ -97,11 +99,11 @@ def main(): ...@@ -97,11 +99,11 @@ def main():
sequences_per_file=args.sequences_per_file) sequences_per_file=args.sequences_per_file)
# create the tfrecords-files # create the tfrecords-files
ins2.read_pkl_and_save_tfrecords(year=year, month=my_rank) ins2.read_pkl_and_save_tfrecords(year=year, month=my_rank)
print("Year {} finished", year) print("%{0}: Year {1} finished".format(method, year))
else: else:
print(year_rank + " is not in the datasplit_dic, will skip the process") print("%{0}: {1} is not in the datasplit_dic, will skip the process".format(method, year_rank))
message_out = ("Node:", str(my_rank), "finished", "", "\r\n") message_out = ("Node:", str(my_rank), "finished", "", "\r\n")
print("Message out for slaves:", message_out) print("%{0}: Message out for worker: {1}".format(method, message_out))
comm.send(message_out, dest=0) comm.send(message_out, dest=0)
MPI.Finalize() MPI.Finalize()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment