From f52b1f2be9fbc21c7409ab5df77f0044884587d7 Mon Sep 17 00:00:00 2001
From: Michael <m.langguth@fz-juelich.de>
Date: Tue, 9 Jun 2020 12:18:55 +0200
Subject: [PATCH] Integrated methods to handle destination_dir for
 Preprocessing-step via temporary json-files.

---
 DataPreprocess/mpi_split_data_multi_years.py   |  5 ++---
 DataPreprocess/mpi_stager_v2_process_netCDF.py | 17 +++++++----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/DataPreprocess/mpi_split_data_multi_years.py b/DataPreprocess/mpi_split_data_multi_years.py
index c551bfe6..bdc53cc1 100644
--- a/DataPreprocess/mpi_split_data_multi_years.py
+++ b/DataPreprocess/mpi_split_data_multi_years.py
@@ -1,6 +1,7 @@
 from mpi4py import MPI
 import argparse
 from process_netCDF_v2 import *
+from metadata import MetaData
 import json
 
 #add parser arguments
@@ -14,9 +15,7 @@ parser.add_argument("--varnames","-vars",dest="varnames", nargs = '+')
 args = parser.parse_args()
 # ML 2020/06/08: Dirty workaround as long as data-splitting is done with this separate Python-script
 # called from the same parent Shell-/Batch-script as 'mpi_stager_v2_process_netCDF.py'
-with open(os.getcwd()+"/temp.json",'r') as js_file:
-    dict_dirty = json.load(js_file)
-target_dir = dict_dirty["dest_dir_split"]
+target_dir = os.path.join(MetaData.get_destdir_jsontmp(),"hickle")
 print(target_dir)
 
 varnames = args.varnames
diff --git a/DataPreprocess/mpi_stager_v2_process_netCDF.py b/DataPreprocess/mpi_stager_v2_process_netCDF.py
index 84ee9c19..3e16ee80 100755
--- a/DataPreprocess/mpi_stager_v2_process_netCDF.py
+++ b/DataPreprocess/mpi_stager_v2_process_netCDF.py
@@ -115,21 +115,16 @@ def main():
         md = MetaData(suffix_indir=destination_dir,data_filename=data_files_list[0],slices=slices,variables=vars)
         # modify Batch scripts if metadata has been retrieved for the first time (md.status == "new")
         if (md.status == "new"):
-            #md.write_dirs_to_batch_scripts(scr_dir+"/DataPreprocess.sh")
             md.write_dirs_to_batch_scripts(scr_dir+"/DataPreprocess_to_tf.sh")
-            #md.write_dirs_to_batch_scripts(scr_dir+"generate_era5.sh")
-            #md.write_dirs_to_batch_scripts(scr_dir+"train_era5.sh")
+            #md.write_dirs_to_batch_scripts(scr_dir+"/generate_era5.sh")
+            #md.write_dirs_to_batch_scripts(scr_dir+"/train_era5.sh")
             # ML 2020/06/08: Dirty workaround as long as data-splitting is done with a separate Python-script
             # called from the same parent Shell-/Batch-script
-            # -> temproary json-file in working directory
-            dict_dirty = {"dest_dir_split": os.path.join(md.expdir,md.expname)}
-            print("Workaround for correct destination in data splitting: Write dictionary to json-file: temp'")
-            with open(os.getcwd()+"/temp.json",'w') as js_file:
-                print("Create: '"+os.getcwd()+"/temp.json'")
-                json.dump(dict_dirty,js_file)
+            # -> work with temporary json-file in working directory
+            md.write_destdir_jsontmp(os.path.join(md.expdir,md.expname))
         #else: nothing to do
 
-        destination_dir= os.path.join(md.expdir,md.expname,years,"hickle")
+        destination_dir = os.path.join(md.expdir,md.expname,"hickle",years)
 
         # ...and create directory if necessary
         if not os.path.exists(destination_dir):  # check if the destination dir. exists
@@ -222,6 +217,8 @@ def main():
         #os.system(rsync_str)
 
         #process_era5_in_dir(job, src_dir=source_dir, target_dir=destination_dir)
+        # ML 2020/06/09: workaround to get correct destination_dir obtained by the master node
+        destination_dir = os.path.join(MetaData.get_destdir_jsontmp(),"hickle",years)
         process_netCDF_in_dir(job_name=job, src_dir=source_dir, target_dir=destination_dir,slices=slices,vars=vars)
 
         if checksum_status == 1:
-- 
GitLab
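
Note for readers without access to DataPreprocess/metadata.py: the patch replaces the hand-rolled temp.json read/write with two MetaData helpers, write_destdir_jsontmp() and get_destdir_jsontmp(), whose implementation is not part of this diff. The following is a minimal sketch inferred only from the call sites above; the file name "temp.json", its location in the current working directory, and the "destination_dir" key are assumptions carried over from the old workaround, since only the method names appear in the patch.

    import json
    import os

    class MetaData:
        # Sketch only: the real class in DataPreprocess/metadata.py carries
        # further attributes (expdir, expname, status, ...) not shown here.

        def write_destdir_jsontmp(self, dest_dir, tmp_dir=None):
            """Store the destination directory in a temporary json-file so
            that a separately launched script can retrieve it later."""
            tmp_dir = tmp_dir if tmp_dir else os.getcwd()  # assumed location
            tmp_file = os.path.join(tmp_dir, "temp.json")  # assumed name
            with open(tmp_file, "w") as js_file:
                json.dump({"destination_dir": dest_dir}, js_file)

        @staticmethod
        def get_destdir_jsontmp(tmp_dir=None):
            """Retrieve the directory written by write_destdir_jsontmp()."""
            tmp_dir = tmp_dir if tmp_dir else os.getcwd()
            tmp_file = os.path.join(tmp_dir, "temp.json")
            with open(tmp_file, "r") as js_file:
                return json.load(js_file)["destination_dir"]  # assumed key

Whatever the exact implementation, funnelling the read and the write through MetaData means mpi_stager_v2_process_netCDF.py and mpi_split_data_multi_years.py agree on the file name and JSON key via a single code path, instead of each script hard-coding os.getcwd()+"/temp.json" as before.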