From 52a0de1d1de4f92b06d1b57b5fd3dcacab14986f Mon Sep 17 00:00:00 2001
From: Michael <m.langguth@fz-juelich.de>
Date: Thu, 15 Oct 2020 14:49:07 +0200
Subject: [PATCH] Use source_dir more concisely in main_preprocess_data_step1.py.

---
 .../data_preprocess/process_netCDF_v2.py              |  2 +-
 .../main_scripts/main_preprocess_data_step1.py        | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/video_prediction_tools/data_preprocess/process_netCDF_v2.py b/video_prediction_tools/data_preprocess/process_netCDF_v2.py
index 23d62d61..90009e0b 100644
--- a/video_prediction_tools/data_preprocess/process_netCDF_v2.py
+++ b/video_prediction_tools/data_preprocess/process_netCDF_v2.py
@@ -32,7 +32,7 @@ class PreprocessNcToPkl():
         #directory_to_process is month-based directory
         if int(job_name) >12 or int(job_name) < 1 or not isinstance(job_name,str): raise ValueError("job_name should be int type between 1 to 12")
         self.directory_to_process=os.path.join(src_dir,str(year), str(job_name))
-        if not os.path.exists(self.directory_to_process) : raise ("The directory_to_process does not exist")
+        if not os.path.exists(self.directory_to_process) : raise IOError("The directory_to_process '"+self.directory_to_process+"' does not exist")
         self.target_dir = os.path.join(target_dir,"pickle",str(year))                              # enforce that the preprocessed data is located under the pickle-subdirectory
         if not os.path.exists(self.target_dir): os.mkdir(self.target_dir)
         self.job_name = job_name
diff --git a/video_prediction_tools/main_scripts/main_preprocess_data_step1.py b/video_prediction_tools/main_scripts/main_preprocess_data_step1.py
index 4e26407a..a4389e49 100755
--- a/video_prediction_tools/main_scripts/main_preprocess_data_step1.py
+++ b/video_prediction_tools/main_scripts/main_preprocess_data_step1.py
@@ -33,7 +33,8 @@ def main():
 
     current_path = os.getcwd()
     years        = args.years
-    source_dir   = os.path.join(args.source_dir,str(years))+"/"
+    source_dir   = args.source_dir
+    source_dir_full = os.path.join(source_dir,str(years))+"/"
     destination_dir = args.destination_dir
     scr_dir         = args.script_dir
     rsync_status = args.rsync_status
@@ -81,7 +82,7 @@ def main():
     # ================================== ALL Nodes:  Read-in parameters ====================================== #
 
     # check the existence of teh folders :
-    if not os.path.exists(source_dir):  # check if the source dir. is existing
+    if not os.path.exists(source_dir_full):  # check if the source dir. is existing
         if my_rank == 0:
             logging.critical('The source does not exist')
             logging.info('exit status : 1')
@@ -91,8 +92,8 @@ def main():
         
     # Expand destination_dir-variable by searching for netCDF-files in source_dir and processing the file from the first list element to obtain all relevant (meta-)data. 
     if my_rank == 0:
-        data_files_list = glob.glob(source_dir+"/**/*.nc",recursive=True)
-        if not data_files_list: raise ValueError("Could not find any data to be processed in '"+source_dir+"'")
+        data_files_list = glob.glob(source_dir_full+"/**/*.nc",recursive=True)
+        if not data_files_list: raise IOError("Could not find any data to be processed in '"+source_dir_full+"'")
         
         md = MetaData(suffix_indir=destination_dir,exp_id=exp_id,data_filename=data_files_list[0],slices=slices,variables=vars)
         # modify Batch scripts if metadata has been retrieved for the first time (md.status = "new")
@@ -132,7 +133,7 @@ def main():
 
         print(" # ==============  Directory scanner : start    ==================# ")
 
-        ret_dir_scanner = directory_scanner(source_dir)
+        ret_dir_scanner = directory_scanner(source_dir_full)
         print(ret_dir_scanner)
         dir_detail_list = ret_dir_scanner[0]
         sub_dir_list = ret_dir_scanner[1]
-- 
GitLab