diff --git a/video_prediction_tools/data_preprocess/preprocess_data_step2.py b/video_prediction_tools/data_preprocess/preprocess_data_step2.py
index bfd8c16b46fd199be24ee54c01a2fa484ad45fae..1494d220904583d8d3e180f82d5f1b214f5b86f0 100644
--- a/video_prediction_tools/data_preprocess/preprocess_data_step2.py
+++ b/video_prediction_tools/data_preprocess/preprocess_data_step2.py
@@ -93,7 +93,7 @@ class ERA5Pkl2Tfrecords(ERA5Dataset):
 
     def get_metadata(self):
         """
-        This function get the meta data that generared from data_process_step1, we aim to extract the height and width informaion from it
+        This function gets the meta data that was generated by data_process_step1; we aim to extract the height and width information from it
         vars_in   : list(str), must be consistent with the list from DataPreprocessing_step1
         height    : int, the height of the image
         width     : int, the width of the image
@@ -187,7 +187,7 @@ class ERA5Pkl2Tfrecords(ERA5Dataset):
             month   : int, the target month to save to tfrecord 
         """
         #Define the input_file based on the year and month
-        self.input_file_year = os.path.join(os.path.join(self.input_dir, "pickle"),str(year))
+        self.input_file_year = os.path.join(self.input_dir,"pickle",str(year))
         input_file = os.path.join(self.input_file_year,'X_{:02d}.pkl'.format(month))
         temp_input_file = os.path.join(self.input_file_year,'T_{:02d}.pkl'.format(month))
 
diff --git a/video_prediction_tools/main_scripts/main_preprocess_data_step2.py b/video_prediction_tools/main_scripts/main_preprocess_data_step2.py
index 4c13ea258d0e6ffce91c30fc265cc1e6a3f32425..a9e77a02e8f4d5f19e9270704cc66ee843920ba0 100644
--- a/video_prediction_tools/main_scripts/main_preprocess_data_step2.py
+++ b/video_prediction_tools/main_scripts/main_preprocess_data_step2.py
@@ -58,7 +58,7 @@ def main():
         stat_obj = Calc_data_stat(nvars)                            # init statistic-instance
     
         # loop over whole data set (training, dev and test set) to collect the intermediate statistics
-        print("Start collecting statistics from the whole datset to be processed...")
+        print("Start collecting statistics from the whole dataset to be processed...")
         for split in partition.keys():
             values = partition[split]
             for year in values.keys():