diff --git a/video_prediction_savp/HPC_scripts/train_movingmnist.sh b/video_prediction_savp/HPC_scripts/train_movingmnist.sh
index cb20b32c8e80cef704ae1efb7bc770991e381d0f..006ff73c30c4a53c80aef9371bfbe29fac39f973 100755
--- a/video_prediction_savp/HPC_scripts/train_movingmnist.sh
+++ b/video_prediction_savp/HPC_scripts/train_movingmnist.sh
@@ -8,9 +8,9 @@
 #SBATCH --error=train_moving_mnist-err.%j
 #SBATCH --time=00:20:00
 #SBATCH --gres=gpu:1
-#SBATCH --partition=develgpus
+#SBATCH --partition=gpus
 #SBATCH --mail-type=ALL
-#SBATCH --mail-user=s.stadtler@fz-juelich.de
+#SBATCH --mail-user=b.gong@fz-juelich.de
 ##jutil env activate -p cjjsc42
 
 
@@ -40,6 +40,8 @@ destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/model
 model=convLSTM
 dataset=moving_mnist
 model_hparams=../hparams/${dataset}/${model}/model_hparams.json
+destination_dir="${destination_dir}/${model}/$(date +%Y%m%dT%H%M)_${USER}"
 
 # run training
-srun python ../scripts/train_dummy.py --input_dir  ${source_dir}/tfrecords/ --dataset moving_mnist  --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/${model}_bing_20200902/ 
+
+srun python ../scripts/train_dummy.py --input_dir  ${source_dir}/tfrecords/ --dataset moving_mnist  --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/
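
The timestamped run directory built by the added destination_dir line can be reproduced in Python as well, e.g. when calling train_dummy.py without the batch script. A minimal sketch (the base path and model name are copied from the script above, purely for illustration):

    import os
    import getpass
    from datetime import datetime

    base_dir = "/p/project/deepacf/deeprain/video_prediction_shared_folder/models"  # as in destination_dir above
    model = "convLSTM"
    # mirrors "$(date +%Y%m%dT%H%M)_$USER", e.g. .../convLSTM/20200902T1430_<user>
    run_dir = os.path.join(base_dir, model,
                           "{}_{}".format(datetime.now().strftime("%Y%m%dT%H%M"), getpass.getuser()))
    os.makedirs(run_dir, exist_ok=True)
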
diff --git a/video_prediction_savp/hparams/era5/convLSTM/model_hparams.json b/video_prediction_savp/hparams/era5/convLSTM/model_hparams.json
index d4942bea2ab5d6af424844b74d3769ccf699502f..fde951edd2e6b41965fbdce6ce831c1e154cbd0e 100644
--- a/video_prediction_savp/hparams/era5/convLSTM/model_hparams.json
+++ b/video_prediction_savp/hparams/era5/convLSTM/model_hparams.json
@@ -1,8 +1,8 @@
 
 {
-    "batch_size": 10,
+    "batch_size": 4,
     "lr": 0.001,
-    "max_epochs":2,
+    "max_epochs":20,
     "context_frames":10,
     "sequence_length":20,
     "loss_fun":"rmse"
diff --git a/video_prediction_savp/hparams/moving_mnist/convLSTM/model_hparams.json b/video_prediction_savp/hparams/moving_mnist/convLSTM/model_hparams.json
index b07caa0f35f54c1d5007e9fbc6802fe24f1adac0..b59f6cb2ee96162b2eb6014d7ca6bd37f54d4218 100644
--- a/video_prediction_savp/hparams/moving_mnist/convLSTM/model_hparams.json
+++ b/video_prediction_savp/hparams/moving_mnist/convLSTM/model_hparams.json
@@ -2,7 +2,7 @@
 {
     "batch_size": 10,
     "lr": 0.001,
-    "max_epochs":2,
+    "max_epochs":20,
     "context_frames":10,
     "sequence_length":20,
     "loss_fun":"cross_entropy"
diff --git a/video_prediction_savp/scripts/generate_movingmnist.py b/video_prediction_savp/scripts/generate_movingmnist.py
index 0ec2af488c81dddeef6bff2deeb867c4e7b4ffed..d4fbf5eb5d8d8f4cad87ae26d15bc2787d9e6c0a 100644
--- a/video_prediction_savp/scripts/generate_movingmnist.py
+++ b/video_prediction_savp/scripts/generate_movingmnist.py
@@ -318,7 +318,7 @@ def main():
             print("gene_images_denorm:",gen_images_denorm[0][0])
             
             #Generate images inputs
-            plot_seq_imgs(imgs=input_images_denorm[:context_frames-1,:,:,0],idx = sample_ind + i, label="Ground Truth",output_png_dir=args.results_dir)  
+            plot_seq_imgs(imgs=input_images_denorm[context_frames+1:,:,:,0],idx = sample_ind + i, label="Ground Truth",output_png_dir=args.results_dir)  
                                                              
             #Generate forecast images
             plot_seq_imgs(imgs=gen_images_denorm[context_frames:,:,:,0],idx = sample_ind + i,label="Forecast by Model " + args.model,output_png_dir=args.results_dir) 
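
For the two plot_seq_imgs calls, the frame layout with the hparams above (sequence_length=20, context_frames=10) is sketched below in numpy. The sketch only illustrates the slicing; whether the ground-truth plot should start at context_frames+1 or at context_frames depends on how gen_images is aligned with the inputs and is not settled here:

    import numpy as np

    sequence_length, context_frames = 20, 10
    input_images_denorm = np.random.rand(sequence_length, 64, 64, 1)  # dummy (T, H, W, C) sample
    gen_images_denorm = np.random.rand(sequence_length, 64, 64, 1)    # dummy model output, same layout assumed

    ground_truth = input_images_denorm[context_frames + 1:, :, :, 0]  # slice used in this patch
    forecast = gen_images_denorm[context_frames:, :, :, 0]
    print(ground_truth.shape, forecast.shape)   # (9, 64, 64) (10, 64, 64)
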
diff --git a/video_prediction_savp/scripts/train_dummy.py b/video_prediction_savp/scripts/train_dummy.py
index f693d0a6689890dd930c1dcb06338ff140c449a9..0417a36514fd6136fb9fbe934bfb396633fa6093 100644
--- a/video_prediction_savp/scripts/train_dummy.py
+++ b/video_prediction_savp/scripts/train_dummy.py
@@ -16,13 +16,6 @@ from json import JSONEncoder
 import pickle as pkl
 
 
-class NumpyArrayEncoder(JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        return JSONEncoder.default(self, obj)
-
-
 def add_tag_suffix(summary, tag_suffix):
     summary_proto = tf.Summary()
     summary_proto.ParseFromString(summary)
@@ -80,7 +73,6 @@ def set_seed(seed):
         random.seed(seed)
 
 def load_params_from_checkpoints_dir(model_hparams_dict,checkpoint,dataset,model):
-   
     model_hparams_dict_load = {}
     if model_hparams_dict:
         with open(model_hparams_dict) as f:
@@ -159,8 +151,19 @@ def make_dataset_iterator(train_dataset, val_dataset, batch_size ):
     return inputs,train_handle, val_handle
 
 
-def plot_train(train_losses,val_losses,output_dir):
-    iterations = list(range(len(train_losses))) 
+def plot_train(train_losses,val_losses,step,output_dir):
+    """
+    Function to plot training losses for train and val datasets against steps
+    params:
+    train_losses/val_losses (list): train losses, which length should be equal to the number of training steps
+    step (int): current training step
+    output_dir (str): the path to save the plot
+    
+    return: None
+    """
+   
+    iterations = list(range(len(train_losses)))
+    if len(train_losses) != len(val_losses) or len(train_losses) != step +1 : raise ValueError("The length of training losses must be equal to the length of val losses and  step +1 !")  
     plt.plot(iterations, train_losses, 'g', label='Training loss')
     plt.plot(iterations, val_losses, 'b', label='validation loss')
     plt.title('Training and Validation loss')
@@ -168,6 +171,8 @@ def plot_train(train_losses,val_losses,output_dir):
     plt.ylabel('Loss')
     plt.legend()
     plt.savefig(os.path.join(output_dir,'plot_train.png'))
+    plt.close()
+    return None
 
 def save_results_to_dict(results_dict,output_dir):
     with open(os.path.join(output_dir,"results.json"),"w") as fp:
@@ -257,6 +262,7 @@ def main():
     num_examples_per_epoch = train_dataset.num_examples_per_epoch()
     print ("number of exmaples per epoch:",num_examples_per_epoch)
     steps_per_epoch = int(num_examples_per_epoch/batch_size)
+    # total number of steps equals the number of steps per epoch multiplied by the number of epochs
     total_steps = steps_per_epoch * max_epochs
     global_step = tf.train.get_or_create_global_step()
     #mock total_steps only for fast debugging
@@ -284,13 +290,12 @@ def main():
             # --- Scarlet 20200813
             print ("step:", step)
             val_handle_eval = sess.run(val_handle)
-
             #Fetch variables in the graph
-
             fetches = {"train_op": model.train_op}
             #fetches["latent_loss"] = model.latent_loss
             fetches["summary"] = model.summary_op 
-            
+            fetches["global_step"] = model.global_step
+
             if model.__class__.__name__ == "McNetVideoPredictionModel" or model.__class__.__name__ == "VanillaConvLstmVideoPredictionModel" or model.__class__.__name__ == "VanillaVAEVideoPredictionModel":
                 fetches["global_step"] = model.global_step
                 fetches["total_loss"] = model.total_loss
@@ -326,8 +331,8 @@ def main():
             val_results = sess.run(val_fetches,feed_dict={train_handle: val_handle_eval})
             val_losses.append(val_results["total_loss"])
 
-            summary_writer.add_summary(results["summary"])
-            summary_writer.add_summary(val_results["summary"])
+            summary_writer.add_summary(results["summary"],results["global_step"])
+            summary_writer.add_summary(val_results["summary"],results["global_step"])
             summary_writer.flush()
 
             # global_step will have the correct step count if we resume from a checkpoint
@@ -346,19 +351,26 @@ def main():
                 print ("The model name does not exist")
 
             #print("saving model to", args.output_dir)
+
             saver.save(sess, os.path.join(args.output_dir, "model"), global_step=step)
             # +++ Scarlet 20200813
             timeit_end = time.time()  
             # --- Scarlet 20200813
             print("time needed for this step", timeit_end - timeit_start, ' s')
+            if step % 20 == 0:
+                # Save the pickle file and the plot inside the loop, in case the training job is killed before it finishes.
+                save_results_to_pkl(train_losses,val_losses,args.output_dir)
+                plot_train(train_losses,val_losses,step,args.output_dir)
+
+
         train_time = time.time() - run_start_time
         results_dict = {"train_time":train_time,
                         "total_steps":total_steps}
         save_results_to_dict(results_dict,args.output_dir)
-        save_results_to_pkl(train_losses, val_losses, args.output_dir)
+        #save_results_to_pkl(train_losses, val_losses, args.output_dir)
         print("train_losses:",train_losses)
         print("val_losses:",val_losses) 
-        plot_train(train_losses,val_losses,args.output_dir)
+        #plot_train(train_losses,val_losses,args.output_dir)
         print("Done")
         # +++ Scarlet 20200814
         print("Total training time:", train_time/60., "min")
diff --git a/video_prediction_savp/video_prediction/layers/BasicConvLSTMCell.py b/video_prediction_savp/video_prediction/layers/BasicConvLSTMCell.py
index 321f6cc7e05320cf83e1173d8004429edf07ec24..c4a095dc8fc3abdbd87c1eaf79adcd7dad99020b 100644
--- a/video_prediction_savp/video_prediction/layers/BasicConvLSTMCell.py
+++ b/video_prediction_savp/video_prediction/layers/BasicConvLSTMCell.py
@@ -88,10 +88,14 @@ class BasicConvLSTMCell(ConvRNNCell):
             else:
                 c, h = tf.split(axis = 3, num_or_size_splits = 2, value = state)
             concat = _conv_linear([inputs, h], self.filter_size, self.num_features * 4, True)
-
+            print("concat1:",concat)
             # i = input_gate, j = new_input, f = forget_gate, o = output_gate
             i, j, f, o = tf.split(axis = 3, num_or_size_splits = 4, value = concat)
-
+            print("input gate i:",i)
+            print("new_input j:",j)
+            print("forget gate:",f)
+            print("output gate:",o)
+
             new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
                      self._activation(j))
             new_h = self._activation(new_c) * tf.nn.sigmoid(o)
@@ -100,6 +104,8 @@ class BasicConvLSTMCell(ConvRNNCell):
                 new_state = LSTMStateTuple(new_c, new_h)
             else:
                 new_state = tf.concat(axis = 3, values = [new_c, new_h])
+            print("new h", new_h)
+            print("new state",new_state)
             return new_h, new_state
 
 
@@ -135,9 +141,14 @@ def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=No
         matrix = tf.get_variable(
             "Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype = dtype)
         if len(args) == 1:
+            print("args[0]:",args[0])
             res = tf.nn.conv2d(args[0], matrix, strides = [1, 1, 1, 1], padding = 'SAME')
+            print("res1:",res)
         else:
+            print("matrix:",matrix)
+            print("tf.concat(axis = 3, values = args):",tf.concat(axis = 3, values = args))
             res = tf.nn.conv2d(tf.concat(axis = 3, values = args), matrix, strides = [1, 1, 1, 1], padding = 'SAME')
+            print("res2:",res)
         if not bias:
             return res
         bias_term = tf.get_variable(
@@ -146,3 +157,4 @@ def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=No
             initializer = tf.constant_initializer(
                 bias_start, dtype = dtype))
     return res + bias_term
+
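
The prints added to BasicConvLSTMCell only expose the intermediate tensors; the gate arithmetic itself is unchanged. For reference, the same update written with numpy (sizes are made up; the sketch assumes the cell's _activation is tanh, its usual default):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    batch, height, width, num_features = 2, 8, 8, 4
    forget_bias = 1.0
    c = np.zeros((batch, height, width, num_features))                 # previous cell state
    concat = np.random.randn(batch, height, width, 4 * num_features)   # output of _conv_linear

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = np.split(concat, 4, axis=3)
    new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
    new_h = np.tanh(new_c) * sigmoid(o)
    print(new_c.shape, new_h.shape)   # (2, 8, 8, 4) (2, 8, 8, 4)
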
diff --git a/video_prediction_savp/video_prediction/layers/layer_def.py b/video_prediction_savp/video_prediction/layers/layer_def.py
index 1ceac662136548fde65511815795d184fe91fac1..273b5eaee3cab703841b214ccc09ef190b6dd3ae 100644
--- a/video_prediction_savp/video_prediction/layers/layer_def.py
+++ b/video_prediction_savp/video_prediction/layers/layer_def.py
@@ -55,8 +55,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd,initializer=tf.contrib.l
 
 
 def conv_layer(inputs, kernel_size, stride, num_features, idx, initializer=tf.contrib.layers.xavier_initializer() , activate="relu"):
-    print("conv_layer activation function",activate)
-    
+    print("conv_layer activation function",activate) 
     with tf.variable_scope('{0}_conv'.format(idx)) as scope:
  
         input_channels = inputs.get_shape()[-1]
@@ -75,7 +74,7 @@ def conv_layer(inputs, kernel_size, stride, num_features, idx, initializer=tf.co
         elif activate == "leaky_relu":
             conv_rect = tf.nn.leaky_relu(conv_biased, name = '{0}_conv'.format(idx))
         elif activate == "sigmoid":
-            conv_rect = tf.nn.sigmoid(conv_biased, name = '{0}_conv'.format(idx)) 
+            conv_rect = tf.nn.sigmoid(conv_biased, name = '{0}_conv'.format(idx))
         else:
             raise ("activation function is not correct")
         return conv_rect
diff --git a/video_prediction_savp/video_prediction/models/base_model.py b/video_prediction_savp/video_prediction/models/base_model.py
index df479968325946a9d61896d498428d65692c1848..0d3bf6e4b554c70671d4678b530688c44f999b77 100644
--- a/video_prediction_savp/video_prediction/models/base_model.py
+++ b/video_prediction_savp/video_prediction/models/base_model.py
@@ -3,12 +3,10 @@ import itertools
 import os
 import re
 from collections import OrderedDict
-
 import numpy as np
 import tensorflow as tf
 from tensorflow.contrib.training import HParams
 from tensorflow.python.util import nest
-
 import video_prediction as vp
 from video_prediction.utils import tf_utils
 from video_prediction.utils.tf_utils import compute_averaged_gradients, reduce_tensors, local_device_setter, \
@@ -244,7 +242,9 @@ class BaseVideoPredictionModel(object):
                 savers.append(saver)
             restore_op = [saver.saver_def.restore_op_name for saver in savers]
             sess.run(restore_op)
-
+            return True
+        else:
+            return False
 
 class VideoPredictionModel(BaseVideoPredictionModel):
     def __init__(self,
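
With restore() now returning True or False, a caller can tell whether a checkpoint was actually loaded and choose between resuming and training from scratch. A stand-alone sketch of that pattern (the dummy class only mimics the boolean contract; it is not the real model API, and the checkpoint path is a placeholder):

    class DummyModel(object):
        # stand-in that mimics the True/False contract restore() gets in this patch
        def restore(self, sess, checkpoint):
            return checkpoint is not None

    model = DummyModel()
    for checkpoint in (None, "/tmp/model-100"):   # placeholder checkpoint path
        if model.restore(None, checkpoint):
            print("resuming from", checkpoint)
        else:
            print("no checkpoint found, training from scratch")
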
diff --git a/video_prediction_savp/video_prediction/models/vanilla_convLSTM_model.py b/video_prediction_savp/video_prediction/models/vanilla_convLSTM_model.py
index d3b3d4817faa10e6f5db5257fdf4cd526e6d01c7..796486a453f9dc6807928deeb2b8962e2908a4f2 100644
--- a/video_prediction_savp/video_prediction/models/vanilla_convLSTM_model.py
+++ b/video_prediction_savp/video_prediction/models/vanilla_convLSTM_model.py
@@ -30,15 +30,14 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         self.max_epochs = self.hparams.max_epochs
         self.loss_fun = self.hparams.loss_fun
 
+
     def get_default_hparams_dict(self):
         """
         The keys of this dict define valid hyperparameters for instances of
         this class. A class inheriting from this one should override this
         method if it has a different set of hyperparameters.
-
         Returns:
             A dict with the following hyperparameters.
-
             batch_size: batch size for training.
             lr: learning rate. if decay steps is non-zero, this is the
                 learning rate for steps <= decay_step.
@@ -80,7 +79,6 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         else:
             raise ValueError("Loss function is not selected properly, you should chose either 'rmse' or 'cross_entropy'")
 
-        
         #This is the loss for only all the channels(temperature, geo500, pressure)
         #self.total_loss = tf.reduce_mean(
         #    tf.square(self.x[:, self.context_frames:,:,:,:] - self.x_hat_predict_frames[:,:,:,:,:]))            
@@ -96,10 +94,8 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         self.saveable_variables = [self.global_step] + global_variables
         return None
 
-
     @staticmethod
     def convLSTM_cell(inputs, hidden):
-
         y_0 = inputs #we only usd patch 1, but the original paper use patch 4 for the moving mnist case, but use 2 for Radar Echo Dataset
         channels = inputs.get_shape()[-1]
         # conv lstm cell
@@ -114,7 +110,6 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         z3 = tf.reshape(output, [-1, output_shape[1], output_shape[2], output_shape[3]])
         #we feed the learn representation into a 1 × 1 convolutional layer to generate the final prediction
         x_hat = ld.conv_layer(z3, 1, 1, channels, "decode_1", activate="sigmoid")
-
         return x_hat, hidden
 
     def convLSTM_network(self):
@@ -143,3 +138,4 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         x_hat = tf.stack(x_hat)
         self.x_hat= tf.transpose(x_hat, [1, 0, 2, 3, 4])  # change first dim with sec dim
         self.x_hat_predict_frames = self.x_hat[:,self.context_frames-1:,:,:,:]
+
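
The final slice self.x_hat[:, self.context_frames-1:, :, :, :] is what the rmse / cross_entropy losses compare against x[:, context_frames:, ...]. A small numpy sketch of that alignment, under the assumption that x_hat stacks sequence_length-1 one-step predictions (one for every frame except the first; image sizes are made up):

    import numpy as np

    batch, sequence_length, context_frames = 2, 20, 10   # hparams values from above
    x = np.random.rand(batch, sequence_length, 64, 64, 1)           # ground-truth sequence
    x_hat = np.random.rand(batch, sequence_length - 1, 64, 64, 1)   # assumed prediction layout

    x_hat_predict_frames = x_hat[:, context_frames - 1:, :, :, :]   # predictions for the forecast period
    targets = x[:, context_frames:, :, :, :]                        # frames the loss is computed on
    print(x_hat_predict_frames.shape == targets.shape)              # True under this assumption
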