Commit f169be9a authored by Bing Gong

Update vanilla_convLSTM_model.py

parent f418c60b
Pipeline #44259 failed
@@ -70,8 +70,13 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         #self.context_frames_loss = tf.reduce_mean(
         #    tf.square(self.x[:, :self.context_frames, :, :, 0] - self.x_hat_context_frames[:, :, :, :, 0]))
         # This is the loss function (RMSE):
+        # This is the loss function for channel 1 only (temperature RMSE)
         self.total_loss = tf.reduce_mean(
-            tf.square(self.x[:, self.context_frames:, :, :, 0] - self.x_hat_context_frames[:, (self.context_frames-1):-1, :, :, 0]))
+            tf.square(self.x[:, self.context_frames:,:,:,0] - self.x_hat_predict_frames[:,:,:,:,0]))
+        # This is the loss for all the channels (temperature, geo500, pressure)
+        #self.total_loss = tf.reduce_mean(
+        #    tf.square(self.x[:, self.context_frames:,:,:,:] - self.x_hat_predict_frames[:,:,:,:,:]))
         self.train_op = tf.train.AdamOptimizer(
             learning_rate = self.learning_rate).minimize(self.total_loss, global_step = self.global_step)
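For context, a minimal NumPy sketch of how the new loss pairs predictions with targets; the sizes below are assumptions, not values from this repository. The rollout further down emits sequence_length - 1 predictions, and x_hat_predict_frames (defined at the end of the diff) keeps those from index context_frames - 1 onward, which line up one-to-one with the ground-truth frames x[:, context_frames:]. Note that the expression is a mean squared error; the comment calls it RMSE, but no square root is taken.

```python
# Shape sketch only; all sizes are assumed for illustration.
import numpy as np

batch, seq_len, context, h, w, ch = 2, 10, 4, 8, 8, 3
x = np.random.rand(batch, seq_len, h, w, ch)           # ground truth
x_hat = np.random.rand(batch, seq_len - 1, h, w, ch)   # one prediction per step 0..seq_len-2

x_hat_predict_frames = x_hat[:, context - 1:]          # (batch, seq_len - context, h, w, ch)
target = x[:, context:]                                # same shape: frames context..seq_len-1
assert x_hat_predict_frames.shape == target.shape

# channel 0 only, as in the active loss line above (MSE, despite the RMSE comment)
loss = np.mean((target[..., 0] - x_hat_predict_frames[..., 0]) ** 2)
```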
@@ -88,35 +93,19 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
     @staticmethod
     def convLSTM_cell(inputs, hidden):
-        conv1 = ld.conv_layer(inputs, 3, 2, 8, "encode_1", activate = "leaky_relu")
-        conv2 = ld.conv_layer(conv1, 3, 1, 8, "encode_2", activate = "leaky_relu")
-        conv3 = ld.conv_layer(conv2, 3, 2, 8, "encode_3", activate = "leaky_relu")
-        y_0 = conv3
+        y_0 = inputs  # we only use patch 1; the original paper uses patch 4 for the Moving MNIST case and patch 2 for the Radar Echo dataset
         # conv lstm cell
         cell_shape = y_0.get_shape().as_list()
+        channels = cell_shape[-1]
         with tf.variable_scope('conv_lstm', initializer = tf.random_uniform_initializer(-.01, 0.1)):
-            cell = BasicConvLSTMCell(shape = [cell_shape[1], cell_shape[2]], filter_size = [3, 3], num_features = 8)
+            cell = BasicConvLSTMCell(shape = [cell_shape[1], cell_shape[2]], filter_size = [5, 5], num_features = 256)
             if hidden is None:
                 hidden = cell.zero_state(y_0, tf.float32)
             output, hidden = cell(y_0, hidden)
             output_shape = output.get_shape().as_list()
             z3 = tf.reshape(output, [-1, output_shape[1], output_shape[2], output_shape[3]])
-            conv5 = ld.transpose_conv_layer(z3, 3, 2, 8, "decode_5", activate = "leaky_relu")
-            conv6 = ld.transpose_conv_layer(conv5, 3, 1, 8, "decode_6", activate = "leaky_relu")
-            x_hat = ld.transpose_conv_layer(conv6, 3, 2, 3, "decode_7", activate = "sigmoid")
+            # we feed the learned representation into a 1 × 1 convolutional layer to generate the final prediction
+            x_hat = ld.conv_layer(z3, 1, 1, channels, "decode_1", activate = "sigmoid")  # set activation to linear
         return x_hat, hidden
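The decoder change replaces the transpose-convolution stack with a single 1 × 1 convolution. A 1 × 1 convolution is a per-pixel linear map: it leaves the spatial dimensions untouched and only mixes feature channels, here from num_features = 256 back down to the input channel count. A minimal NumPy sketch (illustrative only; conv1x1 is a stand-in for the repository's ld.conv_layer, and all sizes are assumed):

```python
# A 1x1 convolution as a per-pixel linear map (stand-in for ld.conv_layer)
import numpy as np

def conv1x1(feature_map, weights, bias):
    # feature_map: (batch, h, w, num_features); weights: (num_features, channels)
    out = np.einsum("bhwf,fc->bhwc", feature_map, weights) + bias
    return 1.0 / (1.0 + np.exp(-out))       # sigmoid activation, as in the diff

z3 = np.random.randn(2, 8, 8, 256)          # ConvLSTM output, num_features = 256
w = np.random.randn(256, 3) * 0.01          # channels = 3, e.g. temperature, geo500, pressure
b = np.zeros(3)
x_hat = conv1x1(z3, w, b)
assert x_hat.shape == (2, 8, 8, 3)          # spatial dims preserved, channels restored
```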
@@ -124,20 +113,18 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
         network_template = tf.make_template('network',
                                             VanillaConvLstmVideoPredictionModel.convLSTM_cell)  # make the template to share the variables
         # create network
-        x_hat_context = []
         x_hat = []
-        hidden = None
-        #This is for training
-        for i in range(self.sequence_length):
-            if i < self.context_frames:
-                x_1, hidden = network_template(self.x[:, i, :, :, :], hidden)
-            else:
-                x_1, hidden = network_template(x_1, hidden)
-            x_hat_context.append(x_1)
-        #This is for generating video
+        # for i in range(self.sequence_length-1):
+        #     if i < self.context_frames:
+        #         x_1, hidden = network_template(self.x[:, i, :, :, :], hidden)
+        #     else:
+        #         x_1, hidden = network_template(x_1, hidden)
+        #     x_hat_context.append(x_1)
+        # This is for training (optimization of the convLSTM layer)
         hidden_g = None
-        for i in range(self.sequence_length):
+        for i in range(self.sequence_length-1):
             if i < self.context_frames:
                 x_1_g, hidden_g = network_template(self.x[:, i, :, :, :], hidden_g)
             else:
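tf.make_template (a TensorFlow 1.x API) wraps a function so that variables created on the first call are reused on every later call; that is what lets the same convLSTM_cell weights be applied at every time step of the loop above. A minimal sketch, assuming a TensorFlow 1.x environment; the function and sizes here are illustrative, not from the repository:

```python
# Variable sharing via tf.make_template, assuming TensorFlow 1.x
import tensorflow as tf

def step(x):
    # created on the first call, reused on all later calls
    w = tf.get_variable("w", shape=[int(x.shape[-1]), 4])
    return tf.matmul(x, w)

template = tf.make_template("network", step)
a = template(tf.zeros([2, 3]))  # creates variable network/w
b = template(tf.ones([2, 3]))   # reuses network/w rather than creating a new one
```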
@@ -145,8 +132,6 @@ class VanillaConvLstmVideoPredictionModel(BaseVideoPredictionModel):
             x_hat.append(x_1_g)
         # pack them all together
-        x_hat_context = tf.stack(x_hat_context)
         x_hat = tf.stack(x_hat)
-        self.x_hat_context_frames = tf.transpose(x_hat_context, [1, 0, 2, 3, 4])  # change first dim with sec dim
         self.x_hat = tf.transpose(x_hat, [1, 0, 2, 3, 4])  # change first dim with sec dim
-        self.x_hat_predict_frames = self.x_hat[:,self.context_frames:,:,:,:]
+        self.x_hat_predict_frames = self.x_hat[:,self.context_frames-1:,:,:,:]
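Putting the hunks together, a NumPy sketch of the whole rollout under assumed sizes (step is a dummy stand-in for network_template): ground truth is fed for the first context_frames steps, after which the model consumes its own previous prediction; the per-step outputs are stacked time-major, transposed to batch-major, and the frames from index context_frames - 1 onward become the predictions scored by the loss.

```python
# Illustrative rollout; `step` is a dummy stand-in for network_template.
import numpy as np

def step(frame, hidden):
    return frame, hidden                     # identity "prediction", no real state

batch, seq_len, context, h, w, ch = 2, 10, 4, 8, 8, 3
x = np.random.rand(batch, seq_len, h, w, ch)

x_hat, hidden_g, x_1_g = [], None, None
for i in range(seq_len - 1):                 # sequence_length - 1 steps, as in the diff
    if i < context:
        x_1_g, hidden_g = step(x[:, i], hidden_g)    # warm-up on ground truth
    else:
        x_1_g, hidden_g = step(x_1_g, hidden_g)      # autoregressive: feed own output
    x_hat.append(x_1_g)

x_hat = np.stack(x_hat)                      # (time, batch, h, w, ch)
x_hat = np.transpose(x_hat, [1, 0, 2, 3, 4]) # (batch, time, h, w, ch)
x_hat_predict_frames = x_hat[:, context - 1:]
assert x_hat_predict_frames.shape == (batch, seq_len - context, h, w, ch)
```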