diff --git a/Zam347_scripts/DataPreprocess.sh b/Zam347_scripts/DataPreprocess.sh
index 764b949433702053b0889671ebc36880f01c690b..69c93115ab6af16c2a3bdd4060322c0785d3fdc4 100755
--- a/Zam347_scripts/DataPreprocess.sh
+++ b/Zam347_scripts/DataPreprocess.sh
@@ -2,7 +2,7 @@
 
 source_dir=/home/$USER/extractedData
-destination_dir=/home/$USER/preprocessedData/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500/hickle
+destination_dir=/home/$USER/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/hickle
 declare -a years=("2017")
 
 for year in "${years[@]}";
@@ -11,7 +11,7 @@ for year in "${years[@]}";
     echo "source_dir ${source_dir}/${year}"
     mpirun -np 2 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \
     --source_dir ${source_dir}/${year}/ \
-    --destination_dir ${destination_dir}/${year}/ --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710
+    --destination_dir ${destination_dir}/${year}/ --vars T2 MSL gph500 --lat_s 138 --lat_e 202 --lon_s 646 --lon_e 710
 done
 
 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir}
diff --git a/Zam347_scripts/DataPreprocess_to_tf.sh b/Zam347_scripts/DataPreprocess_to_tf.sh
index 608f95348b25c2169ae2963821639de8947c322b..0c41cf6cad72c1f758c3d977440f6591b63fde35 100755
--- a/Zam347_scripts/DataPreprocess_to_tf.sh
+++ b/Zam347_scripts/DataPreprocess_to_tf.sh
@@ -1,4 +1,4 @@
 #!/bin/bash -x
 
-python ../video_prediction/datasets/era5_dataset_v2.py /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500/hickle/splits/ /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500/tfrecords/ -vars T2 MSL gph500 -height 128 -width 160 -seq_length 20
+python ../video_prediction/datasets/era5_dataset_v2.py /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/hickle/splits/ /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/ -vars T2 MSL gph500 -height 64 -width 64 -seq_length 20
diff --git a/Zam347_scripts/generate_era5-out.out b/Zam347_scripts/generate_era5-out.out
new file mode 100644
index 0000000000000000000000000000000000000000..e8924e4f78a5fa943026cda0b4322ef58c8fab16
--- /dev/null
+++ b/Zam347_scripts/generate_era5-out.out
@@ -0,0 +1,30 @@
+temporal_dir: /home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/hickle/splits/
+loading options from checkpoint /home/b.gong/models/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae
+----------------------------------- Options ------------------------------------
+batch_size = 2
+checkpoint = /home/b.gong/models/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae
+dataset = era5
+dataset_hparams = sequence_length=20
+fps = 4
+gif_length = None
+gpu_mem_frac = 0
+input_dir = /home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords
+mode = test
+model = vae
+model_hparams = None
+num_epochs = 1
+num_samples = None
+num_stochastic_samples = 1
+output_gif_dir = /home/b.gong/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae
+output_png_dir = /home/b.gong/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae
+results_dir = /home/b.gong/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500
+results_gif_dir = /home/b.gong/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500
+results_png_dir = /home/b.gong/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500
+seed = 7
+------------------------------------- End --------------------------------------
+datset_class ERA5Dataset_v2
+FILENAMES ['/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_0_to_1.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_10_to_11.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_12_to_13.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_2_to_3.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_4_to_5.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_6_to_7.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_8_to_9.tfrecords']
+files ['/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_0_to_1.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_10_to_11.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_12_to_13.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_2_to_3.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_4_to_5.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_6_to_7.tfrecords', '/home/b.gong/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords/test/sequence_8_to_9.tfrecords']
+mode test
+Parse features {'images/encoded': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f71880c6b00>, 'channels': <tf.Tensor 'ParseSingleExample/ParseSingleExample:3' shape=() dtype=int64>, 'height': <tf.Tensor 'ParseSingleExample/ParseSingleExample:4' shape=() dtype=int64>, 'sequence_length': <tf.Tensor 'ParseSingleExample/ParseSingleExample:5' shape=() dtype=int64>, 'width': <tf.Tensor 'ParseSingleExample/ParseSingleExample:6' shape=() dtype=int64>}
+Image shape 20, 64,64,3
diff --git a/Zam347_scripts/generate_era5.sh b/Zam347_scripts/generate_era5.sh
index 97a07d280248d09a754aa69450c883dc11375387..42c496bb89282348539ac8a27019d040505213fb 100755
--- a/Zam347_scripts/generate_era5.sh
+++ b/Zam347_scripts/generate_era5.sh
@@ -2,9 +2,9 @@
 
 python -u ../scripts/generate_transfer_learning_finetune.py \
---input_dir /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500/tfrecords \
---dataset_hparams sequence_length=20 --checkpoint /home/${USER}/models/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500/ours_savp \
---mode test --results_dir /home/${USER}/results/era5-Y2015toY2017M01to12-128x160-74d00N71d00E-T_MSL_gph500 \
+--input_dir /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords \
+--dataset_hparams sequence_length=20 --checkpoint /home/${USER}/models/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae \
+--mode test --results_dir /home/${USER}/results/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500 \
 --batch_size 2 --dataset era5 > generate_era5-out.out
 
 #srun python scripts/train.py --input_dir data/era5 --dataset era5 --model savp --model_hparams_dict hparams/kth/ours_savp/model_hparams.json --output_dir logs/era5/ours_savp
diff --git a/Zam347_scripts/train_vae.sh b/Zam347_scripts/train_vae.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b507d5c6afd266bc0d4b8a7dcfdd5e34cea495ab
--- /dev/null
+++ b/Zam347_scripts/train_vae.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -x
+
+
+
+python ../scripts/train_dummy.py --input_dir /home/${USER}/preprocessedData/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/tfrecords --dataset era5 --model vae --model_hparams_dict ../hparams/kth/ours_savp/model_hparams.json --output_dir /home/${USER}/models/era5-Y2015toY2017M01to12-64x64-74d00N71d00E-T_MSL_gph500/vae
+#srun python scripts/train.py --input_dir data/era5 --dataset era5 --model savp --model_hparams_dict hparams/kth/ours_savp/model_hparams.json --output_dir logs/era5/ours_savp
diff --git a/bash/workflow_era5_macOS.sh b/bash/workflow_era5_macOS.sh
index 78b5101d810d9818cfb720154e9c42cc321f44a3..1a6ebef38df877b8ee20f628d4e375a20e7c8bd5 100755
--- a/bash/workflow_era5_macOS.sh
+++ b/bash/workflow_era5_macOS.sh
@@ -56,7 +56,7 @@ echo "============================================================="
 #    --input_dir ${DATA_EXTRA_DIR} --destination_dir ${DATA_PREPROCESS_DIR}
 #fi
 
-#Change the .hkl data to .tfrecords files
+####Change the .hkl data to .tfrecords files
 if [ -d "$DATA_PREPROCESS_TF_DIR" ]
 then
     echo "Step2: The Preprocessed Data (tf.records) exist"
@@ -90,4 +90,4 @@ fi
 echo "Step4: Postprocessing start"
 python ./scripts/generate_transfer_learning_finetune.py --input_dir ${DATA_PREPROCESS_TF_DIR} \
 --dataset_hparams sequence_length=20 --checkpoint ${CHECKPOINT_DIR} --mode test --results_dir ${RESULTS_OUTPUT_DIR} \
---batch_size 4 --dataset era5
\ No newline at end of file
+--batch_size 4 --dataset era5
diff --git a/bash/workflow_era5_zam347.sh b/bash/workflow_era5_zam347.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ffe7209b6099f4ad9f57b4e90247a7d7acaf009d
--- /dev/null
+++ b/bash/workflow_era5_zam347.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+set -e
+#
+#MODEL=savp
+##train_mode: end_to_end, pre_trained
+#TRAIN_MODE=end_to_end
+#EXP_NAME=era5_size_64_64_3_3t_norm
+
+MODEL=$1
+TRAIN_MODE=$2
+EXP_NAME=$3
+RETRAIN=1 # continue training the model or use the existing end-to-end model: 0 means continue training, 1 means use the existing one
+#DATA_ETL_DIR=/home/${USER}/
+DATA_ETL_DIR=/p/scratch/deepacf/${USER}/
+DATA_EXTRA_DIR=${DATA_ETL_DIR}/extractedData/${EXP_NAME}
+DATA_PREPROCESS_DIR=${DATA_ETL_DIR}/preprocessedData/${EXP_NAME}
+DATA_PREPROCESS_TF_DIR=./data/${EXP_NAME}
+RESULTS_OUTPUT_DIR=./results_test_samples/${EXP_NAME}/${TRAIN_MODE}/
+
+if [ "$MODEL" == savp ]; then
+    method_dir=ours_savp
+elif [ "$MODEL" == gan ]; then
+    method_dir=ours_gan
+elif [ "$MODEL" == vae ]; then
+    method_dir=ours_vae
+else
+    echo "model does not exist" >&2
+    exit 1
+fi
+
+if [ "$TRAIN_MODE" == pre_trained ]; then
+    TRAIN_OUTPUT_DIR=./pretrained_models/kth/${method_dir}
+else
+    TRAIN_OUTPUT_DIR=./logs/${EXP_NAME}/${TRAIN_MODE}
+fi
+
+CHECKPOINT_DIR=${TRAIN_OUTPUT_DIR}/${method_dir}
+
+echo "===========================WORKFLOW SETUP===================="
+echo "Model ${MODEL}"
+echo "TRAIN MODE ${TRAIN_MODE}"
+echo "Method_dir ${method_dir}"
+echo "DATA_ETL_DIR ${DATA_ETL_DIR}"
+echo "DATA_EXTRA_DIR ${DATA_EXTRA_DIR}"
"DATA_EXTRA_DIR ${DATA_EXTRA_DIR}" +echo "DATA_PREPROCESS_DIR ${DATA_PREPROCESS_DIR}" +echo "DATA_PREPROCESS_TF_DIR ${DATA_PREPROCESS_TF_DIR}" +echo "TRAIN_OUTPUT_DIR ${TRAIN_OUTPUT_DIR}" +echo "=============================================================" + +##############Datat Preprocessing################ +#To hkl data +#if [ -d "$DATA_PREPROCESS_DIR" ]; then +# echo "The Preprocessed Data (.hkl ) exist" +#else +# python ../workflow_video_prediction/DataPreprocess/benchmark/mpi_stager_v2_process_netCDF.py \ +# --input_dir ${DATA_EXTRA_DIR} --destination_dir ${DATA_PREPROCESS_DIR} +#fi + +####Change the .hkl data to .tfrecords files +if [ -d "$DATA_PREPROCESS_TF_DIR" ] +then + echo "Step2: The Preprocessed Data (tf.records) exist" +else + echo "Step2: start, hkl. files to tf.records" + python ./video_prediction/datasets/era5_dataset_v2.py --source_dir ${DATA_PREPROCESS_DIR}/splits \ + --destination_dir ${DATA_PREPROCESS_TF_DIR} + echo "Step2: finish" +fi + +#########Train########################## +if [ "$TRAIN_MODE" == "pre_trained" ]; then + echo "step3: Using kth pre_trained model" +elif [ "$TRAIN_MODE" == "end_to_end" ]; then + echo "step3: End-to-end training" + if [ "$RETRAIN" == 1 ]; then + echo "Using the existing end-to-end model" + else + echo "Training Starts " + python ./scripts/train_v2.py --input_dir $DATA_PREPROCESS_TF_DIR --dataset era5 \ + --model ${MODEL} --model_hparams_dict hparams/kth/${method_dir}/model_hparams.json \ + --output_dir ${TRAIN_OUTPUT_DIR} --checkpoint ${CHECKPOINT_DIR} + echo "Training ends " + fi +else + echo "TRAIN_MODE is end_to_end or pre_trained" + exit 1 +fi + +#########Generate results################# +echo "Step4: Postprocessing start" +python ./scripts/generate_transfer_learning_finetune.py --input_dir ${DATA_PREPROCESS_TF_DIR} \ +--dataset_hparams sequence_length=20 --checkpoint ${CHECKPOINT_DIR} --mode test --results_dir ${RESULTS_OUTPUT_DIR} \ +--batch_size 4 --dataset era5 diff --git a/env_setup/create_env_zam347.sh b/env_setup/create_env_zam347.sh index 04873d26ebc0d307d960ca3ebc134d76b193eba6..c2a7b4e2315514ec551f81063ac7f7087a360235 100755 --- a/env_setup/create_env_zam347.sh +++ b/env_setup/create_env_zam347.sh @@ -22,7 +22,7 @@ pip3 install mpi4py pip3 install netCDF4 pip3 install numpy pip3 install h5py -pip3 install tensorflow==1.13.1 +pip3 install tensorflow-gpu==1.14.0 #Copy the hickle package from bing's account #cp -r /p/project/deepacf/deeprain/bing/hickle ${WORKING_DIR} diff --git a/hparams/kth/ours_savp/model_hparams.json b/hparams/kth/ours_savp/model_hparams.json index 65c36514efbcc39ab4c3c1a15a58fc4895dc1744..66b41f87e3c0f417b492314060121a0bfd01c8f9 100644 --- a/hparams/kth/ours_savp/model_hparams.json +++ b/hparams/kth/ours_savp/model_hparams.json @@ -11,6 +11,8 @@ "vae_gan_feature_cdist_weight": 10.0, "gan_feature_cdist_weight": 0.0, "state_weight": 0.0, - "nz": 32 + "nz": 32, + "max_steps":20 } + diff --git a/video_prediction/models/__init__.py b/video_prediction/models/__init__.py index d88b573127a1956b0532c8b3a8b1abc15010eb30..ea1fa77f821827b61c3c2cbfa362014c1da20faf 100644 --- a/video_prediction/models/__init__.py +++ b/video_prediction/models/__init__.py @@ -7,7 +7,7 @@ from .savp_model import SAVPVideoPredictionModel from .dna_model import DNAVideoPredictionModel from .sna_model import SNAVideoPredictionModel from .sv2p_model import SV2PVideoPredictionModel - +from .vanilla_vae_model import VanillaVAEVideoPredictionModel def get_model_class(model): model_mappings = { @@ -17,6 +17,7 @@ def get_model_class(model): 
         'dna': 'DNAVideoPredictionModel',
         'sna': 'SNAVideoPredictionModel',
         'sv2p': 'SV2PVideoPredictionModel',
+        'vae': 'VanillaVAEVideoPredictionModel',
     }
     model_class = model_mappings.get(model, model)
     model_class = globals().get(model_class)
diff --git a/video_prediction/models/savp_model.py b/video_prediction/models/savp_model.py
index 63776e1cac0d6cbb4efa9247647f7d8e557a74c3..ca8acd3f32a5ea1772c9fbf36003149acfdcb950 100644
--- a/video_prediction/models/savp_model.py
+++ b/video_prediction/models/savp_model.py
@@ -990,4 +990,4 @@ def _maybe_tile_concat_layer(conv2d_layer):
         outputs = conv2d_layer(inputs, out_channels, *args, **kwargs)
         return outputs
 
-    return layer
\ No newline at end of file
+    return layer
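
Note (editor's addition, not part of the committed diff): a minimal usage sketch for the scripts touched above, assuming they are run from the directories implied by their relative paths. The call order follows the Step2 to Step4 comments in bash/workflow_era5_zam347.sh; the EXP_NAME value shown is the commented-out default from that script and is purely illustrative.

# Per-step zam347 scripts (each expects to be run from inside Zam347_scripts/):
cd Zam347_scripts
./DataPreprocess.sh          # extracted netCDF data -> .hkl, 64x64 crop of T2, MSL, gph500
./DataPreprocess_to_tf.sh    # .hkl splits -> tfrecords (height/width 64, seq_length 20)
./train_vae.sh               # train the new 'vae' model via scripts/train_dummy.py
./generate_era5.sh           # test-mode generation; log captured in generate_era5-out.out
cd ..

# Or, from the repository root, drive the whole workflow in one go (arguments: MODEL TRAIN_MODE EXP_NAME):
bash bash/workflow_era5_zam347.sh vae end_to_end era5_size_64_64_3_3t_norm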