diff --git a/.gitignore b/.gitignore index eb06f09d1853c073da780dc5d14b6e86cfcbfc16..277f76853b32137a2b20b3a8c75c10508b56d96e 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,9 @@ target/ # Jupyter Notebook .ipynb_checkpoints +# singularity containers +*.sif + # pyenv .python-version diff --git a/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh b/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh index 661eaccfb87ceaed3ed622014d1f471dccbc8855..f856eb55e47eb89fa9dbdba96e78dbe050ecdfab 100644 --- a/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh @@ -24,7 +24,7 @@ jutil env activate -p deepacf VIRT_ENV_NAME="my_venv" # Loading mouldes -source ../env_setup/modules_data_extraction.sh +source ../env_setup/modules_preprocess+extract.sh # Activate virtual environment if needed (and possible) if [ -z ${VIRTUAL_ENV} ]; then if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh index 9576eafc8bb63f1e5885dba68808c7208c02f1ce..a6da4643636ab7997a72cfc81f9311de6d7e8527 100644 --- a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh +++ b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh @@ -32,7 +32,7 @@ if [ -z ${VIRTUAL_ENV} ]; then fi fi # Loading mouldes -source ../env_setup/modules_preprocess.sh +source ../env_setup/modules_preprocess+extract.sh # select years and variables for dataset and define target domain diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh index 10133fce974952a72871f3cc0ccb852829cc8512..e0440dff5ab507f0ba475485781ab63283f8f4dc 100644 --- 
a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh +++ b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh @@ -17,22 +17,29 @@ echo "Do not run the template scripts" exit 99 ######### Template identifier (don't remove) ######### -# Name of virtual environment +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" -# Loading mouldes -source ../env_setup/modules_train.sh -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 fi +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + # declare directory-variables which will be modified by config_runscript.py source_dir=/my/path/to/pkl/files/ destination_dir=/my/path/to/tfrecords/files @@ -40,6 +47,11 @@ destination_dir=/my/path/to/tfrecords/files sequence_length=20 sequences_per_file=10 # run Preprocessing (step 2 where Tf-records are generated) -srun python ../main_scripts/main_preprocess_data_step2.py -source_dir ${source_dir} -dest_dir ${destination_dir} \ - -sequence_length ${sequence_length} -sequences_per_file ${sequences_per_file} +# run postprocessing/generation of model results including evaluation metrics +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 ../main_scripts/main_preprocess_data_step2.py -source_dir ${source_dir} -dest_dir ${destination_dir} \ + -sequence_length ${sequence_length} -sequences_per_file ${sequences_per_file} diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh b/video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh index cc63f0dd9c4477f51e058907cba508ebbcd5c5d1..fba90ba9caec50822503a735751b5342ef3398fb 100644 --- a/video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh +++ b/video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh @@ -37,4 +37,9 @@ source_dir=/my/path/to/mnist/raw/data/ destination_dir=/my/path/to/mnist/tfrecords/ # run Preprocessing (step 2 where Tf-records are generated) -srun python ../video_prediction/datasets/moving_mnist.py ${source_dir} ${destination_dir} +# run postprocessing/generation of model results including evaluation metrics +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 
../video_prediction/datasets/moving_mnist.py ${source_dir} ${destination_dir} diff --git a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh old mode 100644 new mode 100755 index 9c03ae7adde3886fd7e005fec5b17b4c7da84dd9..8d9d7d0e8780cc5152f8e22106b878caa6ee8e83 --- a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh @@ -2,52 +2,56 @@ #SBATCH --account=deepacf #SBATCH --nodes=1 #SBATCH --ntasks=1 -##SBATCH --ntasks-per-node=1 -#SBATCH --cpus-per-task=1 -#SBATCH --output=train_era5-out.%j -#SBATCH --error=train_era5-err.%j -#SBATCH --time=20:00:00 +#SBATCH --output=train_model_era5-out.%j +#SBATCH --error=train_model_era5-err.%j +#SBATCH --time=24:00:00 #SBATCH --gres=gpu:1 -#SBATCH --partition=gpus +#SBATCH --partition=some_partition #SBATCH --mail-type=ALL -#SBATCH --mail-user=b.gong@fz-juelich.de -##jutil env activate -p cjjsc42 +#SBATCH --mail-user=me@somewhere.com ######### Template identifier (don't remove) ######### echo "Do not run the template scripts" exit 99 ######### Template identifier (don't remove) ######### -# Name of virtual environment +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" -# Loading mouldes -source ../env_setup/modules_train.sh -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi +# sanity checks +if [[ ! 
-f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 fi -# declare directory-variables which will be modified by config_runscript.py -source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/ -destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/models/ +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." + exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + +# declare directory-variables which will be modified by generate_runscript.py +source_dir=/my/path/to/tfrecords/files +destination_dir=/my/model/output/path # valid identifiers for model-argument are: convLSTM, savp, mcnet and vae -# the destination_dir_full cannot end up with "/", this will cause to save all the checkpoints issue in the results_dir model=convLSTM -datasplit_dict=../data_split/cv_test.json +datasplit_dict=${destination_dir}/data_split.json model_hparams=${destination_dir}/model_hparams.json -dataset=era5 -#If you train savp, Please uncomment the following CUDA configuration -#CUDA_VISIBLE_DEVICES=1 +# run training in container +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 "${BASE_DIR}"/main_scripts/main_train_models.py --input_dir ${source_dir} --datasplit_dict ${datasplit_dict} \ + --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/ -# run training -srun python ../main_scripts/main_train_models.py --input_dir ${source_dir} --datasplit_dict ${datasplit_dict} \ - --dataset ${dataset} --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} --checkpoint ${destination_dir} diff --git a/video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh 
b/video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh index 05d4bdb5e55ad5c6d12fbde7d7f5299c50478701..0f25f6906d63918d376d697fbec98eadfb1ad9a0 100755 --- a/video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh +++ b/video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh @@ -18,22 +18,28 @@ echo "Do not run the template scripts" exit 99 ######### Template identifier (don't remove) ######### -# Name of virtual environment +# auxiliary variables +WORK_DIR=`pwd` +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" + +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 +fi -# Loading mouldes -source ../env_setup/modules_train.sh -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 fi +# clean-up modules to avoid conflicts between host and container settings +module purge # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) @@ -47,5 +53,10 @@ model_hparams=../hparams/${dataset}/${model}/model_hparams.json destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"" # rund training - -srun python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset moving_mnist --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/ +# run training in container +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python ../main_scripts/train.py --input_dir ${source_dir}/tfrecords/ --dataset ${dataset} --model ${model} \ + --model_hparams_dict ${model_hparams} --output_dir "${destination_dir}"/ diff --git a/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh b/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh index a29b8e1b0f297dc986c5633a45857c590d37b514..be3e67c03f8384de39e9d193ad206e44695282df 100644 --- a/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh @@ -10,42 +10,49 @@ #SBATCH --gres=gpu:1 #SBATCH --partition=gpus #SBATCH --mail-type=ALL -#SBATCH --mail-user=b.gong@fz-juelich.de -##jutil env activate -p cjjsc42 +#SBATCH --mail-user=me@somewhere.com ######### Template identifier (don't remove) ######### echo "Do not run the template scripts" exit 99 ######### Template identifier (don't remove) ######### +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") # Name of virtual environment VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) 
+CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" -# Loading modules -source ../env_setup/modules_postprocess.sh -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 fi -# declare directory-variables which will be modified by config_runscript.py +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." + exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + +# declare directory-variables which will be modified by generate_runscript.py # Note: source_dir is only needed for retrieving the base-directory source_dir=/my/source/dir/ checkpoint_dir=/my/trained/model/dir results_dir=/my/results/dir lquick="" -# name of model -model=convLSTM - # run postprocessing/generation of model results including evaluation metrics -srun python -u ../main_scripts/main_visualize_postprocess.py --checkpoint ${checkpoint_dir} --mode test \ - --results_dir ${results_dir} --batch_size 4 \ - --num_stochastic_samples 1 ${lquick} \ - > postprocess_era5-out_all.${SLURM_JOB_ID} +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 ../main_scripts/main_visualize_postprocess.py --checkpoint ${checkpoint_dir} --mode test \ + --results_dir ${results_dir} --batch_size 4 \ + --num_stochastic_samples 1 ${lquick} \ + > postprocess_era5-out_all."${SLURM_JOB_ID}" diff --git 
a/video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh b/video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh index 4bc46af7b83e6d9bc156543e8e899ff3bdbc07d7..c57beecc13959eb2dd28654f5289f0c0b122a71c 100755 --- a/video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh +++ b/video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh @@ -18,22 +18,29 @@ echo "Do not run the template scripts" exit 99 ######### Template identifier (don't remove) ######### -# Name of virtual environment +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" -# Loading modules -source ../env_setup/modules_postprocess.sh -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 fi +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + # declare directory-variables which will be modified by config_runscript.py source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/moving_mnist checkpoint_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/models/moving_mnist @@ -42,7 +49,11 @@ results_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/results/m model=convLSTM # run postprocessing/generation of model results including evaluation metrics -srun python -u ../scripts/generate_movingmnist.py \ ---input_dir ${source_dir}/ --dataset_hparams sequence_length=20 --checkpoint ${checkpoint_dir}/${model} \ ---mode test --model ${model} --results_dir ${results_dir}/${model} --batch_size 2 --dataset era5 > generate_era5-out.out +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 ../scripts/generate_movingmnist.py --input_dir ${source_dir}/ --dataset_hparams sequence_length=20 \ + --checkpoint ${checkpoint_dir}/${model} --mode test --model ${model} --results_dir ${results_dir}/${model} \ + --batch_size 2 --dataset era5 > generate_era5-out."${SLURM_JOB_ID}" diff --git a/video_prediction_tools/deprecate/create_env_zam347.sh b/video_prediction_tools/deprecated/create_env_zam347.sh similarity index 100% rename from video_prediction_tools/deprecate/create_env_zam347.sh rename to video_prediction_tools/deprecated/create_env_zam347.sh diff --git a/video_prediction_tools/deprecate/datasets/Download_ERA5_Variable.py b/video_prediction_tools/deprecated/datasets/Download_ERA5_Variable.py similarity index 100% rename from video_prediction_tools/deprecate/datasets/Download_ERA5_Variable.py rename to video_prediction_tools/deprecated/datasets/Download_ERA5_Variable.py diff --git 
a/video_prediction_tools/deprecate/datasets/extract_data/era5_dataset_v2_anomaly.py b/video_prediction_tools/deprecated/datasets/extract_data/era5_dataset_v2_anomaly.py similarity index 100% rename from video_prediction_tools/deprecate/datasets/extract_data/era5_dataset_v2_anomaly.py rename to video_prediction_tools/deprecated/datasets/extract_data/era5_dataset_v2_anomaly.py diff --git a/video_prediction_tools/deprecate/datasets/extract_data/extract_era5.py b/video_prediction_tools/deprecated/datasets/extract_data/extract_era5.py similarity index 100% rename from video_prediction_tools/deprecate/datasets/extract_data/extract_era5.py rename to video_prediction_tools/deprecated/datasets/extract_data/extract_era5.py diff --git a/video_prediction_tools/deprecate/helper/helper.py b/video_prediction_tools/deprecated/helper/helper.py similarity index 100% rename from video_prediction_tools/deprecate/helper/helper.py rename to video_prediction_tools/deprecated/helper/helper.py diff --git a/video_prediction_tools/deprecate/model_modules/sna_model.py b/video_prediction_tools/deprecated/model_modules/sna_model.py similarity index 100% rename from video_prediction_tools/deprecate/model_modules/sna_model.py rename to video_prediction_tools/deprecated/model_modules/sna_model.py diff --git a/video_prediction_tools/deprecate/model_modules/sv2p_model.py b/video_prediction_tools/deprecated/model_modules/sv2p_model.py similarity index 100% rename from video_prediction_tools/deprecate/model_modules/sv2p_model.py rename to video_prediction_tools/deprecated/model_modules/sv2p_model.py diff --git a/video_prediction_tools/deprecate/model_modules/vanilla_GAN_model.py b/video_prediction_tools/deprecated/model_modules/vanilla_GAN_model.py similarity index 100% rename from video_prediction_tools/deprecate/model_modules/vanilla_GAN_model.py rename to video_prediction_tools/deprecated/model_modules/vanilla_GAN_model.py diff --git a/video_prediction_tools/env_setup/modules_postprocess.sh 
b/video_prediction_tools/deprecated/modules_postprocess.sh similarity index 100% rename from video_prediction_tools/env_setup/modules_postprocess.sh rename to video_prediction_tools/deprecated/modules_postprocess.sh diff --git a/video_prediction_tools/env_setup/modules_train.sh b/video_prediction_tools/deprecated/modules_train.sh old mode 100644 new mode 100755 similarity index 100% rename from video_prediction_tools/env_setup/modules_train.sh rename to video_prediction_tools/deprecated/modules_train.sh diff --git a/video_prediction_tools/deprecate/pretrained_models/download_model.sh b/video_prediction_tools/deprecated/pretrained_models/download_model.sh similarity index 100% rename from video_prediction_tools/deprecate/pretrained_models/download_model.sh rename to video_prediction_tools/deprecated/pretrained_models/download_model.sh diff --git a/video_prediction_tools/deprecate/scripts/combine_results.py b/video_prediction_tools/deprecated/scripts/combine_results.py similarity index 100% rename from video_prediction_tools/deprecate/scripts/combine_results.py rename to video_prediction_tools/deprecated/scripts/combine_results.py diff --git a/video_prediction_tools/deprecate/scripts/evaluate.py b/video_prediction_tools/deprecated/scripts/evaluate.py similarity index 100% rename from video_prediction_tools/deprecate/scripts/evaluate.py rename to video_prediction_tools/deprecated/scripts/evaluate.py diff --git a/video_prediction_tools/deprecate/scripts/evaluate_all.sh b/video_prediction_tools/deprecated/scripts/evaluate_all.sh similarity index 100% rename from video_prediction_tools/deprecate/scripts/evaluate_all.sh rename to video_prediction_tools/deprecated/scripts/evaluate_all.sh diff --git a/video_prediction_tools/deprecate/scripts/generate_all.sh b/video_prediction_tools/deprecated/scripts/generate_all.sh similarity index 100% rename from video_prediction_tools/deprecate/scripts/generate_all.sh rename to video_prediction_tools/deprecated/scripts/generate_all.sh 
diff --git a/video_prediction_tools/deprecate/scripts/generate_orig.py b/video_prediction_tools/deprecated/scripts/generate_orig.py similarity index 100% rename from video_prediction_tools/deprecate/scripts/generate_orig.py rename to video_prediction_tools/deprecated/scripts/generate_orig.py diff --git a/video_prediction_tools/deprecate/scripts/plot_results.py b/video_prediction_tools/deprecated/scripts/plot_results.py similarity index 100% rename from video_prediction_tools/deprecate/scripts/plot_results.py rename to video_prediction_tools/deprecated/scripts/plot_results.py diff --git a/video_prediction_tools/deprecate/scripts/plot_results_all.sh b/video_prediction_tools/deprecated/scripts/plot_results_all.sh similarity index 100% rename from video_prediction_tools/deprecate/scripts/plot_results_all.sh rename to video_prediction_tools/deprecated/scripts/plot_results_all.sh diff --git a/video_prediction_tools/deprecate/scripts/train.py b/video_prediction_tools/deprecated/scripts/train.py similarity index 100% rename from video_prediction_tools/deprecate/scripts/train.py rename to video_prediction_tools/deprecated/scripts/train.py diff --git a/video_prediction_tools/deprecate/scripts/train_all.sh b/video_prediction_tools/deprecated/scripts/train_all.sh similarity index 100% rename from video_prediction_tools/deprecate/scripts/train_all.sh rename to video_prediction_tools/deprecated/scripts/train_all.sh diff --git a/video_prediction_tools/env_setup/create_env.sh b/video_prediction_tools/env_setup/create_env.sh index 750abfb9a8a95db6c6f115e9ad3c14c81320b109..574a57e33b90e453d48d47ec4c013ca766320bde 100755 --- a/video_prediction_tools/env_setup/create_env.sh +++ b/video_prediction_tools/env_setup/create_env.sh @@ -2,13 +2,13 @@ # # __authors__ = Bing Gong, Michael Langguth # __date__ = '2020_01_15' -# __last_update__ = '2021_04_28' by Michael Langguth +# __last_update__ = '2021_10_28' by Michael Langguth # # **************** Description **************** # This script 
can be used for setting up the virtual environment needed for AMBS-project # The name of the virtual environment is controlled by the first parsed argument. # It also setups the (Batch) runscript templates to customized runscripts (to be used by generate_runscript.py) -# Note that the basic output directory for the workflow may be set may passing a path as second argument to this script. +# Note that the basic output directory for the workflow may be set may parsing -base_dir [my_dir]. # **************** Description **************** # # **************** Auxiliary functions **************** @@ -17,67 +17,51 @@ check_argin() { # Can also be used to check for non-positional arguments (such as -exp_id=*, see commented lines) for argin in "$@"; do if [[ $argin == *"-base_dir="* ]]; then - base_dir=${argin#"-base_dir="} - fi - if [[ $argin == *"-lcontainer"* ]]; then - bool_container=1 + base_outdir=${argin#"-base_dir="} fi done - if [[ -z "${bool_container}" ]]; then - bool_container=0 - fi } + # **************** Auxiliary functions **************** # **************** Actual script **************** # some first sanity checks -if [[ ${BASH_SOURCE[0]} == ${0} ]]; then +if [[ ${BASH_SOURCE[0]} == "${0}" ]]; then echo "ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'" exit 1 fi # from now on, just return if something unexpected occurs instead of exiting # as the latter would close the terminal including logging out -if [[ ! -n "$1" ]]; then +if [[ -z "$1" ]]; then echo "ERROR: Provide a name to set up the virtual environment, i.e. 
execute by prompting 'source create_env.sh [virt_env_name]" return fi if [[ "$#" -gt 1 ]]; then - check_argin ${@:2} # sets base_dir if provided, always sets l_container + check_argin ${@:2} # sets base_outdir if provided fi # set some variables -HOST_NAME=`hostname` +HOST_NAME="$(hostname)" ENV_NAME=$1 -ENV_SETUP_DIR=`pwd` -WORKING_DIR="$(dirname "$ENV_SETUP_DIR")" -EXE_DIR="$(basename "$ENV_SETUP_DIR")" -ENV_DIR=${WORKING_DIR}/${ENV_NAME} +THIS_DIR="$(pwd)" +WORKING_DIR="$(dirname "$THIS_DIR")" +EXE_DIR="$(basename "$THIS_DIR")" +ENV_DIR=${WORKING_DIR}/virtual_envs/${ENV_NAME} +TF_CONTAINER=${WORKING_DIR}/HPC_scripts/tensorflow_21.09-tf1-py3.sif ## perform sanity checks -# correct bool_container if host is Juwels Booster and ensure running singularity -if [[ "${bool_container}" == 0 ]] && [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then - echo "******************************************** NOTE ********************************************" - echo " Running on Juwels Booster is only possible inside a container environment. " - echo "******************************************** NOTE ********************************************" - bool_container=1 -fi -if [[ "${bool_container}" == 1 ]]; then - echo "******************************************** NOTE ********************************************" - echo " Make use of dedicated Horovod-related working branches only!!! " - echo "******************************************** NOTE ********************************************" - # Check if singularity is running - if [[ -z "${SINGULARITY_NAME}" ]]; then - echo "ERROR: create_env.sh must be executed in a running singularity on Juwels in conjuction with container-usage." - echo "Thus, execute 'singularity shell [my_docker_image]' first!" - return - fi +modules_purge="" +if [[ ! -f ${TF_CONTAINER} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${TF_CONTAINER}'." 
+ return fi # further sanity checks: # * ensure execution from env_setup-directory +# * check host # * check if virtual env has already been set up if [[ "${EXE_DIR}" != "env_setup" ]]; then @@ -85,143 +69,47 @@ if [[ "${EXE_DIR}" != "env_setup" ]]; then return fi -if [[ -d ${ENV_DIR} ]]; then - echo "Virtual environment has already been set up under ${ENV_DIR}. The present virtual environment will be activated now." - echo "NOTE: If you wish to set up a new virtual environment, delete the existing one or provide a different name." - ENV_EXIST=1 -else - ENV_EXIST=0 -fi - -## check integratability of modules -if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - if [[ "${bool_container}" > 0 ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for container environment! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for container environment! *****" - else - # load modules and check for their availability - echo "***** Checking modules required during the workflow... *****" - source ${ENV_SETUP_DIR}/modules_preprocess.sh purge - source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi -else +if ! 
[[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then echo "ERROR: AMBS-workflow is currently only supported on the Juelich HPC-systems HDF-ML, Juwels and Juwels Booster" return # unset PYTHONPATH on every other machine that is not a known HPC-system # unset PYTHONPATH fi - -# Create fresh virtual environment or just activate the existing one -if [[ "$ENV_EXIST" == 0 ]]; then - # Check modules first - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - if [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for Juwels Booster! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for Juwels Booster! *****" - else - # load modules and check for their availability - echo "***** Checking modules required during the workflow... *****" - source ${ENV_SETUP_DIR}/modules_preprocess.sh purge - source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi - else - # unset PYTHONPATH on every other machine that is not a known HPC-system - unset PYTHONPATH - fi +if [[ -d ${ENV_DIR} ]]; then + echo "Virtual environment has already been set up under ${ENV_DIR}. The present virtual environment will be activated now." + echo "NOTE: If you wish to set up a new virtual environment, delete the existing one or provide a different name." + ENV_EXIST=1 +else + ENV_EXIST=0 fi -## set up virtual environment +## set up virtual environment if required if [[ "$ENV_EXIST" == 0 ]]; then # Activate virtual environment and install additional Python packages. echo "Configuring and activating virtual environment on ${HOST_NAME}" - - python3 -m venv $ENV_DIR - activate_virt_env=${ENV_DIR}/bin/activate - - echo "Entering virtual environment ${ENV_DIR} to install required Python modules..." 
- source ${activate_virt_env} + module purge + singularity exec --nv "${TF_CONTAINER}" ./install_venv_container.sh "${ENV_DIR}" - # install some requirements and/or check for modules - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - # Install packages depending on host - echo "***** Start installing additional Python modules with pip... *****" - req_file=${ENV_SETUP_DIR}/requirements.txt - if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi - - pip3 install --no-cache-dir -r ${req_file} - else - echo "***** Start installing additional Python modules with pip... *****" - pip3 install --upgrade pip - pip3 install -r ${ENV_SETUP_DIR}/requirements.txt - pip3 install mpi4py - pip3 install netCDF4 - pip3 install numpy - pip3 install h5py - pip3 install tensorflow-gpu==1.13.1 - fi - - # expand PYTHONPATH... - export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:$PYTHONPATH - fi - fi - # ...and ensure that this also done when the - echo "" >> ${activate_virt_env} - echo "# Expand PYTHONPATH..." 
>> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH " >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH" >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - echo "export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:\$PYTHONPATH" >> ${activate_virt_env} - fi - fi info_str="Virtual environment ${ENV_DIR} has been set up successfully." elif [[ "$ENV_EXIST" == 1 ]]; then - # loading modules of postprocessing and activating virtual env are suifficient - if [[ "${bool_container}" == 0 ]]; then - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi - source ${ENV_DIR}/bin/activate - info_str="Virtual environment ${ENV_DIR} has been activated successfully." + info_str="Virtual environment ${ENV_DIR} already exists." fi +## load modules (for running runscript-generator... +echo "${info_str}" +echo "Load modules to enable running of runscript generator '${ENV_DIR}'." +source ${THIS_DIR}/modules_preprocess+extract.sh + +## ... and prepare runscripts echo "Set up runscript template for user ${USER}..." 
-if [[ -z "${base_dir}" ]]; then - shift - source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh +if [[ -z "${base_outdir}" ]]; then + "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh else - source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh ${base_dir} + "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh "${base_outdir}" fi echo "******************************************** NOTE ********************************************" -echo "${info_str}" echo "Make use of generate_runscript.py to generate customized runscripts of the workflow steps." echo "******************************************** NOTE ********************************************" - -# finally clean up loaded modules (if we are not on Juwels) -#if [[ "${HOST_NAME}" == *hdfml* || "${HOST_NAME}" == *juwels* ]] && [[ "${HOST_NAME}" != jwlogin2[1-4]* ]]; then -# module --force purge -#fi diff --git a/video_prediction_tools/env_setup/generate_runscript.py b/video_prediction_tools/env_setup/generate_runscript.py index 4da026b55af0753dcdcf667ec737bf7eaa98cbc0..68a30ec61ee035bc04e22e4d776077af99dfbb8c 100755 --- a/video_prediction_tools/env_setup/generate_runscript.py +++ b/video_prediction_tools/env_setup/generate_runscript.py @@ -17,10 +17,13 @@ import sys, os import socket if sys.version_info[0] < 3: raise Exception("This script has to be run with Python 3!") +# append path to get runscript-generator scripts sys.path.append(os.path.dirname(sys.path[0])) +workdir = os.path.dirname(os.getcwd()) +sys.path.append(os.path.join(workdir, "utils")) +import argparse + from runscript_generator.config_utils import check_virtualenv -# sanity check (is Python running in a virtual environment) -_ = check_virtualenv(labort=True) from runscript_generator.config_utils import Config_runscript_base from runscript_generator.config_extraction import Config_Extraction @@ -55,7 +58,14 @@ def get_runscript_cls(target_runscript_name, 
venv_name, lhpc): # def main(): - venv_name = check_virtualenv(labort=True) + parser = argparse.ArgumentParser() + parser.add_argument("--venv_path", "-venv", dest="venv_name", type=str, required=True, + help="Name of virtual environment to be used (created with create_env.sh).") + + + args = parser.parse_args() + venv_path = os.path.join(os.path.dirname(os.getcwd()), "virtual_envs", args.venv_name) + venv_name = check_virtualenv(lactive=False, venv_path=venv_path, labort=True) # check if we are on a known HPC lhpc = False diff --git a/video_prediction_tools/env_setup/install_venv_container.sh b/video_prediction_tools/env_setup/install_venv_container.sh new file mode 100755 index 0000000000000000000000000000000000000000..f0d53cdb495d5df6dffd30b79a19f53e1a0b2e98 --- /dev/null +++ b/video_prediction_tools/env_setup/install_venv_container.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# +# __authors__ = Bing Gong, Michael Langguth +# __date__ = '2021_10_28' +# __last_update__ = '2022_01_26' by Michael Langguth +# +# **************** Description **************** +# This auxiliary script sets up the virtual environment within a singularity container. Usage (inside the container, from the env_setup-directory): ./install_venv_container.sh <path_to_venv> +# **************** Description **************** + +# set some basic variables +BASE_DIR="$(pwd)" +VENV_DIR=$1 +VENV_NAME="$(basename "${VENV_DIR}")" +VENV_BASE="$(dirname "${VENV_DIR}")" +WORKING_DIR="$(dirname "${VENV_BASE}")" +VENV_REQ=${BASE_DIR}/requirements.txt + +# sanity checks (this script is executed, not sourced: abort with 'exit 1', since a bare 'return' would not stop it) +# check if we are running in a container +if [ -z "${SINGULARITY_NAME}" ]; then + echo "ERROR: install_venv_container.sh must be called within a running singularity container." + exit 1 +fi + +# check if directory to virtual environment is parsed +if [ -z "$1" ]; then + echo "ERROR: Provide a name to set up the virtual environment." + exit 1 +fi + +# check if virtual environment is not already existing +if [ -d "$1" ]; then + echo "ERROR: Target directory of virtual environment ${1} already exists. Chosse another directory path."
+ exit 1 +fi + +# check for requirement-file +if [ ! -f "${VENV_REQ}" ]; then + echo "ERROR: Cannot find requirement-file '${VENV_REQ}' to set up virtual environment." + exit 1 +fi + +# create or change to base directory for virtual environment (i.e. where the virtualenv-module is placed) +if ! [[ -d "${VENV_BASE}" ]]; then + mkdir "${VENV_BASE}" || exit 1 + # Install virtualenv in this directory + echo "Installing virtualenv under ${VENV_BASE}..." + pip install --target="${VENV_BASE}/" virtualenv || exit 1 + # Change into the base-directory of virtual environments... + cd "${VENV_BASE}" || exit 1 +else + # Change into the base-directory of virtual environments... + cd "${VENV_BASE}" || exit 1 + if ! python -m virtualenv --version >/dev/null; then + echo "ERROR: Base directory for virtual environment exists, but virtualenv-module is unavailable." + exit 1 + fi + echo "Virtualenv is already installed." +fi + + +# Set-up virtual environment in base directory for virtual environments (abort if creation fails, otherwise the steps below would act on a missing environment) +python -m virtualenv -p /usr/bin/python "${VENV_NAME}" || exit 1 +# Activate virtual environment and install required packages +echo "Actiavting virtual environment ${VENV_NAME} to install required Python modules..." +ACT_VENV="${VENV_DIR}/bin/activate" +source "${ACT_VENV}" || exit 1 +# set PYTHONPATH... +export PYTHONPATH="/usr/local/lib/python3.8/dist-packages/" +export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH +# ... also ensure that PYTHONPATH is appended when activating the virtual environment (\$PYTHONPATH is escaped so that it is expanded at activation time, not now)...
+echo 'export PYTHONPATH="/usr/local/lib/python3.8/dist-packages/"' >> "${ACT_VENV}" +echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> "${ACT_VENV}" +echo "export PYTHONPATH=${WORKING_DIR}/utils:\$PYTHONPATH" >> "${ACT_VENV}" +echo "export PYTHONPATH=${WORKING_DIR}/model_modules:\$PYTHONPATH" >> "${ACT_VENV}" +echo "export PYTHONPATH=${WORKING_DIR}/postprocess:\$PYTHONPATH" >> "${ACT_VENV}" +# ... install requirements +pip install --no-cache-dir -r "${VENV_REQ}" || exit 1 + +# get back to basic directory +cd "${BASE_DIR}" || exit 1 + + + diff --git a/video_prediction_tools/env_setup/modules_data_extraction.sh b/video_prediction_tools/env_setup/modules_data_extraction.sh deleted file mode 100755 index 41a6e8af0ccf4853aa38d0e51716408d69b5216d..0000000000000000000000000000000000000000 --- a/video_prediction_tools/env_setup/modules_data_extraction.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -# __author__ = Bing Gong, Michael Langguth -# __date__ = '2020_06_26' - -# This script loads the required modules for ambs on Juwels and HDF-ML. -# Note that some other packages have to be installed into a venv (see create_env.sh and requirements.txt). - -HOST_NAME=`hostname` - -echo "Start loading modules on ${HOST_NAME} required for era5 data extraction..."
-echo "modules_data_etraction.sh is subject to: " -echo "* data_extraction_era5_<exp_id>.sh" - -module purge -# serialized version is not available on HFML -# see https://gitlab.version.fz-juelich.de/haf/Wiki/-/wikis/HDF-ML%20System -if [[ "${HOST_NAME}" == hdfml* ]]; then - module use $OTHERSTAGES - ml Stages/2019a - ml GCC/8.3.0 - ml ParaStationMPI/5.2.2-1 - ml CDO/1.9.6 - ml mpi4py/3.0.1-Python-3.6.8 - ml SciPy-Stack/2019a-Python-3.6.8 - ml scikit/2019a-Python-3.6.8 - ml netcdf4-python/1.5.0.1-Python-3.6.8 - -else - module load Stages/2020 - ml GCC/9.3.0 - ml ParaStationMPI/5.4.7-1 - ml CDO/1.9.8 - ml mpi4py/3.0.3-Python-3.8.5 - echo "I am here" - ml SciPy-Stack/2020-Python-3.8.5 - ml scikit/2020-Python-3.8.5 -fi -#ml SciPy-Stack/2019a-Python-3.6.8 -#ml scikit/2019a-Python-3.6.8 -#ml netcdf4-python/1.5.0.1-Python-3.6.8 - -# clean up if triggered via script argument -if [[ $1 == purge ]]; then - echo "Purge all modules after loading them..." - module --force purge -fi diff --git a/video_prediction_tools/env_setup/modules_preprocess.sh b/video_prediction_tools/env_setup/modules_preprocess+extract.sh similarity index 97% rename from video_prediction_tools/env_setup/modules_preprocess.sh rename to video_prediction_tools/env_setup/modules_preprocess+extract.sh index 6bc4f4d09c13d1bd73db2d38ff2c263e72a1f8a8..c867554716e49f9fbe5c66275a158fefd505f927 100755 --- a/video_prediction_tools/env_setup/modules_preprocess.sh +++ b/video_prediction_tools/env_setup/modules_preprocess+extract.sh @@ -10,7 +10,6 @@ HOST_NAME=`hostname` echo "Start loading modules on ${HOST_NAME} required for preprocessing..." 
echo "modules_preprocess.sh is subject to: " -echo "* data_extraction_era5.sh" echo "* preprocess_data_era5_step1.sh" module purge diff --git a/video_prediction_tools/env_setup/requirements.txt b/video_prediction_tools/env_setup/requirements.txt old mode 100644 new mode 100755 index 24775be7e7b72788ccf9d98f88ec9f9885fea85b..9f433734a966541c6c6a20a6387a499716b2d80a --- a/video_prediction_tools/env_setup/requirements.txt +++ b/video_prediction_tools/env_setup/requirements.txt @@ -1,2 +1,7 @@ -opencv-python==4.2.0.34 -hickle +matplotlib==3.3.0 +mpi4py==3.0.1 +pandas==0.25.3 +xarray==0.16.0 +basemap==1.3.0 +scikit-image==0.18.1 +opencv-python-headless==4.2.0.34 diff --git a/video_prediction_tools/env_setup/requirements_booster.txt b/video_prediction_tools/env_setup/requirements_booster.txt deleted file mode 100755 index fa06454a4951c9dc46ca40ae5774880f5a11a49e..0000000000000000000000000000000000000000 --- a/video_prediction_tools/env_setup/requirements_booster.txt +++ /dev/null @@ -1,7 +0,0 @@ -opencv-python-headless==4.2.0.34 -netcdf4==1.5.0.1 -scipy -matplotlib==3.3.0 -scikit-image -pandas -hickle diff --git a/video_prediction_tools/env_setup/requirements_noHPC.txt b/video_prediction_tools/env_setup/requirements_noHPC.txt new file mode 100755 index 0000000000000000000000000000000000000000..44060174992ed2035c5c8ccd31eedbefce296d02 --- /dev/null +++ b/video_prediction_tools/env_setup/requirements_noHPC.txt @@ -0,0 +1,7 @@ +opencv-python==4.2.0.34 +hickle +mpi4py +netCDF4 +numpy +h5py +tensorflow-gpu==1.13.1 \ No newline at end of file diff --git a/video_prediction_tools/env_setup/wrapper_container.sh b/video_prediction_tools/env_setup/wrapper_container.sh new file mode 100755 index 0000000000000000000000000000000000000000..fea29a0a9018a5436122389164cfff0859f22552 --- /dev/null +++ b/video_prediction_tools/env_setup/wrapper_container.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# basic directory variables +ENV_SETUP_DIR=`pwd` +WORKING_DIR="$(dirname "$ENV_SETUP_DIR")" 
+EXE_DIR="$(basename "$ENV_SETUP_DIR")" +VENV_DIR=$WORKING_DIR/virtual_envs/$1 +shift # replaces $1 by $2, so that $@ does not include the name of the virtual environment anymore + +# sanity checks (exit with a non-zero status so that the calling batch job can detect the failure) +if [[ "${EXE_DIR}" != "HPC_scripts" ]]; then + echo "ERROR: Run the setup-script for the enviornment from the HPC_scripts-directory!" + exit 1 +fi + +if ! [[ -d "${VENV_DIR}" ]]; then + echo "ERROR: Could not found virtual environment under ${VENV_DIR}!" + exit 1 +fi + +# expand PYTHONPATH +# Include site-packages from virtual environment... +unset PYTHONPATH +export PYTHONPATH=${VENV_DIR}/lib/python3.8/site-packages/:$PYTHONPATH +# ... dist-packages from container singularity... +export PYTHONPATH=/usr/local/lib/python3.8/dist-packages:$PYTHONPATH +# ... and modules from this project +export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH + +# Control +echo "****** Check PYTHONPATH *****" +echo "$PYTHONPATH" +# MPI related environmental variables +export PMIX_SECURITY_MODE="native" # default would include munge which is unavailable +# run the wrapped command; quoting "$@" passes every argument through unchanged (no re-splitting or globbing) +"$@" diff --git a/video_prediction_tools/main_scripts/main_train_models.py b/video_prediction_tools/main_scripts/main_train_models.py index 4fe3e518b7c573fe2ed92ba647611d372e264fd6..7ccddc88c66128bcab07104a818a9ff73faa3316 100644 --- a/video_prediction_tools/main_scripts/main_train_models.py +++ b/video_prediction_tools/main_scripts/main_train_models.py @@ -355,13 +355,13 @@ class TrainModel(object): self.create_fetches_for_train() # In addition to the loss, we fetch the optimizer self.results = sess.run(self.fetches) # ...and run it here! # Note: For SAVP, the obtained loss is a list where the first element is of interest, for convLSTM, - # it's just a number.
Thus, with ensure_list(<losses>)[0], we can handle both + train_losses.append(ensure_list(self.results[self.saver_loss])[0]) # run and fetch losses for validation data val_handle_eval = sess.run(self.val_handle) self.create_fetches_for_val() self.val_results = sess.run(self.val_fetches, feed_dict={self.train_handle: val_handle_eval}) - val_losses.append(list(self.val_results[self.saver_loss])[0]) + val_losses.append(ensure_list(self.val_results[self.saver_loss])[0]) self.write_to_summary() self.print_results(step, self.results) # track iteration time diff --git a/video_prediction_tools/model_modules/video_prediction/models/vanilla_convLSTM_model.py b/video_prediction_tools/model_modules/video_prediction/models/vanilla_convLSTM_model.py index a9e59e7584e563dd8b173337b6c91c01f9a6ffbe..50364b52d2dd13b48bb3087abcc15e147ee1cfd1 100644 --- a/video_prediction_tools/model_modules/video_prediction/models/vanilla_convLSTM_model.py +++ b/video_prediction_tools/model_modules/video_prediction/models/vanilla_convLSTM_model.py @@ -113,7 +113,7 @@ class VanillaConvLstmVideoPredictionModel(object): bce = tf.keras.losses.BinaryCrossentropy() self.total_loss = bce(x_flatten, x_hat_predict_frames_flatten) else: - raise ValueError("Loss function is not selected properly, you should chose either 'rmse' or 'cross_entropy'") + raise ValueError("Loss function is not selected properly, you should chose either 'mse' or 'cross_entropy'") #This is the loss for only all the channels(temperature, geo500, pressure) #self.total_loss = tf.reduce_mean( diff --git a/video_prediction_tools/postprocess/statistical_evaluation.py b/video_prediction_tools/postprocess/statistical_evaluation.py index 7469ee61284a25aa61d8f0ed0517403cd7534fc9..965165a4afc6967e0cadce4ffd93da3a44f14dc0 100644 --- a/video_prediction_tools/postprocess/statistical_evaluation.py +++ b/video_prediction_tools/postprocess/statistical_evaluation.py @@ -13,7 +13,7 @@ __date__ = "2021-05-xx" import numpy as np import xarray as xr from 
typing import Union, List -from skimage.measure import compare_ssim as ssim +from skimage.metrics import structural_similarity as ssim import datetime import pandas as pd try: diff --git a/video_prediction_tools/utils/general_utils.py b/video_prediction_tools/utils/general_utils.py index 64000ee95e1708ed6442947ce08f800e4e8c9526..d18c9d11000df9cb73b0d41ffde3f5ece518982c 100644 --- a/video_prediction_tools/utils/general_utils.py +++ b/video_prediction_tools/utils/general_utils.py @@ -7,6 +7,7 @@ Some auxilary routines which may are used throughout the project. Provides: * get_unique_vars * add_str_to_path * is_integer + * ensure_list * isw * check_str_in_list * check_dir @@ -72,6 +73,27 @@ def is_integer(n): return float(n).is_integer() +def ensure_list(x): + """ + Converts input generically to list-object + :param x: the input data (can be a list, a number/string or an array) + """ + method = ensure_list.__name__ + + if isinstance(x, list): + return x + elif isinstance(x, str): + return [x] + + try: + return list(x) + except TypeError: + try: + return [x] + except: + raise TypeError("%{0}: Failed to put input into list.".format(method)) + + def isw(value, interval): """ Checks if value lies within given interval diff --git a/video_prediction_tools/utils/runscript_generator/config_preprocess_step1.py b/video_prediction_tools/utils/runscript_generator/config_preprocess_step1.py index c0d66695d8199f86f488bafc4f87714e21a1d87a..195530bc1679af365c7761dd74bd0f3737316058 100755 --- a/video_prediction_tools/utils/runscript_generator/config_preprocess_step1.py +++ b/video_prediction_tools/utils/runscript_generator/config_preprocess_step1.py @@ -14,7 +14,7 @@ try: import xarray as xr except: raise ImportError("Loading preprocssing modules in advance is mandotory, " + - "i.e. execute 'source modules_preprocess.sh' from env_setup-directory in terminal first.") + "i.e. 
execute 'source modules_preprocess+extract.sh' from env_setup-directory in terminal first.") from netcdf_datahandling import NetcdfUtils from general_utils import check_str_in_list diff --git a/video_prediction_tools/utils/runscript_generator/config_training.py b/video_prediction_tools/utils/runscript_generator/config_training.py index a224148fd370023a90f87fa35f399af01c5ee230..b382e00a7b3662a4350fe10fe972289d28f1c14e 100755 --- a/video_prediction_tools/utils/runscript_generator/config_training.py +++ b/video_prediction_tools/utils/runscript_generator/config_training.py @@ -18,6 +18,7 @@ from model_modules.model_architectures import known_models from data_preprocess.dataset_options import known_datasets from runscript_generator.config_utils import Config_runscript_base # import parent class + class Config_Train(Config_runscript_base): cls_name = "Config_Train"#.__name__ @@ -36,13 +37,15 @@ class Config_Train(Config_runscript_base): self.long_name_wrk_step = "Training" self.rscrpt_tmpl_prefix = "train_model_" # initialize additional runscript-specific attributes to be set via keyboard interaction + self.dataset = None + self.runscript_template = None self.model = None + self.source_dir = None self.destination_dir = None self.datasplit_dict = None self.model_hparams = None # list of variables to be written to runscript - self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "model", "destination_dir", "datasplit_dict", - "model_hparams"] + self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "model", "destination_dir"] # copy over method for keyboard interaction self.run_config = Config_Train.run_training # @@ -63,7 +66,8 @@ class Config_Train(Config_runscript_base): dset_err, ntries=2) # get source dir (relative to base_dir_source!) 
- self.runscript_template = os.path.join(self.runscript_dir, "train_model_{0}{1}".format(self.dataset, self.suffix_template)) + self.runscript_template = os.path.join(self.runscript_dir, "train_model_{0}{1}" + .format(self.dataset, self.suffix_template)) source_dir_base = Config_Train.handle_source_dir(self, "preprocessedData") expdir_req_str = "Choose a subdirectory listed above where the preprocessed TFrecords are located:" @@ -89,7 +93,6 @@ class Config_Train(Config_runscript_base): prefix2arg=os.path.join(self.source_dir, Config_Train.basename_tfdirs)) - # split up directory path in order to retrieve exp_dir used for setting up the destination directory exp_dir_split = Config_Train.path_rec_split(self.source_dir) index = [idx for idx, s in enumerate(exp_dir_split) if self.dataset in s] diff --git a/video_prediction_tools/utils/runscript_generator/config_utils.py b/video_prediction_tools/utils/runscript_generator/config_utils.py index eb4d3119a89b8dcce8bd3afde00176e20843f6cf..e29ab9457615cbb6641aa27e0d5b9da2ac9bdc9f 100755 --- a/video_prediction_tools/utils/runscript_generator/config_utils.py +++ b/video_prediction_tools/utils/runscript_generator/config_utils.py @@ -117,7 +117,7 @@ class Config_runscript_base: else: batch_var_val = getattr(self, batch_var) if batch_var_val is None: - err= AttributeError("%{0}: Attribute '{1}' is still None.".format(method_name, batch_var)) + err = AttributeError("%{0}: Attribute '{1}' is still None.".format(method_name, batch_var)) if not err is None: raise err @@ -132,7 +132,8 @@ class Config_runscript_base: if stat_batch_var: stat = os.system(write_cmd) if stat > 0: - print("%{0}: Runscript script variable {1} could not be set properly.".format(method_name, batch_var)) + print("%{0}: Runscript script variable {1} could not be set properly." 
+ .format(method_name, batch_var)) else: print("%{0}: Could not find variable {1} in runscript {2} could not be set.".format(method_name, batch_var, runscript)) # @@ -375,24 +376,29 @@ def in_virtualenv(): # #-------------------------------------------------------------------------------------------------------- # -def check_virtualenv(labort=False): +def check_virtualenv(lactive: bool = True, venv_path: str = "", labort=False): """ - New version! -> relies on "VIRTUAL_ENV" environmental variable which also works in conjunction with loaded modules Checks if current script is running a virtual environment and returns the directory's name - :param labort: If True, the an Exception is raised. If False, only a Warning is given + :param lactive: If True, virtual environment must be activated. If False, the existence is required only. + :param venv_path: Path to virtual environment (required if lactive is set to False) + :param labort: If True, an Exception is raised. If False, only a Warning is given :return: name of virtual environment """ + method = check_virtualenv.__name__ - method_name = check_virtualenv.__name__ - - lvirt = in_virtualenv() + if lactive: + lvirt = in_virtualenv() + err_mess = "%{0}: No virtual environment is running.".format(method) + venv_path = os.environ.get("VIRTUAL_ENV") + else: + lvirt = os.path.isfile(os.path.join(venv_path, "bin", "activate")) + err_mess = "%{0}: Virtual environment is not existing under '{1}'".format(method, venv_path) if not lvirt: if labort: - raise EnvironmentError("%{0}: generate_runscript.py has to run in an activated virtual environment!" 
- .format(method_name)) + raise EnvironmentError(err_mess) else: - print("%{0}: config_runscript.py is not running in an activated virtual environment!".format(method_name)) + raise Warning(err_mess) return else: - return os.path.basename(os.environ.get("VIRTUAL_ENV")) + return os.path.basename(venv_path) diff --git a/video_prediction_tools/utils/runscript_generator/configurations.py b/video_prediction_tools/utils/runscript_generator/configurations.py deleted file mode 100644 index c5663bdcb4acebcec36def048c66df38ff3c3d3b..0000000000000000000000000000000000000000 --- a/video_prediction_tools/utils/runscript_generator/configurations.py +++ /dev/null @@ -1,118 +0,0 @@ -# SPDX-FileCopyrightText: 2021 Earth System Data Exploration (ESDE), Jülich Supercomputing Center (JSC) -# -# SPDX-License-Identifier: MIT - -""" -Auxiliary functions that are used in config_runscript.py. -They are used for facilating the customized conversion of the preprocessing step 2- and training runscript-templates -to executable runscripts -""" - -# robust check if script is running in virtual env from -# https://stackoverflow.com/questions/1871549/determine-if-python-is-running-inside-virtualenv/38939054 -def get_base_prefix_compat(): - """Get base/real prefix, or sys.prefix if there is none.""" - return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix", None) or sys.prefix -# -#-------------------------------------------------------------------------------------------------------- -# -def path_rec_split(full_path): - """ - :param full_path: input path to be splitted in its components - :return: list of all splitted components - """ - rest, tail = os.path.split(full_path) - if rest in ('', os.path.sep): return tail, - - return path_rec_split(rest) + (tail,) -# -#-------------------------------------------------------------------------------------------------------- -# -def in_virtualenv(): - return get_base_prefix_compat() != sys.prefix -# 
-#-------------------------------------------------------------------------------------------------------- -# -def check_virtualenv(labort=False): - ''' - Checks if current script is running a virtual environment and returns the directory's name - :param labort: If True, the an Exception is raised. If False, only a Warning is given - :return: name of virtual environment - ''' - lvirt = in_virtualenv() - - if not lvirt: - if labort: - raise EnvironmentError("config_train.py has to run in an activated virtual environment!") - else: - raise Warning("config_train.py is not running in an activated virtual environment!") - return - else: - return os.path.basename(sys.prefix) -# -# -------------------------------------------------------------------------------------------------------- -# -def get_variable_from_runscript(runscript_file, script_variable): - ''' - Search for the declaration of variable in a Shell script and returns its value. - :param runscript_file: path to shell script/runscript - :param script_variable: name of variable which is declared in shell script at hand - :return: value of script_variable - ''' - script_variable = script_variable + "=" - found = False - - with open(runscript_file) as runscript: - # Skips text before the beginning of the interesting block: - for line in runscript: - if script_variable in line: - var_value = (line.strip(script_variable)).replace("\n", "") - found = True - break - - if not found: - raise Exception("Could not find declaration of '"+script_variable+"' in '"+runscript_file+"'.") - - return var_value -# -#-------------------------------------------------------------------------------------------------------- -# -def keyboard_interaction(console_str,check_input,err,ntries=1): - """ - Function to check if the user has passed a proper input via keyboard interaction - :param console_str: Request printed to the console - :param check_input: function returning boolean which needs to be passed by input from keyboard 
interaction. - Must have two arguments with the latter being an optional bool called silent. - :param ntries: maximum number of tries (default: 1) - :return: The approved input from keyboard interaction - """ - # sanity checks - if not callable(check_input): - raise ValueError("check_input must be a function!") - else: - try: - if not type(check_input("xxx",silent=True)) is bool: - raise TypeError("check_input argument does not return a boolean.") - else: - pass - except: - raise Exception("Cannot approve check_input-argument to be proper.") - if not isinstance(err,BaseException): - raise ValueError("err_str-argument must be an instance of BaseException!") - if not isinstance(ntries,int) and ntries <= 1: - raise ValueError("ntries-argument must be an integer greater equal 1!") - - attempt = 0 - while attempt < ntries: - input_req = input(console_str) - if check_input(input_req): - break - else: - attempt += 1 - if attempt < ntries: - print(err) - console_str = "Retry!\n" - else: - raise err - - return input_req \ No newline at end of file diff --git a/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh b/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh index ba2bf2b06095532c72be6cbc42975c292d2230cf..f47f6dcb390db9a14f09570e4e0d354aa7d50d9b 100755 --- a/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh +++ b/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh @@ -26,7 +26,7 @@ USER=$USER # check/handle input arguments if [[ "$#" -lt 1 ]]; then data_dir=${base_data_dir_default} - echo "No base directory passed. Thus, the default path ${base_data_dir_default} will be applied." + echo "No base data-directory passed. Thus, the default path ${base_data_dir_default} will be applied." echo "In order to set it pass the directory path as a first argument." 
echo "Example: ./setup_runscript_templates.sh /my/desired/path/" elif [[ "$#" -ge 2 ]]; then @@ -37,6 +37,7 @@ else base_data_dir="$(dirname "${data_dir}")" if [[ ! -d ${base_data_dir} ]]; then echo "ERROR: Top-level data directory ${base_data_dir} does not exist. Cannot create passed directory." + echo "DEBUG: Parsed data_dir was '${data_dir}'" exit 2 fi if [[ ! -d ${data_dir} ]]; then