update predrnn-v2

86c1f592 · Yan Ji · 20e72fad · 86c1f592 · 86c1f592 · 86c1f592
Commit 86c1f592 authored 2 years ago by Yan Ji
--- a/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh
+++ b/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh
@@ -6,23 +6,23 @@
 #SBATCH --cpus-per-task=1
 #SBATCH --output=../HPC_scripts/train_predrnn_v2-out.%j
 #SBATCH --error=../HPC_scripts/train_predrnn_v2-err.%j
-#SBATCH --time=01:00:00
+#SBATCH --time=00:30:00
 #SBATCH --gres=gpu:1
-#SBATCH --partition=develgpus
+#SBATCH --partition=batch
 #SBATCH --mail-type=ALL
 #SBATCH --mail-user=y.ji@fz-juelich.de
 ##jutil env activate -p cjjsc42

 # Name of virtual environment 
-VIRT_ENV_NAME=venv_container
+VIRT_ENV_NAME=venv_hdfml

 # Loading mouldes
-source ../env_setup/modules_train.sh
+source ../env_setup/modules.sh
 # Activate virtual environment if needed (and possible)
 if [ -z ${VIRTUAL_ENV} ]; then
-   if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then
+   if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then
      echo "Activating virtual environment..."
-      source ../${VIRT_ENV_NAME}/bin/activate
+      source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate
   else 
      echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..."
      exit 1

--- a/video_prediction_tools/env_setup/create_env.sh
+++ b/video_prediction_tools/env_setup/create_env.sh
 #!/usr/bin/env bash
 #
-# __authors__ = Bing Gong, Michael Langguth
-# __date__  = '2020_01_15'
-# __last_update__  = '2021_04_28' by Michael Langguth
+# __authors__ = Michael Langguth
+# __date__  = '2022-01-21'
+# __update__= '2022-01-21'
 #
 # **************** Description ****************
-# This script can be used for setting up the virtual environment needed for AMBS-project
-# The name of the virtual environment is controlled by the first parsed argument.
-# It also setups the (Batch) runscript templates to customized runscripts (to be used by generate_runscript.py)
-# Note that the basic output directory for the workflow may be set may passing a path as second argument to this script.
+# This script can be used for setting up the virtual environment needed for downscaling with the U-net architecture
+# as being implemented for the Tier-1 dataset in MAELSTROM (see https://www.maelstrom-eurohpc.eu/content/docs/uploads/doc6.pdf) 
 # **************** Description ****************
 #
-# **************** Auxiliary functions ****************
+### auxiliary-function S ###
 check_argin() {
-# Handle input arguments and check if one is equal to -lcontainer
+# Handle input arguments and check if one is equal to -lcontainer (not needed currently)
 # Can also be used to check for non-positional arguments (such as -exp_id=*, see commented lines)
    for argin in "$@"; do
-        if [[ $argin == *"-base_dir="* ]]; then
-          base_dir=${argin#"-base_dir="}
-        fi
+        # if [[ $argin == *"-exp_id="* ]]; then
+        #  exp_id=${argin#"-exp_id="}
        if [[ $argin == *"-lcontainer"* ]]; then
 	        bool_container=1
        fi  
@@ -27,201 +24,116 @@ check_argin() {
        bool_container=0
    fi
 }
-# **************** Auxiliary functions ****************
+### auxiliary-function E ###
+
+### MAIN S ###
+#set -eu              # enforce abortion if a command fails (-e) or an unset variable is used (-u)
+
+SCR_SETUP="%create_env.sh: "

-# **************** Actual script ****************
-# some first sanity checks
-if [[ ${BASH_SOURCE[0]} == ${0} ]]; then
-  echo "ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'"
+## some first sanity checks
+# script is sourced?
+if [[ ${BASH_SOURCE[0]} == "${0}" ]]; then
+  echo "${SCR_SETUP}ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'"
  exit 1
 fi

+
 # from now on, just return if something unexpected occurs instead of exiting
 # as the latter would close the terminal including logging out
-if [[ ! -n "$1" ]]; then
-  echo "ERROR: Provide a name to set up the virtual environment, i.e. execute by prompting 'source create_env.sh [virt_env_name]"
+if [[ -z "$1" ]]; then
+  echo "${SCR_SETUP}ERROR: Provide a name to set up the virtual environment, i.e. execute by prompting 'source create_env.sh [virt_env_name]"
  return
 fi

-if [[ "$#" -gt 1 ]]; then
-  check_argin ${@:2}                 # sets base_dir if provided, always sets l_container
-fi
-
 # set some variables
-HOST_NAME=`hostname`
+HOST_NAME=$(hostname)
 ENV_NAME=$1
-ENV_SETUP_DIR=`pwd`
-WORKING_DIR="$(dirname "$ENV_SETUP_DIR")"
-EXE_DIR="$(basename "$ENV_SETUP_DIR")"
-ENV_DIR=${WORKING_DIR}/${ENV_NAME}
+SETUP_DIR=$(pwd)
+SETUP_DIR_NAME="$(basename "${SETUP_DIR}")"
+BASE_DIR="$(dirname "${SETUP_DIR}")"
+VENV_DIR="${BASE_DIR}/virtual_envs/${ENV_NAME}"

 ## perform sanity checks
-# correct bool_container if host is Juwels Booster and ensure running singularity
-if [[ "${bool_container}" == 0 ]] && [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then
-  echo "******************************************** NOTE ********************************************"
-  echo "          Running on Juwels Booster is only possible inside a container environment.          "
-  echo "******************************************** NOTE ********************************************"
-  bool_container=1
-fi
-
-if [[ "${bool_container}" == 1 ]]; then
-  echo "******************************************** NOTE ********************************************"
-  echo "                Make use of dedicated Horovod-related working branches only!!!                "
-  echo "******************************************** NOTE ********************************************"
-  # Check if singularity is running
-  if [[ -z "${SINGULARITY_NAME}" ]]; then
-    echo "ERROR: create_env.sh must be executed in a running singularity on Juwels in conjuction with container-usage."
-    echo "Thus, execute 'singularity shell [my_docker_image]' first!"
-    return
-  fi
-fi
-
-# further sanity checks:
-# * ensure execution from env_setup-directory
+# * check if script is called from env_setup-directory
 # * check if virtual env has already been set up

-if [[ "${EXE_DIR}" != "env_setup"  ]]; then
-  echo "ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!"
+# script is called from env_setup-directory?
+if [[ "${SETUP_DIR_NAME}" != "env_setup"  ]]; then
+  echo "${SCR_SETUP}ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!"
+  echo ${SETUP_DIR_NAME}
  return
 fi

-if [[ -d ${ENV_DIR} ]]; then
-  echo "Virtual environment has already been set up under ${ENV_DIR}. The present virtual environment will be activated now."
+# virtual environment already set-up?
+if [[ -d ${VENV_DIR} ]]; then
+  echo "${SCR_SETUP}Virtual environment has already been set up under ${VENV_DIR} and is ready to use."
  echo "NOTE: If you wish to set up a new virtual environment, delete the existing one or provide a different name."
  ENV_EXIST=1
 else
  ENV_EXIST=0
 fi

-## check integratability of modules
+## check that the host system is supported
 if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
-  if [[ "${bool_container}" > 0 ]]; then
-    # on Juwels Booster, we are in a container environment -> loading modules is not possible
-    echo "***** Note for container environment! *****"
-    echo "Already checked the required modules?"
-    echo "To do so, run 'source modules_train.sh' after exiting the singularity."
-    echo "***** Note for container environment! *****"
-  else
-    # load modules and check for their availability
-    echo "***** Checking modules required during the workflow... *****"
-    source ${ENV_SETUP_DIR}/modules_preprocess.sh purge
-    source ${ENV_SETUP_DIR}/modules_train.sh purge
-    source ${ENV_SETUP_DIR}/modules_postprocess.sh
-  fi
+  # unset PYTHONPATH to ensure that system-related paths are not set (container-environment should be used only)
+  unset PYTHONPATH
 else
-  echo "ERROR: AMBS-workflow is currently only supported on the Juelich HPC-systems HDF-ML, Juwels and Juwels Booster"
+  echo "${SCR_SETUP}ERROR: Model only runs on HDF-ML and Juwels (Booster) so far."
  return
-  # unset PYTHONPATH on every other machine that is not a known HPC-system
-  # unset PYTHONPATH
-fi
-
-
-# Create fresh virtual environment or just activate the existing one
-if [[ "$ENV_EXIST" == 0 ]]; then
-  # Check modules first
-  if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then
-    if [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then  
-      # on Juwels Booster, we are in a container environment -> loading modules is not possible	  
-      echo "***** Note for Juwels Booster! *****"
-      echo "Already checked the required modules?"
-      echo "To do so, run 'source modules_train.sh' after exiting the singularity."
-      echo "***** Note for Juwels Booster! *****"
-    else
-      # load modules and check for their availability
-      echo "***** Checking modules required during the workflow... *****"
-      source ${ENV_SETUP_DIR}/modules_preprocess.sh purge
-      source ${ENV_SETUP_DIR}/modules_train.sh purge
-      source ${ENV_SETUP_DIR}/modules_postprocess.sh
-    fi
-  else 
-    # unset PYTHONPATH on every other machine that is not a known HPC-system	
-    unset PYTHONPATH
-  fi
 fi

 ## set up virtual environment
 if [[ "$ENV_EXIST" == 0 ]]; then
-  # Activate virtual environment and install additional Python packages.
-  echo "Configuring and activating virtual environment on ${HOST_NAME}"
+  # Set up the virtual environment (via python3 -m venv) and install the required additional Python packages.
+  echo "${SCR_SETUP}Configuring and activating virtual environment on ${HOST_NAME}"

-  python3 -m venv $ENV_DIR
+  source modules.sh

-  activate_virt_env=${ENV_DIR}/bin/activate
+  python3 -m venv --system-site-packages "${VENV_DIR}"

-  echo "Entering virtual environment ${ENV_DIR} to install required Python modules..."
-  source ${activate_virt_env}
+  activate_virt_env=${VENV_DIR}/bin/activate

-  # install some requirements and/or check for modules
-  if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
-    # Install packages depending on host
-    echo "***** Start installing additional Python modules with pip... *****"
-    req_file=${ENV_SETUP_DIR}/requirements.txt 
-    if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi
+  echo "${SCR_SETUP}Entering virtual environment ${VENV_DIR} to install required Python modules..."
+  source "${activate_virt_env}"
 
-    pip3 install --no-cache-dir -r ${req_file}
-  else
-    echo "***** Start installing additional Python modules with pip... *****"
-    pip3 install --upgrade pip
-    pip3 install -r ${ENV_SETUP_DIR}/requirements.txt
-    pip3 install  mpi4py 
-    pip3 install netCDF4
-    pip3 install  numpy
-    pip3 install h5py
-    pip3 install tensorflow-gpu==1.13.1
+  # handle systematic issues with Stages/2022
+  MACHINE=$(hostname -f | cut -d. -f2)
+  # match on HOST_NAME (set from $(hostname) above; HOST is not assigned in the visible script), with a trailing glob like the other host patterns
+  if [[ "${HOST_NAME}" == jwlogin2[2-4]* ]]; then
+     MACHINE="juwelsbooster"
  fi
+  PY_VERSION=$(python --version 2>&1 | cut -d ' ' -f2 | cut -d. -f1-2)

-  # expand PYTHONPATH...
-  export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env}
-  export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env}
-  export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env}
-  export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env}
-  export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env}
+  echo "${SCR_SETUP}Appending PYTHONPATH on ${MACHINE} for Python version ${PY_VERSION} to ensure proper set-up..."
+
+  req_file=${SETUP_DIR}/requirements.txt
+
+  # Without the environmental variables set above, we need to install wheel and explicitly set the target directory
+  pip3 install --no-cache-dir -r "${req_file}"
+
+  # expand PYTHONPATH
+  export PYTHONPATH=${BASE_DIR}:$PYTHONPATH >> ${activate_virt_env} 
+  export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH >> ${activate_virt_env}
+  export PYTHONPATH=${BASE_DIR}/handle_data:$PYTHONPATH >> ${activate_virt_env}
+  export PYTHONPATH=${BASE_DIR}/models:$PYTHONPATH >> ${activate_virt_env}
+  export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env}
+  export PYTHONPATH=${BASE_DIR}/preprocess:$PYTHONPATH >> ${activate_virt_env}

-  if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
-     export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env}
-     if [[ "${bool_container}" > 0 ]]; then
-       export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:$PYTHONPATH
-     fi
-  fi
  # ...and ensure that this also done when the
  echo "" >> ${activate_virt_env}
  echo "# Expand PYTHONPATH..." >> ${activate_virt_env}
-  echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env}
-  echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env}
-  echo "export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH " >> ${activate_virt_env}
-  echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env}
-  echo "export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH" >> ${activate_virt_env}
-
-  if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then
-    echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env}
-    if [[ "${bool_container}" > 0 ]]; then
-       echo "export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:\$PYTHONPATH" >> ${activate_virt_env}
-     fi
-  fi
-  info_str="Virtual environment ${ENV_DIR} has been set up successfully."
+  echo "export PYTHONPATH=${BASE_DIR}:\$PYTHONPATH" >> ${activate_virt_env}
+  echo "export PYTHONPATH=${BASE_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env}
+  echo "export PYTHONPATH=${BASE_DIR}/models:\$PYTHONPATH " >> ${activate_virt_env}
+  echo "export PYTHONPATH=${BASE_DIR}/handle_data:\$PYTHONPATH" >> ${activate_virt_env}
+  echo "export PYTHONPATH=${BASE_DIR}/postprocess:\$PYTHONPATH" >> ${activate_virt_env}
+  echo "export PYTHONPATH=${BASE_DIR}/preprocess:\$PYTHONPATH" >> ${activate_virt_env}
+
+  info_str="Virtual environment ${VENV_DIR} has been set up successfully."
 elif [[ "$ENV_EXIST" == 1 ]]; then
-  # loading modules of postprocessing and activating virtual env are suifficient
-  if [[ "${bool_container}" == 0 ]]; then
-    source ${ENV_SETUP_DIR}/modules_postprocess.sh
+  # virtual environment already exists: nothing is set up or activated here, only the status message is set
+  info_str="Virtual environment ${VENV_DIR} has already been set up before. Nothing to be done."
 fi
-  source ${ENV_DIR}/bin/activate
-  info_str="Virtual environment ${ENV_DIR} has been activated successfully."
-fi
-
-echo "Set up runscript template for user ${USER}..."
-if [[ -z "${base_dir}" ]]; then
-  shift
-  source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh
-else
-  source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh ${base_dir}
-fi
-
-echo "******************************************** NOTE ********************************************"
-echo "${info_str}"
-echo "Make use of generate_runscript.py to generate customized runscripts of the workflow steps."
-echo "******************************************** NOTE ********************************************"

-# finally clean up loaded modules (if we are not on Juwels)
-#if [[ "${HOST_NAME}" == *hdfml* || "${HOST_NAME}" == *juwels* ]] && [[ "${HOST_NAME}" != jwlogin2[1-4]* ]]; then
-#  module --force purge
-#fi
+echo "${SCR_SETUP}${info_str}"
+### MAIN E ###
--- a/video_prediction_tools/env_setup/requirements.txt
+++ b/video_prediction_tools/env_setup/requirements.txt
-matplotlib==3.3.0
-mpi4py==3.0.1
-pandas==0.25.3
-xarray==0.16.0
-basemap==1.3.0
-numpy==1.17.3     # although this numpy-version is in the container, we set it here to avoid any further installation
-scikit-image==0.17.2
-opencv-python-headless==4.2.0.34
-netcdf4==1.5.8
-#metadata==0.2
-normalization==0.4
-utils==1.0.1
-
+decorator==5.0.9
+networkx==2.6.3
+Werkzeug==2.0.1
+numpy==1.21.3
+xarray==0.20.1
+climetlab==0.8.14
+climetlab-maelstrom-downscaling==0.1.0
+wandb
+meteva
+opencv-contrib-python
+opencv-python
+wradlib
+pysteps
+timm
--- a/video_prediction_tools/env_setup/wrapper_container.sh
+++ b/video_prediction_tools/env_setup/wrapper_container.sh
@@ -7,31 +7,16 @@ EXE_DIR="$(basename "$ENV_SETUP_DIR")"
 VENV_DIR=$WORKING_DIR/virtual_envs/$1
 shift                     # replaces $1 by $2, so that $@ does not include the name of the virtual environment anymore

-# sanity checks
-if [[ "${EXE_DIR}" = "HPC_scripts"   ]] || [[ "${EXE_DIR}" = "no_HPC_scripts" ]];
-then
-  echo "The runscript is running under the folder ${EXE_DIR}"
-else
-  echo "ERROR: Run the setup-script for the enviornment from the (no_)HPC_scripts-directory!"
-  exit
-fi
-
 if ! [[ -d "${VENV_DIR}" ]]; then
   echo "ERROR: Could not found virtual environment under ${VENV_DIR}!"
   exit
 fi

-#expand PYHTONPATH
-# Include site-packages from virtual environment...
+ml go-1.17.6/singularity-3.9.5     # load the Singularity module ('ml' is the Lmod shorthand; a second 'ml' would be taken as a module name)
+
+# unset PYTHONPATH and activate virtual environment
 unset PYTHONPATH
-export PYTHONPATH=${VENV_DIR}/lib/python3.8/site-packages/:$PYTHONPATH
-# ... dist-packages from container singularity...
-export PYTHONPATH=/usr/local/lib/python3.8/dist-packages:$PYTHONPATH
-# ... and modules from this project
-export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH
-export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH
-export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH
-export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH
+source ${VENV_DIR}/bin/activate

 # Control
 echo "****** Check PYTHONPATH *****"
@@ -40,3 +25,4 @@ echo $PYTHONPATH
 export PMIX_SECURITY_MODE="native"     # default would include munge which is unavailable

 $@
+
--- a/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py
+++ b/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py
-from .base_dataset import BaseVideoDataset
-from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset
+#from .base_dataset import BaseVideoDataset
+#from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset
 #from .google_robot_dataset import GoogleRobotVideoDataset
 #from .sv2p_dataset import SV2PVideoDataset
 #from .softmotion_dataset import SoftmotionVideoDataset
 #from .kth_dataset import KTHVideoDataset
 #from .ucf101_dataset import UCF101VideoDataset
 #from .cartgripper_dataset import CartgripperVideoDataset
-from .era5_dataset import ERA5Dataset
+#from .era5_dataset import ERA5Dataset
 #from .moving_mnist import MovingMnist
-from .gzprcp_data import GZprcp
+#from .gzprcp_data import GZprcp
 from data_preprocess.dataset_options import known_datasets
 #from .era5_dataset_v2_anomaly import ERA5Dataset_v2_anomaly


--- a/video_prediction_tools/model_modules/video_prediction/models/__init__.py
+++ b/video_prediction_tools/model_modules/video_prediction/models/__init__.py
-from .base_model import BaseVideoPredictionModel
-from .base_model import VideoPredictionModel
+#from .base_model import BaseVideoPredictionModel
+#from .base_model import VideoPredictionModel
 #from .non_trainable_model import NonTrainableVideoPredictionModel
 #from .non_trainable_model import GroundTruthVideoPredictionModel
 #from .non_trainable_model import RepeatVideoPredictionModel
-from .savp_model import SAVPVideoPredictionModel
-from .vanilla_vae_model import VanillaVAEVideoPredictionModel
-from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel
+#from .savp_model import SAVPVideoPredictionModel
+#from .vanilla_vae_model import VanillaVAEVideoPredictionModel
+#from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel
 #from .mcnet_model import McNetVideoPredictionModel
 #from .test_model import TestModelVideoPredictionModel
-from model_modules.model_architectures import known_models
-from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel
-from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel
+#from model_modules.model_architectures import known_models
+#from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel
+#from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel


 def get_model_class(model):