diff --git a/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh b/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh index 5644ed4add6654d997a35a57ddcf878670135405..8634737502470f69ef98946f3eed7265cd211108 100644 --- a/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh +++ b/video_prediction_tools/HPC_scripts/train_predrnn_v2.sh @@ -6,23 +6,23 @@ #SBATCH --cpus-per-task=1 #SBATCH --output=../HPC_scripts/train_predrnn_v2-out.%j #SBATCH --error=../HPC_scripts/train_predrnn_v2-err.%j -#SBATCH --time=01:00:00 +#SBATCH --time=00:30:00 #SBATCH --gres=gpu:1 -#SBATCH --partition=develgpus +#SBATCH --partition=batch #SBATCH --mail-type=ALL #SBATCH --mail-user=y.ji@fz-juelich.de ##jutil env activate -p cjjsc42 # Name of virtual environment -VIRT_ENV_NAME=venv_container +VIRT_ENV_NAME=venv_hdfml # Loading mouldes -source ../env_setup/modules_train.sh +source ../env_setup/modules.sh # Activate virtual environment if needed (and possible) if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then + if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then echo "Activating virtual environment..." - source ../${VIRT_ENV_NAME}/bin/activate + source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate else echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." 
exit 1 diff --git a/video_prediction_tools/env_setup/create_env.sh b/video_prediction_tools/env_setup/create_env.sh index 750abfb9a8a95db6c6f115e9ad3c14c81320b109..e42ba13cb5e791a2c0ba82b9348739ff7cf64f94 100755 --- a/video_prediction_tools/env_setup/create_env.sh +++ b/video_prediction_tools/env_setup/create_env.sh @@ -1,227 +1,139 @@ #!/usr/bin/env bash # -# __authors__ = Bing Gong, Michael Langguth -# __date__ = '2020_01_15' -# __last_update__ = '2021_04_28' by Michael Langguth +# __authors__ = Michael Langguth +# __date__ = '2022-01-21' +# __update__= '2022-01-21' # # **************** Description **************** -# This script can be used for setting up the virtual environment needed for AMBS-project -# The name of the virtual environment is controlled by the first parsed argument. -# It also setups the (Batch) runscript templates to customized runscripts (to be used by generate_runscript.py) -# Note that the basic output directory for the workflow may be set may passing a path as second argument to this script. 
+# This script can be used for setting up the virtual environment needed for downscaling with the U-net architecture +# as implemented for the Tier-1 dataset in MAELSTROM (see https://www.maelstrom-eurohpc.eu/content/docs/uploads/doc6.pdf) # **************** Description **************** # -# **************** Auxiliary functions **************** +### auxiliary-function S ### check_argin() { -# Handle input arguments and check if one is equal to -lcontainer +# Handle input arguments and check if one is equal to -lcontainer (not needed currently) # Can also be used to check for non-positional arguments (such as -exp_id=*, see commented lines) for argin in "$@"; do - if [[ $argin == *"-base_dir="* ]]; then - base_dir=${argin#"-base_dir="} - fi + # if [[ $argin == *"-exp_id="* ]]; then + # exp_id=${argin#"-exp_id="} if [[ $argin == *"-lcontainer"* ]]; then bool_container=1 - fi + fi done if [[ -z "${bool_container}" ]]; then bool_container=0 fi } -# **************** Auxiliary functions **************** +### auxiliary-function E ### -# **************** Actual script **************** -# some first sanity checks -if [[ ${BASH_SOURCE[0]} == ${0} ]]; then - echo "ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'" +### MAIN S ### +#set -eu # enforce abortion if a command fails or a variable is unset + +SCR_SETUP="%create_env.sh: " + +## some first sanity checks +# script is sourced? +if [[ ${BASH_SOURCE[0]} == "${0}" ]]; then + echo "${SCR_SETUP}ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'" exit 1 fi + # from now on, just return if something unexpected occurs instead of exiting # as the latter would close the terminal including logging out -if [[ ! -n "$1" ]]; then - echo "ERROR: Provide a name to set up the virtual environment, i.e. 
execute by prompting 'source create_env.sh [virt_env_name]" +if [[ -z "$1" ]]; then + echo "${SCR_SETUP}ERROR: Provide a name to set up the virtual environment, i.e. execute by prompting 'source create_env.sh [virt_env_name]" return fi -if [[ "$#" -gt 1 ]]; then - check_argin ${@:2} # sets base_dir if provided, always sets l_container -fi - # set some variables -HOST_NAME=`hostname` +HOST_NAME=$(hostname) ENV_NAME=$1 -ENV_SETUP_DIR=`pwd` -WORKING_DIR="$(dirname "$ENV_SETUP_DIR")" -EXE_DIR="$(basename "$ENV_SETUP_DIR")" -ENV_DIR=${WORKING_DIR}/${ENV_NAME} +SETUP_DIR=$(pwd) +SETUP_DIR_NAME="$(basename "${SETUP_DIR}")" +BASE_DIR="$(dirname "${SETUP_DIR}")" +VENV_DIR="${BASE_DIR}/virtual_envs/${ENV_NAME}" ## perform sanity checks -# correct bool_container if host is Juwels Booster and ensure running singularity -if [[ "${bool_container}" == 0 ]] && [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then - echo "******************************************** NOTE ********************************************" - echo " Running on Juwels Booster is only possible inside a container environment. " - echo "******************************************** NOTE ********************************************" - bool_container=1 -fi - -if [[ "${bool_container}" == 1 ]]; then - echo "******************************************** NOTE ********************************************" - echo " Make use of dedicated Horovod-related working branches only!!! " - echo "******************************************** NOTE ********************************************" - # Check if singularity is running - if [[ -z "${SINGULARITY_NAME}" ]]; then - echo "ERROR: create_env.sh must be executed in a running singularity on Juwels in conjuction with container-usage." - echo "Thus, execute 'singularity shell [my_docker_image]' first!" 
- return - fi -fi - -# further sanity checks: -# * ensure execution from env_setup-directory +# * check if script is called from env_setup-directory # * check if virtual env has already been set up -if [[ "${EXE_DIR}" != "env_setup" ]]; then - echo "ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!" +# script is called from env_setup-directory? +if [[ "${SETUP_DIR_NAME}" != "env_setup" ]]; then + echo "${SCR_SETUP}ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!" + echo ${SETUP_DIR_NAME} return fi -if [[ -d ${ENV_DIR} ]]; then - echo "Virtual environment has already been set up under ${ENV_DIR}. The present virtual environment will be activated now." +# virtual environment already set-up? +if [[ -d ${VENV_DIR} ]]; then + echo "${SCR_SETUP}Virtual environment has already been set up under ${VENV_DIR} and is ready to use." echo "NOTE: If you wish to set up a new virtual environment, delete the existing one or provide a different name." ENV_EXIST=1 else ENV_EXIST=0 fi -## check integratability of modules +## check integratability of operating system if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - if [[ "${bool_container}" > 0 ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for container environment! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for container environment! *****" - else - # load modules and check for their availability - echo "***** Checking modules required during the workflow... 
*****" - source ${ENV_SETUP_DIR}/modules_preprocess.sh purge - source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi + # unset PYTHONPATH to ensure that system-related paths are not set (container-environment should be used only) + unset PYTHONPATH else - echo "ERROR: AMBS-workflow is currently only supported on the Juelich HPC-systems HDF-ML, Juwels and Juwels Booster" + echo "${SCR_SETUP}ERROR: Model only runs on HDF-ML and Juwels (Booster) so far." return - # unset PYTHONPATH on every other machine that is not a known HPC-system - # unset PYTHONPATH -fi - - -# Create fresh virtual environment or just activate the existing one -if [[ "$ENV_EXIST" == 0 ]]; then - # Check modules first - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - if [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then - # on Juwels Booster, we are in a container environment -> loading modules is not possible - echo "***** Note for Juwels Booster! *****" - echo "Already checked the required modules?" - echo "To do so, run 'source modules_train.sh' after exiting the singularity." - echo "***** Note for Juwels Booster! *****" - else - # load modules and check for their availability - echo "***** Checking modules required during the workflow... *****" - source ${ENV_SETUP_DIR}/modules_preprocess.sh purge - source ${ENV_SETUP_DIR}/modules_train.sh purge - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi - else - # unset PYTHONPATH on every other machine that is not a known HPC-system - unset PYTHONPATH - fi fi ## set up virtual environment if [[ "$ENV_EXIST" == 0 ]]; then - # Activate virtual environment and install additional Python packages. - echo "Configuring and activating virtual environment on ${HOST_NAME}" - - python3 -m venv $ENV_DIR - - activate_virt_env=${ENV_DIR}/bin/activate - - echo "Entering virtual environment ${ENV_DIR} to install required Python modules..." 
- source ${activate_virt_env} - - # install some requirements and/or check for modules - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - # Install packages depending on host - echo "***** Start installing additional Python modules with pip... *****" - req_file=${ENV_SETUP_DIR}/requirements.txt - if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi - - pip3 install --no-cache-dir -r ${req_file} - else - echo "***** Start installing additional Python modules with pip... *****" - pip3 install --upgrade pip - pip3 install -r ${ENV_SETUP_DIR}/requirements.txt - pip3 install mpi4py - pip3 install netCDF4 - pip3 install numpy - pip3 install h5py - pip3 install tensorflow-gpu==1.13.1 - fi + # Install virtualenv-package and set-up virtual environment with required additional Python packages. + echo "${SCR_SETUP}Configuring and activating virtual environment on ${HOST_NAME}" + + source modules.sh + + python3 -m venv --system-site-packages "${VENV_DIR}" - # expand PYTHONPATH... - export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env} - export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then - export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:$PYTHONPATH - fi + activate_virt_env=${VENV_DIR}/bin/activate + + echo "${SCR_SETUP}Entering virtual environment ${VENV_DIR} to install required Python modules..." 
+ source "${activate_virt_env}" + + # handle systematic issues with Stages/2022 + MACHINE=$(hostname -f | cut -d. -f2) + if [[ "${HOST}" == jwlogin2[2-4] ]]; then + MACHINE="juwelsbooster" fi - # ...and ensure that this also done when the + PY_VERSION=$(python --version 2>&1 | cut -d ' ' -f2 | cut -d. -f1-2) + + echo "${SCR_SETUP}Appending PYTHONPATH on ${MACHINE} for Python version ${PY_VERSION} to ensure proper set-up..." + + req_file=${SETUP_DIR}/requirements.txt + + # Without the environmental variables set above, we need to install wheel and explicitly set the target directory + pip3 install --no-cache-dir -r "${req_file}" + + # expand PYTHONPATH + export PYTHONPATH=${BASE_DIR}:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${BASE_DIR}/handle_data:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${BASE_DIR}/models:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env} + export PYTHONPATH=${BASE_DIR}/preprocess:$PYTHONPATH >> ${activate_virt_env} + + # ...and ensure that this is also done when the virtual environment gets activated echo "" >> ${activate_virt_env} echo "# Expand PYTHONPATH..." 
>> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH " >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env} - echo "export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH" >> ${activate_virt_env} - - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then - echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} - if [[ "${bool_container}" > 0 ]]; then - echo "export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:\$PYTHONPATH" >> ${activate_virt_env} - fi - fi - info_str="Virtual environment ${ENV_DIR} has been set up successfully." + echo "export PYTHONPATH=${BASE_DIR}:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${BASE_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${BASE_DIR}/models:\$PYTHONPATH " >> ${activate_virt_env} + echo "export PYTHONPATH=${BASE_DIR}/handle_data:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${BASE_DIR}/postprocess:\$PYTHONPATH" >> ${activate_virt_env} + echo "export PYTHONPATH=${BASE_DIR}/preprocess:\$PYTHONPATH" >> ${activate_virt_env} + + info_str="Virtual environment ${VENV_DIR} has been set up successfully." elif [[ "$ENV_EXIST" == 1 ]]; then - # loading modules of postprocessing and activating virtual env are suifficient - if [[ "${bool_container}" == 0 ]]; then - source ${ENV_SETUP_DIR}/modules_postprocess.sh - fi - source ${ENV_DIR}/bin/activate - info_str="Virtual environment ${ENV_DIR} has been activated successfully." + # nothing to do: the existing virtual environment is neither rebuilt nor activated here + info_str="Virtual environment ${VENV_DIR} has already been set up before. Nothing to be done." fi -echo "Set up runscript template for user ${USER}..." 
-if [[ -z "${base_dir}" ]]; then - shift - source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh -else - source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh ${base_dir} -fi - -echo "******************************************** NOTE ********************************************" -echo "${info_str}" -echo "Make use of generate_runscript.py to generate customized runscripts of the workflow steps." -echo "******************************************** NOTE ********************************************" - -# finally clean up loaded modules (if we are not on Juwels) -#if [[ "${HOST_NAME}" == *hdfml* || "${HOST_NAME}" == *juwels* ]] && [[ "${HOST_NAME}" != jwlogin2[1-4]* ]]; then -# module --force purge -#fi +echo "${SCR_SETUP}${info_str}" +### MAIN E ### diff --git a/video_prediction_tools/env_setup/requirements.txt b/video_prediction_tools/env_setup/requirements.txt index 9c188138ea805d0f05203938a910e6247d4dd8ac..da23f7b39f199049dcec7150075cf6faf10426e7 100755 --- a/video_prediction_tools/env_setup/requirements.txt +++ b/video_prediction_tools/env_setup/requirements.txt @@ -1,13 +1,14 @@ -matplotlib==3.3.0 -mpi4py==3.0.1 -pandas==0.25.3 -xarray==0.16.0 -basemap==1.3.0 -numpy==1.17.3 # although this numpy-version is in the container, we set it here to avoid any further installation -scikit-image==0.17.2 -opencv-python-headless==4.2.0.34 -netcdf4==1.5.8 -#metadata==0.2 -normalization==0.4 -utils==1.0.1 - +decorator==5.0.9 +networkx==2.6.3 +Werkzeug==2.0.1 +numpy==1.21.3 +xarray==0.20.1 +climetlab==0.8.14 +climetlab-maelstrom-downscaling==0.1.0 +wandb +meteva +opencv-contrib-python +opencv-python +wradlib +pysteps +timm diff --git a/video_prediction_tools/env_setup/wrapper_container.sh b/video_prediction_tools/env_setup/wrapper_container.sh old mode 100755 new mode 100644 index 97089e38a8dacc809bab5a3f9bbc62c9d5997690..69518820f97ce3a3a65859e79c80a5e1fdd753e1 --- a/video_prediction_tools/env_setup/wrapper_container.sh +++ 
b/video_prediction_tools/env_setup/wrapper_container.sh @@ -7,31 +7,16 @@ EXE_DIR="$(basename "$ENV_SETUP_DIR")" VENV_DIR=$WORKING_DIR/virtual_envs/$1 shift # replaces $1 by $2, so that $@ does not include the name of the virtual environment anymore -# sanity checks -if [[ "${EXE_DIR}" = "HPC_scripts" ]] || [[ "${EXE_DIR}" = "no_HPC_scripts" ]]; -then - echo "The runscript is running under the folder ${EXE_DIR}" -else - echo "ERROR: Run the setup-script for the enviornment from the (no_)HPC_scripts-directory!" - exit -fi - if ! [[ -d "${VENV_DIR}" ]]; then echo "ERROR: Could not found virtual environment under ${VENV_DIR}!" exit fi -#expand PYHTONPATH -# Include site-packages from virtual environment... +ml ml go-1.17.6/singularity-3.9.5 + +# unset PYTHONPATH and activate virtual environment unset PYTHONPATH -export PYTHONPATH=${VENV_DIR}/lib/python3.8/site-packages/:$PYTHONPATH -# ... dist-packages from container singularity... -export PYTHONPATH=/usr/local/lib/python3.8/dist-packages:$PYTHONPATH -# ... 
and modules from this project -export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH -export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH -export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH -export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH +source ${VENV_DIR}/bin/activate # Control echo "****** Check PYTHONPATH *****" @@ -40,3 +25,4 @@ echo $PYTHONPATH export PMIX_SECURITY_MODE="native" # default would include munge which is unavailable $@ + diff --git a/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py b/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py index 1cf4a247a5c97d9751a03ababad67cefd17a76e4..ed32ddb3d4e8b7e934a230612419de9f889b7bc0 100644 --- a/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py +++ b/video_prediction_tools/model_modules/video_prediction/datasets/__init__.py @@ -1,14 +1,14 @@ -from .base_dataset import BaseVideoDataset -from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset +#from .base_dataset import BaseVideoDataset +#from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset #from .google_robot_dataset import GoogleRobotVideoDataset #from .sv2p_dataset import SV2PVideoDataset #from .softmotion_dataset import SoftmotionVideoDataset #from .kth_dataset import KTHVideoDataset #from .ucf101_dataset import UCF101VideoDataset #from .cartgripper_dataset import CartgripperVideoDataset -from .era5_dataset import ERA5Dataset +#from .era5_dataset import ERA5Dataset #from .moving_mnist import MovingMnist -from .gzprcp_data import GZprcp +#from .gzprcp_data import GZprcp from data_preprocess.dataset_options import known_datasets #from .era5_dataset_v2_anomaly import ERA5Dataset_v2_anomaly diff --git a/video_prediction_tools/model_modules/video_prediction/models/__init__.py b/video_prediction_tools/model_modules/video_prediction/models/__init__.py index 
f53f90306be15f10f80af3b951e4b00476cec44e..4bf18dd5a5a843f1f2d77cd65428d220a6a95517 100644 --- a/video_prediction_tools/model_modules/video_prediction/models/__init__.py +++ b/video_prediction_tools/model_modules/video_prediction/models/__init__.py @@ -1,16 +1,16 @@ -from .base_model import BaseVideoPredictionModel -from .base_model import VideoPredictionModel +#from .base_model import BaseVideoPredictionModel +#from .base_model import VideoPredictionModel #from .non_trainable_model import NonTrainableVideoPredictionModel #from .non_trainable_model import GroundTruthVideoPredictionModel #from .non_trainable_model import RepeatVideoPredictionModel -from .savp_model import SAVPVideoPredictionModel -from .vanilla_vae_model import VanillaVAEVideoPredictionModel -from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel +#from .savp_model import SAVPVideoPredictionModel +#from .vanilla_vae_model import VanillaVAEVideoPredictionModel +#from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel #from .mcnet_model import McNetVideoPredictionModel #from .test_model import TestModelVideoPredictionModel -from model_modules.model_architectures import known_models -from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel -from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel +#from model_modules.model_architectures import known_models +#from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel +#from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel def get_model_class(model):