Commit 86c1f592 authored by Yan Ji's avatar Yan Ji
Browse files

update predrnn-v2

parent 20e72fad
Pipeline #121135 failed with stages
in 13 seconds
......@@ -6,23 +6,23 @@
#SBATCH --cpus-per-task=1
#SBATCH --output=../HPC_scripts/train_predrnn_v2-out.%j
#SBATCH --error=../HPC_scripts/train_predrnn_v2-err.%j
#SBATCH --time=01:00:00
#SBATCH --time=00:30:00
#SBATCH --gres=gpu:1
#SBATCH --partition=develgpus
#SBATCH --partition=batch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=y.ji@fz-juelich.de
##jutil env activate -p cjjsc42
# Name of virtual environment
VIRT_ENV_NAME=venv_container
VIRT_ENV_NAME=venv_hdfml
# Loading modules
source ../env_setup/modules_train.sh
source ../env_setup/modules.sh
# Activate virtual environment if needed (and possible)
if [ -z ${VIRTUAL_ENV} ]; then
if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then
if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then
echo "Activating virtual environment..."
source ../${VIRT_ENV_NAME}/bin/activate
source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate
else
echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..."
exit 1
......
#!/usr/bin/env bash
#
# __authors__ = Bing Gong, Michael Langguth
# __date__ = '2020_01_15'
# __last_update__ = '2021_04_28' by Michael Langguth
# __authors__ = Michael Langguth
# __date__ = '2022-01-21'
# __update__= '2022-01-21'
#
# **************** Description ****************
# This script can be used for setting up the virtual environment needed for AMBS-project
# The name of the virtual environment is controlled by the first parsed argument.
# It also setups the (Batch) runscript templates to customized runscripts (to be used by generate_runscript.py)
# Note that the basic output directory for the workflow may be set by passing a path as second argument to this script.
# This script can be used for setting up the virtual environment needed for downscaling with the U-net architecture
# as being implemented for the Tier-1 dataset in MAELSTROM (see https://www.maelstrom-eurohpc.eu/content/docs/uploads/doc6.pdf)
# **************** Description ****************
#
# **************** Auxiliary functions ****************
### auxiliary-function S ###
check_argin() {
# Handle input arguments and check if one is equal to -lcontainer
# Handle input arguments and check if one is equal to -lcontainer (not needed currently)
# Can also be used to check for non-positional arguments (such as -exp_id=*, see commented lines)
for argin in "$@"; do
if [[ $argin == *"-base_dir="* ]]; then
base_dir=${argin#"-base_dir="}
fi
# if [[ $argin == *"-exp_id="* ]]; then
# exp_id=${argin#"-exp_id="}
if [[ $argin == *"-lcontainer"* ]]; then
bool_container=1
fi
fi
done
if [[ -z "${bool_container}" ]]; then
bool_container=0
fi
}
# **************** Auxiliary functions ****************
### auxiliary-function E ###
# **************** Actual script ****************
# some first sanity checks
if [[ ${BASH_SOURCE[0]} == ${0} ]]; then
echo "ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'"
### MAIN S ###
#set -eu # enforce abort if a command fails (-e) or an unset variable is referenced (-u)
SCR_SETUP="%create_env.sh: "
## some first sanity checks
# script is sourced?
if [[ ${BASH_SOURCE[0]} == "${0}" ]]; then
echo "${SCR_SETUP}ERROR: 'create_env.sh' must be sourced, i.e. execute by prompting 'source create_env.sh [virt_env_name]'"
exit 1
fi
# from now on, just return if something unexpected occurs instead of exiting
# as the latter would close the terminal including logging out
if [[ ! -n "$1" ]]; then
echo "ERROR: Provide a name to set up the virtual environment, i.e. execute by prompting 'source create_env.sh [virt_env_name]"
if [[ -z "$1" ]]; then
echo "${SCR_SETUP}ERROR: Provide a name to set up the virtual environment, i.e. execute by prompting 'source create_env.sh [virt_env_name]"
return
fi
if [[ "$#" -gt 1 ]]; then
check_argin ${@:2} # sets base_dir if provided, always sets l_container
fi
# set some variables
HOST_NAME=`hostname`
HOST_NAME=$(hostname)
ENV_NAME=$1
ENV_SETUP_DIR=`pwd`
WORKING_DIR="$(dirname "$ENV_SETUP_DIR")"
EXE_DIR="$(basename "$ENV_SETUP_DIR")"
ENV_DIR=${WORKING_DIR}/${ENV_NAME}
SETUP_DIR=$(pwd)
SETUP_DIR_NAME="$(basename "${SETUP_DIR}")"
BASE_DIR="$(dirname "${SETUP_DIR}")"
VENV_DIR="${BASE_DIR}/virtual_envs/${ENV_NAME}"
## perform sanity checks
# correct bool_container if host is Juwels Booster and ensure running singularity
if [[ "${bool_container}" == 0 ]] && [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then
echo "******************************************** NOTE ********************************************"
echo " Running on Juwels Booster is only possible inside a container environment. "
echo "******************************************** NOTE ********************************************"
bool_container=1
fi
if [[ "${bool_container}" == 1 ]]; then
echo "******************************************** NOTE ********************************************"
echo " Make use of dedicated Horovod-related working branches only!!! "
echo "******************************************** NOTE ********************************************"
# Check if singularity is running
if [[ -z "${SINGULARITY_NAME}" ]]; then
echo "ERROR: create_env.sh must be executed in a running singularity on Juwels in conjuction with container-usage."
echo "Thus, execute 'singularity shell [my_docker_image]' first!"
return
fi
fi
# further sanity checks:
# * ensure execution from env_setup-directory
# * check if script is called from env_setup-directory
# * check if virtual env has already been set up
if [[ "${EXE_DIR}" != "env_setup" ]]; then
echo "ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!"
# script is called from env_setup-directory?
if [[ "${SETUP_DIR_NAME}" != "env_setup" ]]; then
echo "${SCR_SETUP}ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!"
echo ${SETUP_DIR_NAME}
return
fi
if [[ -d ${ENV_DIR} ]]; then
echo "Virtual environment has already been set up under ${ENV_DIR}. The present virtual environment will be activated now."
# virtual environment already set-up?
if [[ -d ${VENV_DIR} ]]; then
echo "${SCR_SETUP}Virtual environment has already been set up under ${VENV_DIR} and is ready to use."
echo "NOTE: If you wish to set up a new virtual environment, delete the existing one or provide a different name."
ENV_EXIST=1
else
ENV_EXIST=0
fi
## check integratability of modules
## check integratability of operating system
if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
if [[ "${bool_container}" > 0 ]]; then
# on Juwels Booster, we are in a container environment -> loading modules is not possible
echo "***** Note for container environment! *****"
echo "Already checked the required modules?"
echo "To do so, run 'source modules_train.sh' after exiting the singularity."
echo "***** Note for container environment! *****"
else
# load modules and check for their availability
echo "***** Checking modules required during the workflow... *****"
source ${ENV_SETUP_DIR}/modules_preprocess.sh purge
source ${ENV_SETUP_DIR}/modules_train.sh purge
source ${ENV_SETUP_DIR}/modules_postprocess.sh
fi
# unset PYTHONPATH to ensure that system-related paths are not set (container-environment should be used only)
unset PYTHONPATH
else
echo "ERROR: AMBS-workflow is currently only supported on the Juelich HPC-systems HDF-ML, Juwels and Juwels Booster"
echo "${SCR_SETUP}ERROR: Model only runs on HDF-ML and Juwels (Booster) so far."
return
# unset PYTHONPATH on every other machine that is not a known HPC-system
# unset PYTHONPATH
fi
# Create fresh virtual environment or just activate the existing one
if [[ "$ENV_EXIST" == 0 ]]; then
# Check modules first
if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then
if [[ "${HOST_NAME}" == jwlogin2[1-4]* ]]; then
# on Juwels Booster, we are in a container environment -> loading modules is not possible
echo "***** Note for Juwels Booster! *****"
echo "Already checked the required modules?"
echo "To do so, run 'source modules_train.sh' after exiting the singularity."
echo "***** Note for Juwels Booster! *****"
else
# load modules and check for their availability
echo "***** Checking modules required during the workflow... *****"
source ${ENV_SETUP_DIR}/modules_preprocess.sh purge
source ${ENV_SETUP_DIR}/modules_train.sh purge
source ${ENV_SETUP_DIR}/modules_postprocess.sh
fi
else
# unset PYTHONPATH on every other machine that is not a known HPC-system
unset PYTHONPATH
fi
fi
## set up virtual environment
if [[ "$ENV_EXIST" == 0 ]]; then
# Activate virtual environment and install additional Python packages.
echo "Configuring and activating virtual environment on ${HOST_NAME}"
python3 -m venv $ENV_DIR
activate_virt_env=${ENV_DIR}/bin/activate
echo "Entering virtual environment ${ENV_DIR} to install required Python modules..."
source ${activate_virt_env}
# install some requirements and/or check for modules
if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
# Install packages depending on host
echo "***** Start installing additional Python modules with pip... *****"
req_file=${ENV_SETUP_DIR}/requirements.txt
if [[ "${bool_container}" > 0 ]]; then req_file=${ENV_SETUP_DIR}/requirements_container.txt; fi
pip3 install --no-cache-dir -r ${req_file}
else
echo "***** Start installing additional Python modules with pip... *****"
pip3 install --upgrade pip
pip3 install -r ${ENV_SETUP_DIR}/requirements.txt
pip3 install mpi4py
pip3 install netCDF4
pip3 install numpy
pip3 install h5py
pip3 install tensorflow-gpu==1.13.1
fi
# Install virtualenv-package and set-up virtual environment with required additional Python packages.
echo "${SCR_SETUP}Configuring and activating virtual environment on ${HOST_NAME}"
source modules.sh
python3 -m venv --system-site-packages "${VENV_DIR}"
# expand PYTHONPATH...
export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env}
if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *jwlogin* ]]; then
export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:$PYTHONPATH >> ${activate_virt_env}
if [[ "${bool_container}" > 0 ]]; then
export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:$PYTHONPATH
fi
activate_virt_env=${VENV_DIR}/bin/activate
echo "${SCR_SETUP}Entering virtual environment ${VENV_DIR} to install required Python modules..."
source "${activate_virt_env}"
# handle systematic issues with Stages/2022
MACHINE=$(hostname -f | cut -d. -f2)
if [[ "${HOST}" == jwlogin2[2-4] ]]; then
MACHINE="juwelsbooster"
fi
# ...and ensure that this also done when the
PY_VERSION=$(python --version 2>&1 | cut -d ' ' -f2 | cut -d. -f1-2)
echo "${SCR_SETUP}Appending PYTHONPATH on ${MACHINE} for Python version ${PY_VERSION} to ensure proper set-up..."
req_file=${SETUP_DIR}/requirements.txt
# Without the environmental variables set above, we need to install wheel and explicitly set the target directory
pip3 install --no-cache-dir -r "${req_file}"
# expand PYTHONPATH
export PYTHONPATH=${BASE_DIR}:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${BASE_DIR}/handle_data:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${BASE_DIR}/models:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH >> ${activate_virt_env}
export PYTHONPATH=${BASE_DIR}/preprocess:$PYTHONPATH >> ${activate_virt_env}
# ...and ensure that this is also done whenever the virtual environment is activated
echo "" >> ${activate_virt_env}
echo "# Expand PYTHONPATH..." >> ${activate_virt_env}
echo "export PYTHONPATH=${WORKING_DIR}:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${WORKING_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH " >> ${activate_virt_env}
echo "export PYTHONPATH=${WORKING_DIR}/external_package/lpips-tensorflow:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH" >> ${activate_virt_env}
if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == *juwels* ]]; then
echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env}
if [[ "${bool_container}" > 0 ]]; then
echo "export PYTONPATH=/usr/locali/lib/python3.6/dist-packages:\$PYTHONPATH" >> ${activate_virt_env}
fi
fi
info_str="Virtual environment ${ENV_DIR} has been set up successfully."
echo "export PYTHONPATH=${BASE_DIR}:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${BASE_DIR}/utils/:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${BASE_DIR}/models:\$PYTHONPATH " >> ${activate_virt_env}
echo "export PYTHONPATH=${BASE_DIR}/handle_data:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${BASE_DIR}/postprocess:\$PYTHONPATH" >> ${activate_virt_env}
echo "export PYTHONPATH=${BASE_DIR}/preprocess:\$PYTHONPATH" >> ${activate_virt_env}
info_str="Virtual environment ${VENV_DIR} has been set up successfully."
elif [[ "$ENV_EXIST" == 1 ]]; then
# loading modules of postprocessing and activating virtual env are sufficient
if [[ "${bool_container}" == 0 ]]; then
source ${ENV_SETUP_DIR}/modules_postprocess.sh
fi
source ${ENV_DIR}/bin/activate
info_str="Virtual environment ${ENV_DIR} has been activated successfully."
# simply activate virtual environment
info_str="Virtual environment ${VENV_DIR} has already been set up before. Nothing to be done."
fi
echo "Set up runscript template for user ${USER}..."
if [[ -z "${base_dir}" ]]; then
shift
source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh
else
source "${WORKING_DIR}"/utils/runscript_generator/setup_runscript_templates.sh ${base_dir}
fi
echo "******************************************** NOTE ********************************************"
echo "${info_str}"
echo "Make use of generate_runscript.py to generate customized runscripts of the workflow steps."
echo "******************************************** NOTE ********************************************"
# finally clean up loaded modules (if we are not on Juwels)
#if [[ "${HOST_NAME}" == *hdfml* || "${HOST_NAME}" == *juwels* ]] && [[ "${HOST_NAME}" != jwlogin2[1-4]* ]]; then
# module --force purge
#fi
echo "${SCR_SETUP}${info_str}"
### MAIN E ###
matplotlib==3.3.0
mpi4py==3.0.1
pandas==0.25.3
xarray==0.16.0
basemap==1.3.0
numpy==1.17.3 # although this numpy-version is in the container, we set it here to avoid any further installation
scikit-image==0.17.2
opencv-python-headless==4.2.0.34
netcdf4==1.5.8
#metadata==0.2
normalization==0.4
utils==1.0.1
decorator==5.0.9
networkx==2.6.3
Werkzeug==2.0.1
numpy==1.21.3
xarray==0.20.1
climetlab==0.8.14
climetlab-maelstrom-downscaling==0.1.0
wandb
meteva
opencv-contrib-python
opencv-python
wradlib
pysteps
timm
......@@ -7,31 +7,16 @@ EXE_DIR="$(basename "$ENV_SETUP_DIR")"
VENV_DIR=$WORKING_DIR/virtual_envs/$1
shift # replaces $1 by $2, so that $@ does not include the name of the virtual environment anymore
# sanity checks
if [[ "${EXE_DIR}" = "HPC_scripts" ]] || [[ "${EXE_DIR}" = "no_HPC_scripts" ]];
then
echo "The runscript is running under the folder ${EXE_DIR}"
else
echo "ERROR: Run the setup-script for the enviornment from the (no_)HPC_scripts-directory!"
exit
fi
if ! [[ -d "${VENV_DIR}" ]]; then
echo "ERROR: Could not found virtual environment under ${VENV_DIR}!"
exit
fi
# expand PYTHONPATH
# Include site-packages from virtual environment...
ml ml go-1.17.6/singularity-3.9.5
# unset PYTHONPATH and activate virtual environment
unset PYTHONPATH
export PYTHONPATH=${VENV_DIR}/lib/python3.8/site-packages/:$PYTHONPATH
# ... dist-packages from container singularity...
export PYTHONPATH=/usr/local/lib/python3.8/dist-packages:$PYTHONPATH
# ... and modules from this project
export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH
export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH
export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH
export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH
source ${VENV_DIR}/bin/activate
# Control
echo "****** Check PYTHONPATH *****"
......@@ -40,3 +25,4 @@ echo $PYTHONPATH
export PMIX_SECURITY_MODE="native" # default would include munge which is unavailable
$@
from .base_dataset import BaseVideoDataset
from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset
#from .base_dataset import BaseVideoDataset
#from .base_dataset import VideoDataset, SequenceExampleVideoDataset, VarLenFeatureVideoDataset
#from .google_robot_dataset import GoogleRobotVideoDataset
#from .sv2p_dataset import SV2PVideoDataset
#from .softmotion_dataset import SoftmotionVideoDataset
#from .kth_dataset import KTHVideoDataset
#from .ucf101_dataset import UCF101VideoDataset
#from .cartgripper_dataset import CartgripperVideoDataset
from .era5_dataset import ERA5Dataset
#from .era5_dataset import ERA5Dataset
#from .moving_mnist import MovingMnist
from .gzprcp_data import GZprcp
#from .gzprcp_data import GZprcp
from data_preprocess.dataset_options import known_datasets
#from .era5_dataset_v2_anomaly import ERA5Dataset_v2_anomaly
......
from .base_model import BaseVideoPredictionModel
from .base_model import VideoPredictionModel
#from .base_model import BaseVideoPredictionModel
#from .base_model import VideoPredictionModel
#from .non_trainable_model import NonTrainableVideoPredictionModel
#from .non_trainable_model import GroundTruthVideoPredictionModel
#from .non_trainable_model import RepeatVideoPredictionModel
from .savp_model import SAVPVideoPredictionModel
from .vanilla_vae_model import VanillaVAEVideoPredictionModel
from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel
#from .savp_model import SAVPVideoPredictionModel
#from .vanilla_vae_model import VanillaVAEVideoPredictionModel
#from .vanilla_convLSTM_model import VanillaConvLstmVideoPredictionModel
#from .mcnet_model import McNetVideoPredictionModel
#from .test_model import TestModelVideoPredictionModel
from model_modules.model_architectures import known_models
from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel
from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel
#from model_modules.model_architectures import known_models
#from .convLSTM_GAN_model import ConvLstmGANVideoPredictionModel
#from .vanilla_predrnnv2 import PredRNNv2VideoPredictionModel
def get_model_class(model):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment