diff --git a/README.md b/README.md index 9b17f2010590202c6a7e2b1805e0ccdbddf81506..1b82518c87cbdb9b1a8c6aba57c9c27140d862d8 100644 --- a/README.md +++ b/README.md @@ -64,19 +64,26 @@ whose order is the following: ``` ### Create additional runscripts ### -In case that you want to perform experiments with varying configuration (e.g. another region of interest), -it is convenient to create individual runscripts from the templates. +In case that you want to perform experiments with varying configuration (e.g. another set of hyperparameters, but still the same input dataset for training), it is convenient to create individual runscripts from the templates. This can be done with the help of `generate_workflow_runscripts.sh`. + The first argument `<runscript_name>` defines the (relative) path to the template runscript which should be converted to an executable one. Note that only the suffix of the template's name must be passed, e.g. `../HPC_scripts/train_era5` in order to create a runscript for the training substep. -The second argument `<exp_id>` denotes again the experiment identifier. If this argument is omitted, -the default value `exp1` is used which might conflict the step where the virtual environment itself -is set up. +The second argument `<venv_name>` denotes the name of the virtual environment which has to be set up in advance and which should be used by the runscript. + +Additional optional arguments can be passed to control the experimental identifier and to set manually the relative path to the +directory where the preprocessed data is stored (used for the training and postprocessing substep). These optional arguments have to follow a naming convention in order to be identified by `generate_workflow_runscripts.sh`. +The experimental identifier can be passed by adding `-exp_id=<id>` while the path to the preprocessed data requires passing of `-exp_dir=<relative_path_to_dir>`. 
Note, that the default value `exp1` is used as experimental identifier if the `-exp_id=<id>` argument is omitted. + +``` bash +./generate_workflow_runscripts.sh <runscript_name> <venv_name> [-exp_id=<id>] [-exp_dir=<relative_path_to_dir>] +``` +*Specific example:* ``` bash -./generate_workflow_runscripts.sh <runscript_name> [<exp_id>] +./generate_workflow_runscripts.sh train_era5 venv_juwels -exp_id=exp_test -exp_dir=era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500 ``` ### Output folder structure and naming convention diff --git a/video_prediction_savp/HPC_scripts/generate_era5_template.sh b/video_prediction_savp/HPC_scripts/generate_era5_template.sh index 6d0f321cf67f99c12632c8012e8c43f2835b1e3e..99f46ffb2d21449eb4157b2d3dc08b0c547e70d6 100644 --- a/video_prediction_savp/HPC_scripts/generate_era5_template.sh +++ b/video_prediction_savp/HPC_scripts/generate_era5_template.sh @@ -35,9 +35,9 @@ if [ -z ${VIRTUAL_ENV} ]; then fi # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) -source_dir=/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/ -checkpoint_dir=/p/scratch/deepacf/video_prediction_shared_folder/models/ -results_dir=/p/scratch/deepacf/video_prediction_shared_folder/results/ +source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/ +checkpoint_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/models/ +results_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/results/ # name of model model=convLSTM diff --git a/video_prediction_savp/HPC_scripts/train_era5_template.sh b/video_prediction_savp/HPC_scripts/train_era5_template.sh index f368a8a1a506f4b6127e01b3f16df5f4d648241c..5342c32a8883c6c0e1534b5a05ef9e59d9c92ffa 100644 --- a/video_prediction_savp/HPC_scripts/train_era5_template.sh +++ b/video_prediction_savp/HPC_scripts/train_era5_template.sh @@ -34,18 +34,16 @@ if [ -z ${VIRTUAL_ENV} ]; then fi fi - - - # 
declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/ destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/models/ -# for choosing the model for choosing the model, convLSTM,savp, mcnet,vae +# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae model=convLSTM model_hparams=../hparams/era5/${model}/model_hparams.json +destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" -# rund training -srun python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/${model}/ +# run training +srun python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} diff --git a/video_prediction_savp/Zam347_scripts/train_era5_template.sh b/video_prediction_savp/Zam347_scripts/train_era5_template.sh index d06392621182ea6b046bfda77c93b1d2b422aa56..c8fcf81f717433a10a8dd1e8652a743e5e8b5332 100644 --- a/video_prediction_savp/Zam347_scripts/train_era5_template.sh +++ b/video_prediction_savp/Zam347_scripts/train_era5_template.sh @@ -9,10 +9,11 @@ exit 99 source_dir=/home/${USER}/preprocessedData/ destination_dir=/home/${USER}/models/ -# for choosing the model +# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae model=mcnet model_hparams=../hparams/era5/model_hparams.json +destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" + +# run training +python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} -# execute respective Python-script -python ../scripts/train_dummy.py --input_dir 
${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/${model}/ -#srun python scripts/train.py --input_dir data/era5 --dataset era5 --model savp --model_hparams_dict hparams/kth/ours_savp/model_hparams.json --output_dir logs/era5/ours_savp diff --git a/video_prediction_savp/env_setup/create_env.sh b/video_prediction_savp/env_setup/create_env.sh index 9f8a4c5aa007695a6d668040aacf08e158b3a12f..8aa6c0a6e0aa2100acb49dbc0d849908c9f4eb5a 100755 --- a/video_prediction_savp/env_setup/create_env.sh +++ b/video_prediction_savp/env_setup/create_env.sh @@ -6,8 +6,8 @@ # **************** Description **************** # This script can be used for setting up the virtual environment needed for ambs-project # or to simply activate it. -# In the former case, it also converts the (Batch) script templates to executable runscripts. -# Note, that you may pass an experiment identifier as second argument to this runscript +# It also converts the (Batch) runscript templates to executable runscripts. +# Note, that you may pass an experiment identifier as second argument (default 'exp1') to this runscript # which will also be used as suffix in the executable runscripts. # **************** Description **************** # @@ -32,7 +32,7 @@ fi # list of (Batch) scripts used for the steps in the workflow # !!! Expects that a template named [script_name]_template.sh exists!!! 
-workflow_scripts=(DataExtraction DataPreprocess DataPreprocess2tf train_era5 generate_era5 DatePreprocess2tf_movingmnist train_movingmnist generate_movingmnist) +workflow_scripts=(DataExtraction DataPreprocess DataPreprocess2tf train_era5 generate_era5 DataPreprocess2tf_movingmnist train_movingmnist generate_movingmnist) HOST_NAME=`hostname` ENV_NAME=$1 @@ -46,7 +46,7 @@ ENV_DIR=${WORKING_DIR}/${ENV_NAME} # * check if virtual env has already been set up if [[ "${EXE_DIR}" != "env_setup" ]]; then - echo "ERROR: The setup-script for the virtual environment from the env_setup-directory!" + echo "ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!" return fi @@ -119,26 +119,28 @@ if [[ "$ENV_EXIST" == 0 ]]; then if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} fi - # After checking and setting up the virt env, create user-specific runscripts for all steps of the workflow - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then - echo "***** Creating Batch-scripts for running workflow... *****" - script_dir=../HPC_scripts - elif [[ "${HOST_NAME}" == "zam347" ]]; then - echo "***** Creating Batch-scripts for running workflow... *****" - script_dir=../Zam347_scripts - fi - - for wf_script in "${workflow_scripts[@]}"; do - curr_script=${script_dir}/${wf_script} - if [[ -z "${exp_id}" ]]; then - ./generate_workflow_runscripts.sh ${curr_script} - else - ./generate_workflow_runscripts.sh ${curr_script} ${exp_id} - fi done - # *** finished *** elif [[ "$ENV_EXIST" == 1 ]]; then # activating virtual env is suifficient source ${ENV_DIR}/bin/activate fi +# Finish by creating runscripts + # After checking and setting up the virt env, create user-specific runscripts for all steps of the workflow +if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then + echo "***** Creating Batch-scripts for running workflow... 
*****" + script_dir=../HPC_scripts +elif [[ "${HOST_NAME}" == "zam347" ]]; then + echo "***** Creating Batch-scripts for running workflow... *****" + script_dir=../Zam347_scripts +fi + +for wf_script in "${workflow_scripts[@]}"; do + curr_script=${script_dir}/${wf_script} + if [[ -z "${exp_id}" ]]; then + ./generate_workflow_runscripts.sh ${curr_script} ${ENV_NAME} + else + ./generate_workflow_runscripts.sh ${curr_script} ${ENV_NAME} -exp_id=${exp_id} + fi +done + diff --git a/video_prediction_savp/env_setup/generate_workflow_runscripts.sh b/video_prediction_savp/env_setup/generate_workflow_runscripts.sh index d778192b40151ff224e60b39ab4af2b65b88e347..c8cc49470461fc121bc6681cb8c15ae8c7dcf75e 100755 --- a/video_prediction_savp/env_setup/generate_workflow_runscripts.sh +++ b/video_prediction_savp/env_setup/generate_workflow_runscripts.sh @@ -1,29 +1,80 @@ #!/usr/bin/env bash +# +# __authors__ = Michael Langguth +# __date__ = '2020_09_29' +# # **************** Description **************** -# Converts given template workflow script (path has to be passed as first argument) to +# Converts a given template workflow script (path/name has to be passed as first argument) to  an executable workflow (Batch) script. -Note, that this first argument has to be passed with "_template.sh" omitted! -A second argument can be passed to set an experiment identifier whose default is exp1. -Note, that the second argument can be omitted only if there are no existing (Batch) scritps -carrying this identifier which is added as a suffix. -Example: -# ./generate_workflow_scripts.sh ../HPC_scripts/generate exp5 -# ... will convert generate_template.sh to generate_exp5.sh +Note, that the first argument has to be passed with "_template.sh" omitted! +The second argument denotes the name of the virtual environment to be used. +Additionally, -exp_id=[some_id] and -exp_dir=[some_dir] can be optionally passed as NON-POSITIONAL arguments. 
+# -exp_id allows to set an experimental identifier explicitly (default is -exp_id=exp1) while +# -exp_dir allows setting manually the experimental directory. +# Note, that the latter is done during the preprocessing step in an end-to-end workflow. +# However, if the preprocessing step can be skipped (i.e. preprocessed data already exists), +# one may wish to set the experimental directory explicitly. +# +# Examples: +# ./generate_workflow_scripts.sh ../HPC_scripts/generate_era5 venv_hdfml -exp_id=exp5 +# ... will convert generate_era5_template.sh to generate_era5_exp5.sh where +# venv_hdfml is the virtual environment for operation. +# +# ./generate_workflow_scripts.sh ../HPC_scripts/generate_era5 venv_hdfml -exp_id=exp5 -exp_dir=testdata +# ... does the same as the previous example, but additionally extends source_dir=[...]/preprocessedData/, +# checkpoint_dir=[...]/models/ and results_dir=[...]/results/ by testdata/ # **************** Description **************** # +# **************** Auxiliary functions **************** +check_argin() { +# Handle input arguments and check if one of them holds -exp_id= +# or -exp_dir= to emulate them as non-positional arguments + for argin in "$@"; do + if [[ $argin == *"-exp_id="* ]]; then + exp_id=${argin#"-exp_id="} + elif [[ $argin == *"-exp_dir="* ]]; then + exp_dir=${argin#"-exp_dir="} + fi + done +} + +add_exp_dir() { +# Add exp_dir to paths in <target_script> which end with /<prefix>/ + prefix=$1 + if [[ `grep "/${prefix}/$" ${target_script}` ]]; then + echo "Add experimental directory after '${prefix}/' in runscript '${target_script}'" + sed -i "s|/${prefix}/$|/${prefix}/${exp_dir}/|g" ${target_script} + status=1 + fi +} +# **************** Auxiliary functions **************** HOST_NAME=`hostname` +BASE_DIR=`pwd` +WORKING_DIR="$(dirname "$BASE_DIR")" +EXE_DIR="$(basename "$BASE_DIR")" -### some sanity checks ### +### Some sanity checks ### +# ensure that the script is executed from the env_setup-subdirectory +if [[ 
"${EXE_DIR}" != "env_setup" ]]; then + echo "ERROR: Execute 'generate_workflow_scripts.sh' from the env_setup-subdirectory only!" + exit 1 +fi # check input arguments -if [[ "$#" -lt 1 ]]; then - echo "ERROR: Pass path to workflow runscript (without '_template.sh') to be generated..." +if [[ "$#" -lt 2 ]]; then + echo "ERROR: Pass path to workflow runscript (without '_template.sh') and pass name of virtual environment..." + echo "Example: ./generate_workflow_scripts.sh ../HPC_scripts/DataExtraction venv_hdfml" exit 1 else curr_script=$1 - if [[ "$#" -gt 1 ]]; then - exp_id=$2 - else + curr_script_loc="$(basename "$curr_script")" + curr_venv=$2 + # check if any known non-positional argument is present... + if [[ "$#" -gt 2 ]]; then + check_argin ${@:3} + fi + #...and ensure that exp_id is always set + if [[ -z "${exp_id}" ]]; then exp_id="exp1" fi fi @@ -34,19 +85,27 @@ if ! [[ -f ${curr_script}_template.sh ]]; then echo "Thus, no corresponding executable script is created!" if [[ ${curr_script} == *"template"* || ${curr_script} == *".sh"* ]]; then echo "ERROR: Omit '_template' and/or '.sh' from Bash script argument." - exit 1 + exit 2 else exit 0 # still ok, i.e. only a WARNING is raised fi fi -# check if target script is unique + +# Check existence of virtual environment (2nd argument) +if [[ ! -d ${WORKING_DIR}/${curr_venv} ]]; then + echo "ERROR: Could not find directory of virtual environment under ${WORKING_DIR}/${curr_venv} " + exit 3 +fi + +# Check if target script is unique target_script=${curr_script}_${exp_id}.sh if [[ -f ${target_script} ]]; then echo "ERROR: ${target_script} already exist." echo "Set explicitly a different experiment identifier." 
- exit 1 + exit 4 fi -### do the work ### + +### Do the work ### # create copy of template which is modified subsequently cp ${curr_script}_template.sh ${target_script} # remove template identifiers @@ -56,10 +115,24 @@ line_e=`echo ${num_lines} | cut -d' ' -f 2` if [[ ${line_s} == "" || ${line_e} == "" ]]; then echo "ERROR: ${curr_script}_template.sh exists, but does not seem to be a valid template script." rm ${target_script} # remove copy again - exit 1 + exit 5 else sed -i "${line_s},${line_e}d" ${target_script} fi + +# set virtual environment to be used in Batch scripts +if [[ `grep "VIRT_ENV_NAME=" ${target_script}` ]]; then + sed -i 's/VIRT_ENV_NAME=.*/VIRT_ENV_NAME="'${curr_venv}'"/g' ${target_script} +fi + +# also adapt name output- and error-files of submitted job with exp_id (if we are on Juwels or HDF-ML) +if [[ `grep "#SBATCH --output=" ${target_script}` ]]; then + sed -i "s|#SBATCH --output=.*|#SBATCH --output=${curr_script_loc}_${exp_id}-out\.%j|g" ${target_script} +fi +if [[ `grep "#SBATCH --error=" ${target_script}` ]]; then + sed -i "s|#SBATCH --error=.*|#SBATCH --error=${curr_script_loc}_${exp_id}-err\.%j|g" ${target_script} +fi + # set exp_id in (Batch) script if present if [[ `grep "exp_id=" ${target_script}` ]]; then sed -i "s/exp_id=.*/exp_id=$exp_id/g" ${target_script} @@ -75,4 +148,18 @@ if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then sed -i "s/--mail-user=.*/--mail-user=$USER_EMAIL/g" ${target_script} fi +# finally set experimental directory if exp_dir is present +if [[ ! -z "${exp_dir}" ]]; then + status=0 # status to check if exp_dir is added to the runscript at hand + # -> will be set to one by add_exp_dir if modification takes place + add_exp_dir preprocessedData + add_exp_dir models + add_exp_dir results + + if [[ ${status} == 0 ]]; then + echo "WARNING: -exp_dir has been passed, but no addition to any path in runscript at hand done..." + fi +fi + +