diff --git a/.gitignore b/.gitignore index a7dfc3d7d776b25730741f12db6f98cdfa127751..4b7022228f10be8b83d42ef87eb66b01a4e0687f 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,9 @@ virt_env*/ *.tfrecords **/era5_size_64_64_3_3t_norm +# Ignore (Batch) runscripts +HPC_scripts/*.sh +!HPC_scripts/*_template.sh +Zam347_scripts/*.sh +!Zam347_scripts/*_template.sh + diff --git a/video_prediction_savp/HPC_scripts/DataExtraction.sh b/video_prediction_savp/HPC_scripts/DataExtraction_template.sh old mode 100755 new mode 100644 similarity index 90% rename from video_prediction_savp/HPC_scripts/DataExtraction.sh rename to video_prediction_savp/HPC_scripts/DataExtraction_template.sh index b44065e7babb0411cda6d2849ec429f3672c60d5..a80a3b8779908fc51121c6682817f20ec197a327 --- a/video_prediction_savp/HPC_scripts/DataExtraction.sh +++ b/video_prediction_savp/HPC_scripts/DataExtraction_template.sh @@ -12,6 +12,10 @@ #SBATCH --mail-type=ALL #SBATCH --mail-user=b.gong@fz-juelich.de +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### jutil env activate -p deepacf diff --git a/video_prediction_savp/HPC_scripts/DataPreprocess_to_tf_movingmnist.sh b/video_prediction_savp/HPC_scripts/DataPreprocess2tf_movingmnist_template.sh old mode 100755 new mode 100644 similarity index 89% rename from video_prediction_savp/HPC_scripts/DataPreprocess_to_tf_movingmnist.sh rename to video_prediction_savp/HPC_scripts/DataPreprocess2tf_movingmnist_template.sh index a81e9a1499ce2619c6d934d32396c7128bd6b565..dc1fbb4a83788a4cc1f69fdf151d8419129dc06d --- a/video_prediction_savp/HPC_scripts/DataPreprocess_to_tf_movingmnist.sh +++ b/video_prediction_savp/HPC_scripts/DataPreprocess2tf_movingmnist_template.sh @@ -11,6 +11,10 @@ #SBATCH --mail-type=ALL #SBATCH --mail-user=b.gong@fz-juelich.de +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 
+######### Template identifier (don't remove) ######### # Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/HPC_scripts/DataPreprocess_to_tf.sh b/video_prediction_savp/HPC_scripts/DataPreprocess2tf_template.sh old mode 100755 new mode 100644 similarity index 89% rename from video_prediction_savp/HPC_scripts/DataPreprocess_to_tf.sh rename to video_prediction_savp/HPC_scripts/DataPreprocess2tf_template.sh index bcf950e93145bcc8b0d15892a606d4cc5d7dd66e..e953b5bc3fd2a836a74b647c1066735d19e39640 --- a/video_prediction_savp/HPC_scripts/DataPreprocess_to_tf.sh +++ b/video_prediction_savp/HPC_scripts/DataPreprocess2tf_template.sh @@ -11,6 +11,10 @@ #SBATCH --mail-type=ALL #SBATCH --mail-user=b.gong@fz-juelich.de +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### # Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/HPC_scripts/DataPreprocess.sh b/video_prediction_savp/HPC_scripts/DataPreprocess_template.sh old mode 100755 new mode 100644 similarity index 63% rename from video_prediction_savp/HPC_scripts/DataPreprocess.sh rename to video_prediction_savp/HPC_scripts/DataPreprocess_template.sh index aa84de9de7dce7015b26f040aaec48d0b096a816..b686976ea8dbb3e00feffe1c0f26180f463b9db3 --- a/video_prediction_savp/HPC_scripts/DataPreprocess.sh +++ b/video_prediction_savp/HPC_scripts/DataPreprocess_template.sh @@ -12,6 +12,10 @@ #SBATCH --mail-type=ALL #SBATCH --mail-user=b.gong@fz-juelich.de +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### # Name of virtual environment VIRT_ENV_NAME="virt_env_hdfml" @@ -29,34 +33,29 @@ fi # Loading mouldes source ../env_setup/modules_preprocess.sh -source_dir=${SAVE_DIR}/extractedData -destination_dir=${SAVE_DIR}/preprocessedData/era5-Y2015to2017M01to12 
-script_dir=`pwd` - -declare -a years=("2222" - "2010_1" - "2012" - "2013_complete" - "2015" - "2016" - "2017" - "2019" - ) - +# select years for dataset declare -a years=( "2015" "2016" "2017" ) +max_year=`echo "${years[*]}" | sort -nr | head -n1` +min_year=`echo "${years[*]}" | sort -nr | tail -n1` +# set some paths +# note, that destination_dir is used during runtime to set a proper experiment directory +exp_id=xxx # experiment identifier is set by 'generate_workflow_runscripts.sh' +source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/extractedData +destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/era5-Y${min_year}to${max_year}M01to12 +script_dir=`pwd` -# ececute Python-scripts +# execute Python-scripts for year in "${years[@]}"; do echo "Year $year" echo "source_dir ${source_dir}/${year}" srun python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \ - --source_dir ${source_dir} -scr_dir ${script_dir} \ + --source_dir ${source_dir} -scr_dir ${script_dir} -exp_id ${exp_id} \ --destination_dir ${destination_dir} --years ${year} --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710 done diff --git a/video_prediction_savp/HPC_scripts/generate_era5.sh b/video_prediction_savp/HPC_scripts/generate_era5_template.sh old mode 100755 new mode 100644 similarity index 91% rename from video_prediction_savp/HPC_scripts/generate_era5.sh rename to video_prediction_savp/HPC_scripts/generate_era5_template.sh index bb36609129d2c45c5c0cbfaaf0675a7e338eb09c..6d0f321cf67f99c12632c8012e8c43f2835b1e3e --- a/video_prediction_savp/HPC_scripts/generate_era5.sh +++ b/video_prediction_savp/HPC_scripts/generate_era5_template.sh @@ -13,6 +13,11 @@ #SBATCH --mail-user=b.gong@fz-juelich.de ##jutil env activate -p cjjsc42 +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + # 
Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/HPC_scripts/generate_movingmnist.sh b/video_prediction_savp/HPC_scripts/generate_movingmnist_template.sh similarity index 89% rename from video_prediction_savp/HPC_scripts/generate_movingmnist.sh rename to video_prediction_savp/HPC_scripts/generate_movingmnist_template.sh index 1de81d2543d255a160ff811ff391a963ef712bde..493eb51c7ce2a8b533cd27d062f3c0649c97a2ea 100755 --- a/video_prediction_savp/HPC_scripts/generate_movingmnist.sh +++ b/video_prediction_savp/HPC_scripts/generate_movingmnist_template.sh @@ -10,9 +10,14 @@ #SBATCH --gres=gpu:1 #SBATCH --partition=develgpus #SBATCH --mail-type=ALL -#SBATCH --mail-user=s.stadtler@fz-juelich.de +#SBATCH --mail-user=b.gong@fz-juelich.de ##jutil env activate -p cjjsc42 +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + # Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/HPC_scripts/train_era5.sh b/video_prediction_savp/HPC_scripts/train_era5_template.sh old mode 100755 new mode 100644 similarity index 90% rename from video_prediction_savp/HPC_scripts/train_era5.sh rename to video_prediction_savp/HPC_scripts/train_era5_template.sh index 5173564faae730cda10ac3acc072fe9ed43cb7b3..f368a8a1a506f4b6127e01b3f16df5f4d648241c --- a/video_prediction_savp/HPC_scripts/train_era5.sh +++ b/video_prediction_savp/HPC_scripts/train_era5_template.sh @@ -13,6 +13,10 @@ #SBATCH --mail-user=b.gong@fz-juelich.de ##jutil env activate -p cjjsc42 +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### # Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/HPC_scripts/train_movingmnist.sh b/video_prediction_savp/HPC_scripts/train_movingmnist_template.sh similarity index 91% rename from 
video_prediction_savp/HPC_scripts/train_movingmnist.sh rename to video_prediction_savp/HPC_scripts/train_movingmnist_template.sh index 006ff73c30c4a53c80aef9371bfbe29fac39f973..158b9dc7b2614c2997b5df04be8cd5f07fbaa459 100755 --- a/video_prediction_savp/HPC_scripts/train_movingmnist.sh +++ b/video_prediction_savp/HPC_scripts/train_movingmnist_template.sh @@ -13,6 +13,10 @@ #SBATCH --mail-user=b.gong@fz-juelich.de ##jutil env activate -p cjjsc42 +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### # Name of virtual environment VIRT_ENV_NAME="vp" diff --git a/video_prediction_savp/Zam347_scripts/DataExtraction.sh b/video_prediction_savp/Zam347_scripts/DataExtraction_template.sh old mode 100755 new mode 100644 similarity index 56% rename from video_prediction_savp/Zam347_scripts/DataExtraction.sh rename to video_prediction_savp/Zam347_scripts/DataExtraction_template.sh index 6953b7d8484b0eba9d8928b86b1ffbe9d396e8f0..e208fd69322cb82938482c745e311c0eb0d4fe11 --- a/video_prediction_savp/Zam347_scripts/DataExtraction.sh +++ b/video_prediction_savp/Zam347_scripts/DataExtraction_template.sh @@ -1,4 +1,8 @@ #!/bin/bash -x +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### mpirun -np 4 python ../../workflow_parallel_frame_prediction/DataExtraction/mpi_stager_v2.py --source_dir /home/b.gong/data_era5/2017/ --destination_dir /home/${USER}/extractedData/2017 diff --git a/video_prediction_savp/Zam347_scripts/DataPreprocess.sh b/video_prediction_savp/Zam347_scripts/DataPreprocess.sh deleted file mode 100755 index b9941b0e703346f31fd62339882a07ccc20454da..0000000000000000000000000000000000000000 --- a/video_prediction_savp/Zam347_scripts/DataPreprocess.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -x - - -source_dir=/home/$USER/extractedData 
-destination_dir=/home/$USER/preprocessedData/era5-Y2017M01to02 -script_dir=`pwd` - -declare -a years=("2017") - -for year in "${years[@]}"; - do - echo "Year $year" - echo "source_dir ${source_dir}/${year}" - mpirun -np 2 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \ - --source_dir ${source_dir} -scr_dir ${script_dir} \ - --destination_dir ${destination_dir} --years ${years} --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710 - done -python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500 - - - - diff --git a/video_prediction_savp/Zam347_scripts/DataPreprocess_to_tf.sh b/video_prediction_savp/Zam347_scripts/DataPreprocess2tf_template.sh old mode 100755 new mode 100644 similarity index 72% rename from video_prediction_savp/Zam347_scripts/DataPreprocess_to_tf.sh rename to video_prediction_savp/Zam347_scripts/DataPreprocess2tf_template.sh index 64f25726cbca71717d634fa6f107bac2d5ef323c..fc92afd8fe7e8385162ba95a5e1ab3c6ceab599a --- a/video_prediction_savp/Zam347_scripts/DataPreprocess_to_tf.sh +++ b/video_prediction_savp/Zam347_scripts/DataPreprocess2tf_template.sh @@ -1,5 +1,10 @@ #!/bin/bash -x +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) source_dir=/home/${USER}/preprocessedData/ destination_dir=/home/${USER}/preprocessedData/ diff --git a/video_prediction_savp/Zam347_scripts/DataPreprocess_template.sh b/video_prediction_savp/Zam347_scripts/DataPreprocess_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..172763151557108664c8ba8c9b9028ea2aa5f4f0 --- /dev/null +++ b/video_prediction_savp/Zam347_scripts/DataPreprocess_template.sh 
@@ -0,0 +1,34 @@ +#!/bin/bash -x + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +# select years for dataset +declare -a years=( + "2017" + ) + +max_year=`echo "${years[*]}" | sort -nr | head -n1` +min_year=`echo "${years[*]}" | sort -nr | tail -n1` +# set some paths +# note, that destination_dir is used during runtime to set a proper experiment directory +exp_id=xxx # experiment identifier is set by 'generate_workflow_runscripts.sh' +source_dir=${SAVE_DIR}/extractedData +destination_dir=${SAVE_DIR}/preprocessedData/era5-Y${min_year}to${max_year}M01to12 +script_dir=`pwd` + +for year in "${years[@]}"; + do + echo "Year $year" + echo "source_dir ${source_dir}/${year}" + mpirun -np 2 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \ + --source_dir ${source_dir} -scr_dir ${script_dir} -exp_id ${exp_id} \ + --destination_dir ${destination_dir} --years ${year} --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710 + done +python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500 + + + + diff --git a/video_prediction_savp/Zam347_scripts/generate_era5.sh b/video_prediction_savp/Zam347_scripts/generate_era5_template.sh old mode 100755 new mode 100644 similarity index 83% rename from video_prediction_savp/Zam347_scripts/generate_era5.sh rename to video_prediction_savp/Zam347_scripts/generate_era5_template.sh index d9d710e5c4f3cc2d2825bf67bf2b668f6f9ddbd8..72768e241ce5ce612a6775668610ab1456436201 --- a/video_prediction_savp/Zam347_scripts/generate_era5.sh +++ b/video_prediction_savp/Zam347_scripts/generate_era5_template.sh @@ -1,5 +1,10 @@ #!/bin/bash -x +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) 
######### + # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) source_dir=/home/${USER}/preprocessedData/ checkpoint_dir=/home/${USER}/models/ diff --git a/video_prediction_savp/Zam347_scripts/train_era5.sh b/video_prediction_savp/Zam347_scripts/train_era5_template.sh old mode 100755 new mode 100644 similarity index 81% rename from video_prediction_savp/Zam347_scripts/train_era5.sh rename to video_prediction_savp/Zam347_scripts/train_era5_template.sh index aadb25997e2715ac719457c969a6f54982ec93a6..d06392621182ea6b046bfda77c93b1d2b422aa56 --- a/video_prediction_savp/Zam347_scripts/train_era5.sh +++ b/video_prediction_savp/Zam347_scripts/train_era5_template.sh @@ -1,5 +1,10 @@ #!/bin/bash -x +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) source_dir=/home/${USER}/preprocessedData/ destination_dir=/home/${USER}/models/ diff --git a/video_prediction_savp/env_setup/create_env.sh b/video_prediction_savp/env_setup/create_env.sh old mode 100644 new mode 100755 index 888f543db6891ebfd2a57f06dc6cce3f5f2743dc..9f8a4c5aa007695a6d668040aacf08e158b3a12f --- a/video_prediction_savp/env_setup/create_env.sh +++ b/video_prediction_savp/env_setup/create_env.sh @@ -1,10 +1,15 @@ #!/usr/bin/env bash # # __authors__ = Bing Gong, Michael Langguth -# __date__ = '2020_07_24' - +# __date__ = '2020_09_10' +# +# **************** Description **************** # This script can be used for setting up the virtual environment needed for ambs-project # or to simply activate it. +# In the former case, it also converts the (Batch) script templates to executable runscripts. 
+Note, that you may pass an experiment identifier as second argument to this runscript +# which will also be used as suffix in the executable runscripts. +# **************** Description **************** # # some first sanity checks if [[ ${BASH_SOURCE[0]} == ${0} ]]; then @@ -19,6 +24,16 @@ if [[ ! -n "$1" ]]; then return fi +if [[ -n "$2" ]]; then + exp_id=$2 +else + exp_id="" +fi + +# list of (Batch) scripts used for the steps in the workflow +# !!! Expects that a template named [script_name]_template.sh exists!!! +workflow_scripts=(DataExtraction DataPreprocess DataPreprocess2tf train_era5 generate_era5 DataPreprocess2tf_movingmnist train_movingmnist generate_movingmnist) + HOST_NAME=`hostname` ENV_NAME=$1 ENV_SETUP_DIR=`pwd` @@ -46,9 +61,6 @@ fi # add personal email-address to Batch-scripts if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then - USER_EMAIL=$(jutil user show -o json | grep email | cut -f2 -d':' | cut -f1 -d',' | cut -f2 -d'"') - #replace the email in sbatch script with the USER_EMAIL - sed -i "s/--mail-user=.*/--mail-user=$USER_EMAIL/g" ../HPC_scripts/*.sh # load modules and check for their availability echo "***** Checking modules required during the workflow... *****" source ${ENV_SETUP_DIR}/modules_preprocess.sh @@ -86,6 +98,7 @@ if [[ "$ENV_EXIST" == 0 ]]; then pip3 install h5py pip3 install tensorflow-gpu==1.13.1 fi + # expand PYTHONPATH... 
export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH >> ${activate_virt_env} export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH >> ${activate_virt_env} @@ -105,7 +118,25 @@ if [[ "$ENV_EXIST" == 0 ]]; then if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} - fi + fi + # After checking and setting up the virt env, create user-specific runscripts for all steps of the workflow + if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then + echo "***** Creating Batch-scripts for running workflow... *****" + script_dir=../HPC_scripts + elif [[ "${HOST_NAME}" == "zam347" ]]; then + echo "***** Creating Batch-scripts for running workflow... *****" + script_dir=../Zam347_scripts + fi + + for wf_script in "${workflow_scripts[@]}"; do + curr_script=${script_dir}/${wf_script} + if [[ -z "${exp_id}" ]]; then + ./generate_workflow_runscripts.sh ${curr_script} + else + ./generate_workflow_runscripts.sh ${curr_script} ${exp_id} + fi + done + # *** finished *** elif [[ "$ENV_EXIST" == 1 ]]; then # activating virtual env is suifficient source ${ENV_DIR}/bin/activate diff --git a/video_prediction_savp/env_setup/generate_workflow_runscripts.sh b/video_prediction_savp/env_setup/generate_workflow_runscripts.sh new file mode 100755 index 0000000000000000000000000000000000000000..d778192b40151ff224e60b39ab4af2b65b88e347 --- /dev/null +++ b/video_prediction_savp/env_setup/generate_workflow_runscripts.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# **************** Description **************** +# Converts given template workflow script (path has to be passed as first argument) to +# an executable workflow (Batch) script. +# Note, that this first argument has to be passed with "_template.sh" omitted! +# A second argument can be passed to set an experiment identifier whose default is exp1. 
+Note, that the second argument can be omitted only if there are no existing (Batch) scripts +# carrying this identifier which is added as a suffix. +# Example: +# ./generate_workflow_scripts.sh ../HPC_scripts/generate exp5 +# ... will convert generate_template.sh to generate_exp5.sh +# **************** Description **************** +# + +HOST_NAME=`hostname` + +### some sanity checks ### +# check input arguments +if [[ "$#" -lt 1 ]]; then + echo "ERROR: Pass path to workflow runscript (without '_template.sh') to be generated..." + exit 1 +else + curr_script=$1 + if [[ "$#" -gt 1 ]]; then + exp_id=$2 + else + exp_id="exp1" + fi +fi + +# check existence of template script +if ! [[ -f ${curr_script}_template.sh ]]; then + echo "WARNING: Could not find expected Batch script '${curr_script}_template.sh'." + echo "Thus, no corresponding executable script is created!" + if [[ ${curr_script} == *"template"* || ${curr_script} == *".sh"* ]]; then + echo "ERROR: Omit '_template' and/or '.sh' from Bash script argument." + exit 1 + else + exit 0 # still ok, i.e. only a WARNING is raised + fi +fi +# check if target script is unique +target_script=${curr_script}_${exp_id}.sh +if [[ -f ${target_script} ]]; then + echo "ERROR: ${target_script} already exists." + echo "Set explicitly a different experiment identifier." + exit 1 +fi +### do the work ### +# create copy of template which is modified subsequently +cp ${curr_script}_template.sh ${target_script} +# remove template identifiers +num_lines=`awk '/Template identifier/{ print NR }' ${target_script}` +line_s=`echo ${num_lines} | cut -d' ' -f 1` +line_e=`echo ${num_lines} | cut -d' ' -f 2` +if [[ ${line_s} == "" || ${line_e} == "" ]]; then + echo "ERROR: ${curr_script}_template.sh exists, but does not seem to be a valid template script." 
+ rm ${target_script} # remove copy again + exit 1 +else + sed -i "${line_s},${line_e}d" ${target_script} +fi +# set exp_id in (Batch) script if present +if [[ `grep "exp_id=" ${target_script}` ]]; then + sed -i "s/exp_id=.*/exp_id=$exp_id/g" ${target_script} +fi + +# set correct e-mail address in Batch scripts on Juwels and HDF-ML +if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then + if ! [[ -z `command -v jutil` ]]; then + USER_EMAIL=$(jutil user show -o json | grep email | cut -f2 -d':' | cut -f1 -d',' | cut -f2 -d'"') + else + USER_EMAIL="" + fi + sed -i "s/--mail-user=.*/--mail-user=$USER_EMAIL/g" ${target_script} +fi + + diff --git a/video_prediction_savp/utils/metadata.py b/video_prediction_savp/utils/metadata.py index fada7c47b1cf3d79a4d88fe6e5584a7da5bc7ec3..c4ef4f36ef68dd70ae129706211a4468f60a9404 100644 --- a/video_prediction_savp/utils/metadata.py +++ b/video_prediction_savp/utils/metadata.py @@ -15,11 +15,17 @@ class MetaData: Class for handling, storing and retrieving meta-data """ - def __init__(self,json_file=None,suffix_indir=None,data_filename=None,slices=None,variables=None): - + def __init__(self,json_file=None,suffix_indir=None,exp_id=None,data_filename=None,slices=None,variables=None): + """ Initailizes MetaData instance by reading a corresponding json-file or by handling arguments of the Preprocessing step - (i.e. exemplary input file, slices defining region of interest, input variables) + (i.e. 
exemplary input file, slices defining region of interest, input variables) + :param json_file: already existing json-file with metadata, if present the following arguments are not needed + :param suffix_indir: suffix of directory where processed data is stored for running the models + :param exp_id: experiment identifier + :param data_filename: name of netCDF-file serving as base for metadata retrieval + :param slices: indices defining the region of interest + :param variables: predictor variables """ method_name = MetaData.__init__.__name__+" of Class "+MetaData.__name__ @@ -36,6 +42,12 @@ class MetaData: else: if not isinstance(suffix_indir,str): raise TypeError(method_name+": 'suffix_indir'-argument must be a string.") + + if not exp_id: + raise TypeError(method_name+": 'exp_id'-argument is required if 'json_file' is not passed.") + else: + if not isinstance(exp_id,str): + raise TypeError(method_name+": 'exp_id'-argument must be a string.") if not data_filename: raise TypeError(method_name+": 'data_filename'-argument is required if 'json_file' is not passed.") @@ -55,12 +67,12 @@ class MetaData: if not isinstance(variables,list): raise TypeError(method_name+": 'variables'-argument must be a list.") - MetaData.get_and_set_metadata_from_file(self,suffix_indir,data_filename,slices,variables) + MetaData.get_and_set_metadata_from_file(self,suffix_indir,exp_id,data_filename,slices,variables) MetaData.write_metadata_to_file(self) - def get_and_set_metadata_from_file(self,suffix_indir,datafile_name,slices,variables): + def get_and_set_metadata_from_file(self,suffix_indir,exp_id,datafile_name,slices,variables): """ Retrieves several meta data from netCDF-file and sets corresponding class instance attributes. 
Besides, the name of the experiment directory is constructed following the naming convention (see below) @@ -90,7 +102,6 @@ class MetaData: MetaData.check_datafile(datafile,variables) self.varnames = variables - self.nx, self.ny = np.abs(slices['lon_e'] - slices['lon_s']), np.abs(slices['lat_e'] - slices['lat_s']) sw_c = [float(datafile.variables['lat'][slices['lat_e']-1]),float(datafile.variables['lon'][slices['lon_s']])] # meridional axis lat is oriented from north to south (i.e. monotonically decreasing) self.sw_c = sw_c @@ -129,6 +140,7 @@ class MetaData: self.expname = expname self.expdir = expdir + self.exp_id = exp_id self.status = "" # uninitialized (is set when metadata is written/compared to/with json-file, see write_metadata_to_file-method) # ML 2020/04/24 E @@ -142,7 +154,8 @@ class MetaData: method_name = MetaData.write_metadata_to_file.__name__+" of Class "+MetaData.__name__ # actual work: meta_dict = {"expname": self.expname, - "expdir" : self.expdir} + "expdir" : self.expdir, + "exp_id" : self.exp_id} meta_dict["sw_corner_frame"] = { "lat" : np.around(self.sw_c[0],decimals=2), @@ -206,6 +219,8 @@ class MetaData: dict_in = json.load(js_file) self.expdir = dict_in["expdir"] + if "exp_id" in dict_in: + self.exp_id = dict_in["exp_id"] self.sw_c = [dict_in["sw_corner_frame"]["lat"],dict_in["sw_corner_frame"]["lon"] ] self.lat = dict_in["coordinates"]["lat"] @@ -226,7 +241,13 @@ class MetaData: """ paths_to_mod = ["source_dir=","destination_dir=","checkpoint_dir=","results_dir="] # known directory-variables in batch-scripts - + + # For backward compability: + # Check if exp_id (if present) needs to be added to batch_script in order to access the file + if hasattr(self,"exp_id"): + sep_idx = batch_script.index(".sh") + batch_script = batch_script[:sep_idx] + "_" + self.exp_id + batch_script[sep_idx:] + with open(batch_script,'r') as file: data = file.readlines() diff --git a/workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py 
b/workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py index 71c661c49ba3502b12dbd409fb76a5c9b4517087..377b8b8f4f9761e4e81b60381f10999485fcd663 100755 --- a/workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py +++ b/workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py @@ -28,6 +28,8 @@ def main(): parser.add_argument("--lat_e", type=int, default=202-32) parser.add_argument("--lon_s", type=int, default=550+16+32) parser.add_argument("--lon_e", type=int, default=710-16-32) + parser.add_argument("--experimental_id","-exp_id",dest="exp_id",type=str, default="exp1",\ + help="Experimental identifier helping to distinguish between different experiments.") args = parser.parse_args() current_path = os.getcwd() @@ -52,6 +54,8 @@ def main(): print("Selected variables",vars) print("Selected Slices",slices) + exp_id = args.exp_id + os.chdir(current_path) time.sleep(0) @@ -111,12 +115,12 @@ def main(): data_files_list = glob.glob(source_dir+"/**/*.nc",recursive=True) if not data_files_list: raise ValueError("Could not find any data to be processed in '"+source_dir+"'") - md = MetaData(suffix_indir=destination_dir,data_filename=data_files_list[0],slices=slices,variables=vars) + md = MetaData(suffix_indir=destination_dir,exp_id=exp_id,data_filename=data_files_list[0],slices=slices,variables=vars) # modify Batch scripts if metadata has been retrieved for the first time (md.status = "new") if (md.status == "new"): - md.write_dirs_to_batch_scripts(scr_dir+"/DataPreprocess_to_tf.sh") + md.write_dirs_to_batch_scripts(scr_dir+"/DataPreprocess2tf.sh") + md.write_dirs_to_batch_scripts(scr_dir + "/train_era5.sh") md.write_dirs_to_batch_scripts(scr_dir+"/generate_era5.sh") - md.write_dirs_to_batch_scripts(scr_dir+"/train_era5.sh") elif (md.status == "old"): # meta-data file already exists and is ok # check for temp.json in working directory (required by slave nodes)