diff --git a/.gitignore b/.gitignore index 4b7022228f10be8b83d42ef87eb66b01a4e0687f..6dc2f3392135d3b43923f87460f274b5aea104fe 100644 --- a/.gitignore +++ b/.gitignore @@ -85,7 +85,7 @@ celerybeat-schedule # virtualenv .venv -venv/ +venv*/ ENV/ virtual_env*/ virt_env*/ @@ -122,8 +122,8 @@ virt_env*/ **/era5_size_64_64_3_3t_norm # Ignore (Batch) runscripts -HPC_scripts/*.sh -!HPC_scripts/*_template.sh -Zam347_scripts/*.sh -!Zam347_scripts/*_template.sh +video_prediction_tools/HPC_scripts/** +!video_prediction_tools/HPC_scripts/*_template.sh +video_prediction_tools/Zam347_scripts/** +!video_prediction_tools/Zam347_scripts/*_template.sh diff --git a/README.md b/README.md index 6a3e8044fd1afc46f777aaca8999e47dd882a995..1b82518c87cbdb9b1a8c6aba57c9c27140d862d8 100644 --- a/README.md +++ b/README.md @@ -17,86 +17,76 @@ Atmopsheric Machine learning Benchmarking Systems (AMBS) aims to privde state-of git clone https://gitlab.version.fz-juelich.de/toar/ambs.git ``` -### Set-up env on JUWELS and ZAM347 +### Set-up env on Jülich's HPC systems and zam347 + +The following commands will set up a user-specific virtual environment +either on Juwels, HDF-ML (HPC clusters) or on zam347 for you. +The script `create_env.sh` automatically detects on which machine it is executed and loads/installs +all required Python (binary) modules and packages. +The virtual environment is set up under the subfolder `video_prediction_savp/<env_name>`. +Besides, user-specific runscripts for each step of the workflow may be created, +e.g. `train_era5_exp1.sh` where `exp1` denotes the default experiment identifier. +The name of this identifier can be controlled by the optional second argument `<exp_id>`. -- Setup env and install packages ```bash cd video_prediction_savp/env_setup -source create_env_zam347.sh <env_name> +source create_env.sh <env_name> [<exp_id>] ``` -### Run workflow on JUWELS +### Run the workflow -- Go to HPC_scripts directory -```bash -cd video_prediction_savp/HPC_scripts +Depending on the machine you are working on, change either to +`video_prediction_savp/HPC_scripts` (on Juwels and HDF-ML) or to +`video_prediction_savp/Zam347_scripts`. +There, the respective runscripts for all steps of the workflow are located, +whose order is as follows: -``` -- Data Extraction -```bash -sbatch DataExtraction.sh -``` -- Data Preprocessing +1. Data Extraction: Retrieve ERA5 reanalysis data for one year. For multiple years, execute the runscript sequentially. ```bash -sbatch /DataPreprocess.sh -sbatch /DataPreprocess_to_tf.sh +./DataExtraction_<exp_id>.sh ``` -- Setup hyperparams - -This step will setup the hyper-parameters that used for training, and create a folder named "datetime_user" where save the trained model +2. Data Preprocessing: Crop all data (multiple years possible) to the region of interest and perform normalization. ```bash -source hyperparam_setup.sh +./DataPreprocess_<exp_id>.sh +./DataPreprocess2tf_<exp_id>.sh ``` - -- Training +3. Training: Train one of the available models (see below) with the preprocessed data. ```bash -sbatch train_era5.sh +./train_era5_<exp_id>.sh ``` -- Postprocess -```bash -sbatch generate_era5.sh -``` - -- Reset all the generated path to origin state - +4. Postprocess: Create some plots and calculate evaluation metrics for the test dataset. ```bash -source reset_dirs.sh +./generate_era5_<exp_id>.sh ``` +### Create additional runscripts ### +In case you want to perform experiments with a varying configuration (e.g.
another set of hyperparameters, but still the same input dataset for training), it is convenient to create individual runscripts from the templates. +This can be done with the help of `generate_workflow_runscripts.sh`. +The first argument `<runscript_name>` defines the (relative) path to the template runscript +which should be converted to an executable one. Note that only the suffix of the +template's name must be passed, e.g. `../HPC_scripts/train_era5` in order to create +a runscript for the training substep. +The second argument `<venv_name>` denotes the name of the virtual environment which has to be set up in advance and which should be used by the runscript. -### Run workflow on ZAM347 +Additional optional arguments can be passed to control the experimental identifier and to manually set the relative path to the +directory where the preprocessed data is stored (used for the training and postprocessing substeps). These optional arguments have to follow a naming convention in order to be identified by `generate_workflow_runscripts.sh`. +The experimental identifier can be passed by adding `-exp_id=<id>`, while the path to the preprocessed data requires passing `-exp_dir=<relative_path_to_dir>`. Note that the default value `exp1` is used as the experimental identifier if `-exp_id=<id>` is omitted. -- Go to zam347_scripts directory -```bash -cd video_prediction_savp/Zam347_scripts -``` -- Data Extraction -```bash -./DataExtraction.sh -``` - -- Data Preprocessing -```bash -./DataPreprocess.sh -./DataPreprocess_to_tf.sh +``` bash +./generate_workflow_runscripts.sh <runscript_name> <venv_name> [-exp_id=<id>] [-exp_dir=<relative_path_to_dir>] ``` -- Training -```bash -./train_era5.sh -``` - -- Postprocess -```bash -./generate_era5.sh +*Specific example:* +``` bash +./generate_workflow_runscripts.sh train_era5 venv_juwels -exp_id=exp_test -exp_dir=era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500 ``` -### Recomendation for output folder structure and name convention +### Output folder structure and naming convention The details can be found [name_convention](docs/structure_name_convention.md) ``` @@ -106,7 +96,7 @@ The details can be found [name_convention](docs/structure_name_convention.md) │ │ │ ├── **/*.netCDF ├── PreprocessedData │ ├── [Data_name_convention] -│ │ ├── hickle +│ │ ├── pickle │ │ │ ├── train │ │ │ ├── val │ │ │ ├── test diff --git a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh index 08e1f40914b3a68b9e602c79dea9c849cbc23a25..19af1297cae1869d4d88cf7748fee71c4c4216d4 100644 --- a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh @@ -34,20 +34,16 @@ if [ -z ${VIRTUAL_ENV} ]; then fi fi - - - # declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) source_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/ destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/models/ - # valid identifiers for model-argument are: convLSTM, savp, mcnet and vae model=convLSTM model_hparams=../hparams/era5/${model}/model_hparams.json -destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" +destination_dir_full=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" # run training -srun python ../main_scripts/main_train_models.py --input_dir ${source_dir}/tfrecords/ --dataset
era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/ +srun python ../main_scripts/main_train_models.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir_full}/ diff --git a/video_prediction_tools/Zam347_scripts/train_model_era5_template.sh b/video_prediction_tools/Zam347_scripts/train_model_era5_template.sh index d06392621182ea6b046bfda77c93b1d2b422aa56..c8fcf81f717433a10a8dd1e8652a743e5e8b5332 100644 --- a/video_prediction_tools/Zam347_scripts/train_model_era5_template.sh +++ b/video_prediction_tools/Zam347_scripts/train_model_era5_template.sh @@ -9,10 +9,11 @@ exit 99 source_dir=/home/${USER}/preprocessedData/ destination_dir=/home/${USER}/models/ -# for choosing the model +# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae model=mcnet model_hparams=../hparams/era5/model_hparams.json +destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" + +# run training +python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} -# execute respective Python-script -python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/${model}/ -#srun python scripts/train.py --input_dir data/era5 --dataset era5 --model savp --model_hparams_dict hparams/kth/ours_savp/model_hparams.json --output_dir logs/era5/ours_savp diff --git a/video_prediction_tools/env_setup/create_env.sh b/video_prediction_tools/env_setup/create_env.sh index d014f5d37bd9f8f8eb900ff1529d98445b69f53d..319b8cfc5e471cce4708b8792d878e1a0606d46a 100755 --- a/video_prediction_tools/env_setup/create_env.sh +++ b/video_prediction_tools/env_setup/create_env.sh @@ -6,8 +6,8 @@ # **************** Description **************** # This script can be used for setting up the virtual environment needed for ambs-project # or to simply activate it. -# In the former case, it also converts the (Batch) script templates to executable runscripts. -# Note, that you may pass an experiment identifier as second argument to this runscript +# It also converts the (Batch) runscript templates to executable runscripts. +# Note, that you may pass an experiment identifier as second argument (default 'exp1') to this runscript # which will also be used as suffix in the executable runscripts. # **************** Description **************** # @@ -46,7 +46,7 @@ ENV_DIR=${WORKING_DIR}/${ENV_NAME} # * check if virtual env has already been set up if [[ "${EXE_DIR}" != "env_setup" ]]; then - echo "ERROR: The setup-script for the virtual environment from the env_setup-directory!" + echo "ERROR: Execute 'create_env.sh' from the env_setup-subdirectory only!" return fi @@ -119,26 +119,26 @@ if [[ "$ENV_EXIST" == 0 ]]; then if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then echo "export PYTHONPATH=${ENV_DIR}/lib/python3.6/site-packages:\$PYTHONPATH" >> ${activate_virt_env} fi - # After checking and setting up the virt env, create user-specific runscripts for all steps of the workflow - if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then - echo "***** Creating Batch-scripts for running workflow... *****" - script_dir=../HPC_scripts - elif [[ "${HOST_NAME}" == "zam347" ]]; then - echo "***** Creating Batch-scripts for running workflow... 
*****" - script_dir=../Zam347_scripts - fi - - for wf_script in "${workflow_scripts[@]}"; do - curr_script=${script_dir}/${wf_script} - if [[ -z "${exp_id}" ]]; then - ./generate_workflow_runscripts.sh ${curr_script} - else - ./generate_workflow_runscripts.sh ${curr_script} ${exp_id} - fi - done - # *** finished *** elif [[ "$ENV_EXIST" == 1 ]]; then # activating virtual env is suifficient source ${ENV_DIR}/bin/activate fi +# Finish by creating runscripts + # After checking and setting up the virt env, create user-specific runscripts for all steps of the workflow +if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then + echo "***** Creating Batch-scripts for running workflow... *****" + script_dir=../HPC_scripts +elif [[ "${HOST_NAME}" == "zam347" ]]; then + echo "***** Creating Batch-scripts for running workflow... *****" + script_dir=../Zam347_scripts +fi + +for wf_script in "${workflow_scripts[@]}"; do + curr_script=${script_dir}/${wf_script} + if [[ -z "${exp_id}" ]]; then + ./generate_workflow_runscripts.sh ${curr_script} ${ENV_NAME} + else + ./generate_workflow_runscripts.sh ${curr_script} ${ENV_NAME} -exp_id=${exp_id} + fi +done diff --git a/video_prediction_tools/env_setup/generate_workflow_runscripts.sh b/video_prediction_tools/env_setup/generate_workflow_runscripts.sh index d778192b40151ff224e60b39ab4af2b65b88e347..34aebf26fe5021575a93839866f80f9f69a54d8d 100755 --- a/video_prediction_tools/env_setup/generate_workflow_runscripts.sh +++ b/video_prediction_tools/env_setup/generate_workflow_runscripts.sh @@ -1,29 +1,80 @@ #!/usr/bin/env bash +# +# __authors__ = Michael Langguth +# __date__ = '2020_09_29' +# # **************** Description **************** -# Converts given template workflow script (path has to be passed as first argument) to +# Converts a given template workflow script (path/name has to be passed as first argument) to # an executable workflow (Batch) script. -# Note, that this first argument has to be passed with "_template.sh" omitted! -# A second argument can be passed to set an experiment identifier whose default is exp1. -# Note, that the second argument can be omitted only if there are no existing (Batch) scritps -# carrying this identifier which is added as a suffix. -# Example: -# ./generate_workflow_scripts.sh ../HPC_scripts/generate exp5 -# ... will convert generate_template.sh to generate_exp5.sh +# Note, that the first argument has to be passed with "_template.sh" omitted! +# The second argument denotes the name of the virtual environment to be used. +# Additionally, -exp_id=[some_id] and -exp_dir=[some_dir] can be optionally passed as NON-POSITIONAL arguments. +# -exp_id allows to set an experimental identifier explicitly (default is -exp_id=exp1) while +# -exp_dir allows setting manually the experimental directory. +# Note, that the latter is done during the preprocessing step in an end-to-end workflow. +# However, if the preprocessing step can be skipped (i.e. preprocessed data already exists), +# one may wish to set the experimental directory explicitly +# +# Examples: +# ./generate_workflow_scripts.sh ../HPC_scripts/generate_era5 venv_hdfml -exp_id=exp5 +# ... will convert generate_era5_template.sh to generate_era5_exp5.sh where +# venv_hdfml is the virtual environment for operation. +# +# ./generate_workflow_scripts.sh ../HPC_scripts/generate_era5 venv_hdfml -exp_id=exp5 -exp_dir=testdata +# ... 
does the same as the previous example, but additionally extends source_dir=[...]/preprocessedData/, +# checkpoint_dir=[...]/models/ and results_dir=[...]/results/ by testdata/ # **************** Description **************** # +# **************** Auxilary functions **************** +check_argin() { +# Handle input arguments and check if one of them holds -exp_id= +# or -exp_dir= to emulate them as non-positional arguments + for argin in "$@"; do + if [[ $argin == *"-exp_id="* ]]; then + exp_id=${argin#"-exp_id="} + elif [[ $argin == *"-exp_dir="* ]]; then + exp_dir=${argin#"-exp_dir="} + fi + done +} + +add_exp_dir() { +# Add exp_dir to paths in <target_script> which end with /<prefix>/ + prefix=$1 + if [[ `grep "/${prefix}/$" ${target_script}` ]]; then + echo "Add experimental directory after '${prefix}/' in runscript '${target_script}'" + sed -i "s|/${prefix}/$|/${prefix}/${exp_dir}/|g" ${target_script} + status=1 + fi +} +# **************** Auxilary functions **************** HOST_NAME=`hostname` +BASE_DIR=`pwd` +WORKING_DIR="$(dirname "$BASE_DIR")" +EXE_DIR="$(basename "$BASE_DIR")" -### some sanity checks ### +### Some sanity checks ### +# ensure that the script is executed from the env_setup-subdirectory +if [[ "${EXE_DIR}" != "env_setup" ]]; then + echo "ERROR: Execute 'generate_workflow_scripts.sh' from the env_setup-subdirectory only!" + exit 1 +fi # check input arguments -if [[ "$#" -lt 1 ]]; then - echo "ERROR: Pass path to workflow runscript (without '_template.sh') to be generated..." +if [[ "$#" -lt 2 ]]; then + echo "ERROR: Pass path to workflow runscript (without '_template.sh') and pass name of virtual environment..." + echo "Example: ./generate_workflow_scripts.sh ../HPC_scripts/DataExtraction venv_hdfml" exit 1 else curr_script=$1 - if [[ "$#" -gt 1 ]]; then - exp_id=$2 - else + curr_script_loc="$(basename "$curr_script")" + curr_venv=$2 + # check if any known non-positional argument is present... + if [[ "$#" -gt 2 ]]; then + check_argin ${@:3} + fi + #...and ensure that exp_id is always set + if [[ -z "${exp_id}" ]]; then exp_id="exp1" fi fi @@ -34,19 +85,30 @@ if ! [[ -f ${curr_script}_template.sh ]]; then echo "Thus, no corresponding executable script is created!" if [[ ${curr_script} == *"template"* || ${curr_script} == *".sh"* ]]; then echo "ERROR: Omit '_template' and/or '.sh' from Bash script argument." - exit 1 + exit 2 else exit 0 # still ok, i.e. only a WARNING is raised fi fi -# check if target script is unique + +# Check existence of virtual environment (2nd argument) +if [[ ! -d ${WORKING_DIR}/${curr_venv} ]]; then + echo "ERROR: Could not find directory of virtual environment under ${WORKING_DIR}/${curr_venv} " + exit 3 +fi + +# Check if target script is unique target_script=${curr_script}_${exp_id}.sh if [[ -f ${target_script} ]]; then echo "ERROR: ${target_script} already exist." echo "Set explicitly a different experiment identifier." - exit 1 + exit 4 +else + echo "Convert ${curr_script}_template.sh to executable runscript" + echo "The executable runscript is saved under ${target_script}" fi -### do the work ### + +### Do the work ### # create copy of template which is modified subsequently cp ${curr_script}_template.sh ${target_script} # remove template identifiers @@ -56,10 +118,24 @@ line_e=`echo ${num_lines} | cut -d' ' -f 2` if [[ ${line_s} == "" || ${line_e} == "" ]]; then echo "ERROR: ${curr_script}_template.sh exists, but does not seem to be a valid template script." 
rm ${target_script} # remove copy again - exit 1 + exit 5 else sed -i "${line_s},${line_e}d" ${target_script} fi + +# set virtual environment to be used in Batch scripts +if [[ `grep "VIRT_ENV_NAME=" ${target_script}` ]]; then + sed -i 's/VIRT_ENV_NAME=.*/VIRT_ENV_NAME="'${curr_venv}'"/g' ${target_script} +fi + +# also adapt name output- and error-files of submitted job with exp_id (if we are on Juwels or HDF-ML) +if [[ `grep "#SBATCH --output=" ${target_script}` ]]; then + sed -i "s|#SBATCH --output=.*|#SBATCH --output=${curr_script_loc}_${exp_id}-out\.%j|g" ${target_script} +fi +if [[ `grep "#SBATCH --error=" ${target_script}` ]]; then + sed -i "s|#SBATCH --error=.*|#SBATCH --error=${curr_script_loc}_${exp_id}-err\.%j|g" ${target_script} +fi + # set exp_id in (Batch) script if present if [[ `grep "exp_id=" ${target_script}` ]]; then sed -i "s/exp_id=.*/exp_id=$exp_id/g" ${target_script} @@ -75,4 +151,18 @@ if [[ "${HOST_NAME}" == hdfml* || "${HOST_NAME}" == juwels* ]]; then sed -i "s/--mail-user=.*/--mail-user=$USER_EMAIL/g" ${target_script} fi +# finally set experimental directory if exp_dir is present +if [[ ! -z "${exp_dir}" ]]; then + status=0 # status to check if exp_dir is added to the runscript at hand + # -> will be set to one by add_exp_dir if modifictaion takes place + add_exp_dir preprocessedData + add_exp_dir models + add_exp_dir results + + if [[ ${status} == 0 ]]; then + echo "WARNING: -exp_dir has been passed, but no addition to any path in runscript at hand done..." + fi +fi + + diff --git a/video_prediction_tools/env_setup/modules_preprocess.sh b/video_prediction_tools/env_setup/modules_preprocess.sh index a9de812dbde625a18198fe078ecb86c09286ed6d..35ee316db54023e9cc015dbbd2e1051c7ea7aae9 100755 --- a/video_prediction_tools/env_setup/modules_preprocess.sh +++ b/video_prediction_tools/env_setup/modules_preprocess.sh @@ -10,8 +10,8 @@ HOST_NAME=`hostname` echo "Start loading modules on ${HOST_NAME} required for preprocessing..." echo "modules_preprocess.sh is subject to: " -echo "* DataExtraction.sh" -echo "* DataPreprocess.sh" +echo "* data_extraction_era5_<exp_id>.sh" +echo "* preprocess_data_era5_step1_<exp_id>.sh" module purge module use $OTHERSTAGES diff --git a/video_prediction_tools/env_setup/modules_train.sh b/video_prediction_tools/env_setup/modules_train.sh index d45144340d334430b3d95580ceb2e74c8105e18a..685fae1de7f18a1575fd1efcaf7e344b2e3cce7c 100755 --- a/video_prediction_tools/env_setup/modules_train.sh +++ b/video_prediction_tools/env_setup/modules_train.sh @@ -10,9 +10,9 @@ HOST_NAME=`hostname` echo "Start loading modules on ${HOST_NAME}..." 
echo "modules_train.sh is subject to: " -echo "* DataPreprocess_to_tf.sh" -echo "* train_era5.sh" -echo "* generate_era5.sh" +echo "* preprocess_data_era5_step2_<exp_id>.sh" +echo "* train_model_era5_<exp_id>.sh" +echo "* visualize_postprocess_era5_<exp_id>.sh" module purge module use $OTHERSTAGES diff --git a/video_prediction_tools/main_scripts/main_visualize_postprocess.py b/video_prediction_tools/main_scripts/main_visualize_postprocess.py index 7948d650cc270c9ee33dcd32c2e03c70f9216225..5460d4f8768d0d93ffac3a42ff3345334ebb2026 100644 --- a/video_prediction_tools/main_scripts/main_visualize_postprocess.py +++ b/video_prediction_tools/main_scripts/main_visualize_postprocess.py @@ -329,8 +329,102 @@ def plot_seq_imgs(imgs,lats,lons,ts,output_png_dir,label="Ground Truth"): print("image {} saved".format(output_fname)) -def get_persistence(ts): - pass +def get_persistence(ts, input_dir_pkl): + """This function gets the persistence forecast. + 'Today's weather will be like yesterday's weather. + + Inputs: + ts: output by generate_seq_timestamps(t_start,len_seq=sequence_length) + Is a list containing dateime objects + + input_dir_pkl: input directory to pickle files + + Ouputs: + time_persistence: list containing the dates and times of the + persistence forecast. + var_peristence : sequence of images corresponding to the times + in ts_persistence + """ + ts_persistence = [] + for t in range(len(ts)): # Scarlet: this certainly can be made nicer with list comprehension + ts_temp = ts[t] - datetime.timedelta(days=1) + ts_persistence.append(ts_temp) + t_persistence_start = ts_persistence[0] + t_persistence_end = ts_persistence[-1] + year_start = t_persistence_start.year + month_start = t_persistence_start.month + month_end = t_persistence_end.month + + # only one pickle file is needed (all hours during the same month) + if month_start == month_end: + # Open files to search for the indizes of the corresponding time + time_pickle = load_pickle_for_persistence(input_dir_pkl, year_start, month_start, 'T') + # Open file to search for the correspoding meteorological fields + var_pickle = load_pickle_for_persistence(input_dir_pkl, year_start, month_start, 'X') + # Retrieve starting index + ind = list(time_pickle).index(np.array(ts_persistence[0])) + #print('Scarlet, Original', ts_persistence) + #print('From Pickle', time_pickle[ind:ind+len(ts_persistence)]) + + var_persistence = var_pickle[ind:ind+len(ts_persistence)] + time_persistence = time_pickle[ind:ind+len(ts_persistence)].ravel() + print(' Scarlet Shape of time persistence',time_persistence.shape) + #print(' Scarlet Shape of var persistence',var_persistence.shape) + + + # case that we need to derive the data from two pickle files (changing month during the forecast periode) + else: + t_persistence_first_m = [] # should hold dates of the first month + t_persistence_second_m = [] # should hold dates of the second month + + for t in range(len(ts)): + m = ts_persistence[t].month + if m == month_start: + t_persistence_first_m.append(ts_persistence[t]) + if m == month_end: + t_persistence_second_m.append(ts_persistence[t]) + + # Open files to search for the indizes of the corresponding time + time_pickle_first = load_pickle_for_persistence(input_dir_pkl, year_start, month_start, 'T') + time_pickle_second = load_pickle_for_persistence(input_dir_pkl, year_start, month_end, 'T') + + # Open file to search for the correspoding meteorological fields + var_pickle_first = load_pickle_for_persistence(input_dir_pkl, year_start, month_start, 'X') + var_pickle_second 
= load_pickle_for_persistence(input_dir_pkl, year_start, month_end, 'X') + + # Retrieve starting index + ind_first_m = list(time_pickle_first).index(np.array(t_persistence_first_m[0])) + ind_second_m = list(time_pickle_second).index(np.array(t_persistence_second_m[0])) + + #print('Scarlet, Original', ts_persistence) + #print('From Pickle', time_pickle_first[ind_first_m:ind_first_m+len(t_persistence_first_m)], time_pickle_second[ind_second_m:ind_second_m+len(t_persistence_second_m)]) + #print(' Scarlet before', time_pickle_first[ind_first_m:ind_first_m+len(t_persistence_first_m)].shape, time_pickle_second[ind_second_m:ind_second_m+len(t_persistence_second_m)].shape) + + # append the sequence of the second month to the first month + var_persistence = np.concatenate((var_pickle_first[ind_first_m:ind_first_m+len(t_persistence_first_m)], + var_pickle_second[ind_second_m:ind_second_m+len(t_persistence_second_m)]), + axis=0) + time_persistence = np.concatenate((time_pickle_first[ind_first_m:ind_first_m+len(t_persistence_first_m)], + time_pickle_second[ind_second_m:ind_second_m+len(t_persistence_second_m)]), + axis=0).ravel() # ravel is needed to eliminate the unnecessary dimension (20,1) becomes (20,) + print(' Scarlet concatenate and ravel (time)', var_persistence.shape, time_persistence.shape) + + + # tolist() is needed for plotting + return var_persistence, time_persistence.tolist() + + + +def load_pickle_for_persistence(input_dir_pkl, year_start, month_start, pkl_type): + """Helper to get the content of the pickle files. There are two types in our workflow: + T_[month].pkl where the time stamp is stored + X_[month].pkl where the variables are stored, e.g. temperature, geopotential and pressure + This helper function constructs the directory, opens the file to read it, returns the variable. + """ + path_to_pickle = input_dir_pkl+'/'+str(year_start)+'/'+pkl_type+'_{:02}.pkl'.format(month_start) + infile = open(path_to_pickle,'rb') + var = pickle.load(infile) + return var def main(): @@ -377,6 +471,11 @@ def main(): input_dir_tf = os.path.join(args.input_dir, "tfrecords") # where tensorflow records are stored dataset = setup_dataset(dataset,input_dir_tf,args.mode,args.seed,args.num_epochs,args.dataset_hparams,dataset_hparams_dict) + # +++Scarlet 20200828 + input_dir_pkl = os.path.join(args.input_dir, "pickle") + # where pickle files records are stored, needed for the persistance forecast. 
+ # ---Scarlet 20200828 + print("Step 2 finished") VideoPredictionModel = models.get_model_class(model) @@ -476,9 +575,17 @@ def main(): #Generate forecast images plot_seq_imgs(imgs=gen_images_denorm[context_frames:,:,:,0],lats=lats,lons=lons,ts=ts[context_frames+1:],label="Forecast by Model " + args.model,output_png_dir=args.results_dir) - #TODO: Scaret plot persistence image - #implment get_persistence() function - + #+++ Scarlet 20200922 + print('Scarlet', type(ts[context_frames+1:])) + print('ts', ts[context_frames+1:]) + print('context_frames:', context_frames) + persistence_images, ts_persistence = get_persistence(ts, input_dir_pkl) + print('Scarlet', type(ts_persistence)) + # I am not sure about the number of frames given with context_frames and context_frames +1 + plot_seq_imgs(imgs=persistence_images[context_frames+1:,:,:,0],lats=lats,lons=lons,ts=ts_persistence[context_frames+1:], + label="Persistence Forecast" + args.model,output_png_dir=args.results_dir) + #--- Scarlet 20200922 + #in case of generate the images for all the input, we just generate the first 5 sampe_ind examples for visuliation sample_ind += args.batch_size diff --git a/video_prediction_tools/utils/metadata.py b/video_prediction_tools/utils/metadata.py index c4ef4f36ef68dd70ae129706211a4468f60a9404..3f8f5a45458088d34730bef212dc9064201223a5 100644 --- a/video_prediction_tools/utils/metadata.py +++ b/video_prediction_tools/utils/metadata.py @@ -10,12 +10,13 @@ import json from netCDF4 import Dataset from general_utils import is_integer, add_str_to_path + class MetaData: """ Class for handling, storing and retrieving meta-data """ - - def __init__(self,json_file=None,suffix_indir=None,exp_id=None,data_filename=None,slices=None,variables=None): + + def __init__(self, json_file=None, suffix_indir=None, exp_id=None, data_filename=None, slices=None, variables=None): """ Initailizes MetaData instance by reading a corresponding json-file or by handling arguments of the Preprocessing step @@ -27,87 +28,103 @@ class MetaData: :param slices: indices defining the region of interest :param variables: predictor variables """ - - method_name = MetaData.__init__.__name__+" of Class "+MetaData.__name__ - - if not json_file is None: + + method_name = MetaData.__init__.__name__ + " of Class " + MetaData.__name__ + + if not json_file is None: print(json_file) print(type(json_file)) - MetaData.get_metadata_from_file(self,json_file) - + MetaData.get_metadata_from_file(self, json_file) + else: # No dictionary from json-file available, all other arguments have to set if not suffix_indir: - raise TypeError(method_name+": 'suffix_indir'-argument is required if 'json_file' is not passed.") + raise TypeError(method_name + ": 'suffix_indir'-argument is required if 'json_file' is not passed.") else: - if not isinstance(suffix_indir,str): - raise TypeError(method_name+": 'suffix_indir'-argument must be a string.") + if not isinstance(suffix_indir, str): + raise TypeError(method_name + ": 'suffix_indir'-argument must be a string.") if not exp_id: - raise TypeError(method_name+": 'exp_id'-argument is required if 'json_file' is not passed.") + raise TypeError(method_name + ": 'exp_id'-argument is required if 'json_file' is not passed.") else: - if not isinstance(exp_id,str): - raise TypeError(method_name+": 'exp_id'-argument must be a string.") - + if not isinstance(exp_id, str): + raise TypeError(method_name + ": 'exp_id'-argument must be a string.") + if not data_filename: - raise TypeError(method_name+": 'data_filename'-argument is required if 
'json_file' is not passed.") + raise TypeError(method_name + ": 'data_filename'-argument is required if 'json_file' is not passed.") else: - if not isinstance(data_filename,str): - raise TypeError(method_name+": 'data_filename'-argument must be a string.") - + if not isinstance(data_filename, str): + raise TypeError(method_name + ": 'data_filename'-argument must be a string.") + if not slices: - raise TypeError(method_name+": 'slices'-argument is required if 'json_file' is not passed.") + raise TypeError(method_name + ": 'slices'-argument is required if 'json_file' is not passed.") else: - if not isinstance(slices,dict): - raise TypeError(method_name+": 'slices'-argument must be a dictionary.") - + if not isinstance(slices, dict): + raise TypeError(method_name + ": 'slices'-argument must be a dictionary.") + if not variables: - raise TypeError(method_name+": 'variables'-argument is required if 'json_file' is not passed.") + raise TypeError(method_name + ": 'variables'-argument is required if 'json_file' is not passed.") else: - if not isinstance(variables,list): - raise TypeError(method_name+": 'variables'-argument must be a list.") - - MetaData.get_and_set_metadata_from_file(self,suffix_indir,exp_id,data_filename,slices,variables) - + if not isinstance(variables, list): + raise TypeError(method_name + ": 'variables'-argument must be a list.") + + MetaData.get_and_set_metadata_from_file(self, suffix_indir, exp_id, data_filename, slices, variables) + MetaData.write_metadata_to_file(self) - - def get_and_set_metadata_from_file(self,suffix_indir,exp_id,datafile_name,slices,variables): - """ - Retrieves several meta data from netCDF-file and sets corresponding class instance attributes. + def get_and_set_metadata_from_file(self, suffix_indir, exp_id, datafile_name, slices, variables): + ''' + Retrieves several meta data from an ERA5 netCDF-file and sets corresponding class instance attributes. Besides, the name of the experiment directory is constructed following the naming convention (see below) - + Naming convention: [model_base]_Y[yyyy]to[yyyy]M[mm]to[mm]-[nx]x[ny]-[nnnn]N[eeee]E-[var1]_[var2]_(...)_[varN] ---------------- Given ----------------|---------------- Created dynamically -------------- - + Note that the model-base as well as the date-identifiers must already be included in target_dir_in. 
- """ - - method_name = MetaData.get_and_set_metadata_from_file.__name__+" of Class "+MetaData.__name__ - - if not suffix_indir: raise ValueError(method_name+": suffix_indir must be a non-empty path.") - + :param suffix_indir: Path to directory where the preprocessed data will be stored + :param exp_id: Experimental identifier + :param datafile_name: ERA 5 reanalysis netCDF file + :param slices: indices of lat- and lon-coordinates defining the region of interest + :param variables: meteorological variables to be processed during preprocessing + :return: A class instance with the following attributes set: + * varnames : name of variables to be processed + * nx : number of grid points of sliced region in zonal direction + * ny : same as nx, but in meridional direction + * sw_c : south-west corner [lat,lon] coordinates of region of interest + * lat : latitude coordinates of grid points (on a rectangular grid) + * lon : longitude coordinates of grid points (on a rectangular grid) + * expname : name of target experiment directory following anming convention (see above) + * expdir : basename of experiment diretory + * exp_id : experimental identifier + * status : status to indicate if a new metadata was set up or if it's pre-exsting (left empty here!) + ''' + + method_name = MetaData.get_and_set_metadata_from_file.__name__ + " of Class " + MetaData.__name__ + + if not suffix_indir: raise ValueError(method_name + ": suffix_indir must be a non-empty path.") + # retrieve required information from file flag_coords = ["N", "E"] - - print("Retrieve metadata based on file: '"+datafile_name+"'") + + print("Retrieve metadata based on file: '" + datafile_name + "'") try: - datafile = Dataset(datafile_name,'r') + datafile = Dataset(datafile_name, 'r') except: - print(method_name + ": Error when handling data file: '"+datafile_name+"'.") + print(method_name + ": Error when handling data file: '" + datafile_name + "'.") exit() - + # Check if all requested variables can be obtained from datafile - MetaData.check_datafile(datafile,variables) - self.varnames = variables - - self.nx, self.ny = np.abs(slices['lon_e'] - slices['lon_s']), np.abs(slices['lat_e'] - slices['lat_s']) - sw_c = [float(datafile.variables['lat'][slices['lat_e']-1]),float(datafile.variables['lon'][slices['lon_s']])] # meridional axis lat is oriented from north to south (i.e. monotonically decreasing) - self.sw_c = sw_c + MetaData.check_datafile(datafile, variables) + self.varnames = variables + + self.nx, self.ny = np.abs(slices['lon_e'] - slices['lon_s']), np.abs(slices['lat_e'] - slices['lat_s']) + sw_c = [float(datafile.variables['lat'][slices['lat_e'] - 1]), float(datafile.variables['lon'][slices[ + 'lon_s']])] # meridional axis lat is oriented from north to south (i.e. 
monotonically decreasing) + self.sw_c = sw_c self.lat = datafile.variables['lat'][slices['lat_s']:slices['lat_e']] self.lon = datafile.variables['lon'][slices['lon_s']:slices['lon_e']] - + # Now start constructing expdir-string # switch sign and coordinate-flags to avoid negative values appearing in expdir-name if sw_c[0] < 0.: @@ -116,250 +133,270 @@ class MetaData: if sw_c[1] < 0.: sw_c[1] = np.abs(sw_c[1]) flag_coords[1] = "W" - nvar = len(variables) - + nvar = len(variables) + # splitting has to be done in order to retrieve the expname-suffix (and the year if required) path_parts = os.path.split(suffix_indir.rstrip("/")) - + if (is_integer(path_parts[1])): year = path_parts[1] path_parts = os.path.split(path_parts[0].rstrip("/")) else: year = "" - - expdir, expname = path_parts[0], path_parts[1] + + expdir, expname = path_parts[0], path_parts[1] # extend expdir_in successively (splitted up for better readability) - expname += "-"+str(self.nx) + "x" + str(self.ny) - expname += "-"+(("{0: 05.2f}"+flag_coords[0]+"{1:05.2f}"+flag_coords[1]).format(*sw_c)).strip().replace(".","")+"-" - + expname += "-" + str(self.nx) + "x" + str(self.ny) + expname += "-" + (("{0: 05.2f}" + flag_coords[0] + "{1:05.2f}" + flag_coords[1]).format(*sw_c)).strip().replace( + ".", "") + "-" + # reduced for-loop length as last variable-name is not followed by an underscore (see above) - for i in range(nvar-1): - expname += variables[i]+"_" - expname += variables[nvar-1] - + for i in range(nvar - 1): + expname += variables[i] + "_" + expname += variables[nvar - 1] + self.expname = expname - self.expdir = expdir - self.exp_id = exp_id - self.status = "" # uninitialized (is set when metadata is written/compared to/with json-file, see write_metadata_to_file-method) + self.expdir = expdir + self.exp_id = exp_id + self.status = "" # uninitialized (is set when metadata is written/compared to/with json-file, see write_metadata_to_file-method) # ML 2020/04/24 E - - def write_metadata_to_file(self,dest_dir = None): - - """ - Write meta data attributes of class instance to json-file. - """ - - method_name = MetaData.write_metadata_to_file.__name__+" of Class "+MetaData.__name__ + + def write_metadata_to_file(self, dest_dir=None): + ''' + Writes meta data stored as attributes in the class instance to metadata.json. + If dest_dir is None, the destination directory is constructed based on the attributes expdir and expname. 
+ :param dest_dir: path to directory where to store metadata.json + :return: - + ''' + + method_name = MetaData.write_metadata_to_file.__name__ + " of Class " + MetaData.__name__ # actual work: - meta_dict = {"expname": self.expname, - "expdir" : self.expdir, - "exp_id" : self.exp_id} - - meta_dict["sw_corner_frame"] = { - "lat" : np.around(self.sw_c[0],decimals=2), - "lon" : np.around(self.sw_c[1],decimals=2) - } - - meta_dict["coordinates"] = { - "lat" : np.around(self.lat,decimals=2).tolist(), - "lon" : np.around(self.lon,decimals=2).tolist() - } - - meta_dict["frame_size"] = { - "nx" : int(self.nx), - "ny" : int(self.ny) - } - - meta_dict["variables"] = [] + meta_dict = {"expname": self.expname, "expdir": self.expdir, "exp_id": self.exp_id, "sw_corner_frame": { + "lat": np.around(self.sw_c[0], decimals=2), + "lon": np.around(self.sw_c[1], decimals=2) + }, "coordinates": { + "lat": np.around(self.lat, decimals=2).tolist(), + "lon": np.around(self.lon, decimals=2).tolist() + }, "frame_size": { + "nx": int(self.nx), + "ny": int(self.ny) + }, "variables": []} + for i in range(len(self.varnames)): - #print(self.varnames[i]) - meta_dict["variables"].append( - {"var"+str(i+1) : self.varnames[i]}) - + # print(self.varnames[i]) + meta_dict["variables"].append({"var" + str(i + 1): self.varnames[i]}) # create directory if required - if dest_dir is None: - dest_dir = os.path.join(self.expdir,self.expname) + if dest_dir is None: + dest_dir = os.path.join(self.expdir, self.expname) if not os.path.exists(dest_dir): - print("Created experiment directory: '"+self.expdir+"'") - os.makedirs(dest_dir,exist_ok=True) - - meta_fname = os.path.join(dest_dir,"metadata.json") - - if os.path.exists(meta_fname): # check if a metadata-file already exists and check its content - print(method_name+": json-file ('"+meta_fname+"' already exists. Its content will be checked...") - self.status = "old" # set status to old in order to prevent repeated modification of shell-/Batch-scripts - with open(meta_fname,'r') as js_file: + print("Created experiment directory: '" + self.expdir + "'") + os.makedirs(dest_dir, exist_ok=True) + + meta_fname = os.path.join(dest_dir, "metadata.json") + + if os.path.exists(meta_fname): # check if a metadata-file already exists and check its content + print(method_name + ": json-file ('" + meta_fname + "' already exists. Its content will be checked...") + self.status = "old" # set status to old in order to prevent repeated modification of shell-/Batch-scripts + with open(meta_fname, 'r') as js_file: dict_dupl = json.load(js_file) - + if dict_dupl != meta_dict: - meta_fname_dbg = os.path.join(dest_dir,"metadata_debug.json") - print(method_name+": Already existing metadata (see '"+meta_fname+"') do not fit data being processed right now (see '" \ - +meta_fname_dbg+"'. Ensure a common data base.") - with open(meta_fname_dbg,'w') as js_file: - json.dump(meta_dict,js_file) + meta_fname_dbg = os.path.join(dest_dir, "metadata_debug.json") + print( + method_name + ": Already existing metadata (see '" + meta_fname + "') do not fit data being processed right now (see '" \ + + meta_fname_dbg + "'. 
Ensure a common data base.") + with open(meta_fname_dbg, 'w') as js_file: + json.dump(meta_dict, js_file) raise ValueError - else: #do not need to do anything + else: # do not need to do anything pass else: # write dictionary to file - print(method_name+": Write dictionary to json-file: '"+meta_fname+"'") - with open(meta_fname,'w') as js_file: - json.dump(meta_dict,js_file) - self.status = "new" # set status to new in order to trigger modification of shell-/Batch-scripts - - def get_metadata_from_file(self,js_file): - - """ - Retrieves meta data attributes from json-file - """ - - with open(js_file) as js_file: + print(method_name + ": Write dictionary to json-file: '" + meta_fname + "'") + with open(meta_fname, 'w') as js_file: + json.dump(meta_dict, js_file) + self.status = "new" # set status to new in order to trigger modification of shell-/Batch-scripts + + def get_metadata_from_file(self, js_file): + ''' + :param js_file: json file from which to retrieve the meta data + :return: A class instance with the following attributes set: + * varnames : name of variables to be processed + * nx : number of grid points of sliced region in zonal direction + * ny : same as nx, but in meridional direction + * sw_c : south-west corner [lat,lon] coordinates of region of interest + * lat : latitude coordinates of grid points (on a rectangular grid) + * lon : longitude coordinates of grid points (on a rectangular grid) + * expname : name of target experiment directory following naming convention (see above) + * expdir : basename of experiment directory + * exp_id : experimental identifier (if available!) + * status : status to indicate if a new metadata is set-up or pre-existing (left empty here!) + ''' + + with open(js_file) as js_file: dict_in = json.load(js_file) - + self.expdir = dict_in["expdir"] + self.expname = dict_in["expname"] + # check if exp_id is available (retained for ensuring backward compatilibity with + # old meta data files without exp_id) if "exp_id" in dict_in: self.exp_id = dict_in["exp_id"] - - self.sw_c = [dict_in["sw_corner_frame"]["lat"],dict_in["sw_corner_frame"]["lon"] ] - self.lat = dict_in["coordinates"]["lat"] - self.lon = dict_in["coordinates"]["lon"] - - self.nx = dict_in["frame_size"]["nx"] - self.ny = dict_in["frame_size"]["ny"] + + self.sw_c = [dict_in["sw_corner_frame"]["lat"], dict_in["sw_corner_frame"]["lon"]] + self.lat = dict_in["coordinates"]["lat"] + self.lon = dict_in["coordinates"]["lon"] + + self.nx = dict_in["frame_size"]["nx"] + self.ny = dict_in["frame_size"]["ny"] # dict_in["variables"] is a list like [{var1: varname1},{var2: varname2},...] 
- list_of_dict_aux = dict_in["variables"] + list_of_dict_aux = dict_in["variables"] # iterate through the list with an integer ivar # note: the naming of the variables starts with var1, thus add 1 to the iterator - self.variables = [list_of_dict_aux[ivar]["var"+str(ivar+1)] for ivar in range(len(list_of_dict_aux))] - - def write_dirs_to_batch_scripts(self,batch_script): - - """ - Expands ('known') directory-variables in batch_script by expdir-attribute of class instance - """ - - paths_to_mod = ["source_dir=","destination_dir=","checkpoint_dir=","results_dir="] # known directory-variables in batch-scripts + self.variables = [list_of_dict_aux[ivar]["var" + str(ivar + 1)] for ivar in range(len(list_of_dict_aux))] + + def write_dirs_to_batch_scripts(self, batch_script): + ''' + Method for automatic extension of path variables in Batch scripts by the experiment directory which is saved + in the expname-attribute of the class instance + :param batch_script: Batch script whose (known) path variables (defined by paths_to_mod below) will be expanded + by the expname-attribute of the class instance at hand + :return: modified Batch script + ''' + + paths_to_mod = ["source_dir=", "destination_dir=", "checkpoint_dir=", + "results_dir="] # known directory-variables in batch-scripts # For backward compability: # Check if exp_id (if present) needs to be added to batch_script in order to access the file - if hasattr(self,"exp_id"): + if hasattr(self, "exp_id"): sep_idx = batch_script.index(".sh") batch_script = batch_script[:sep_idx] + "_" + self.exp_id + batch_script[sep_idx:] - with open(batch_script,'r') as file: + with open(batch_script, 'r') as file: data = file.readlines() - + nlines = len(data) - matched_lines = [iline for iline in range(nlines) if any(str_id in data[iline] for str_id in paths_to_mod)] # list of line-number indices to be modified + matched_lines = [iline for iline in range(nlines) if any( + str_id in data[iline] for str_id in paths_to_mod)] # list of line-number indices to be modified for i in matched_lines: - data[i] = add_str_to_path(data[i],self.expname) + data[i] = add_str_to_path(data[i], self.expname) - - with open(batch_script,'w') as file: + with open(batch_script, 'w') as file: file.writelines(data) - + @staticmethod - def write_destdir_jsontmp(dest_dir, tmp_dir = None): - """ - Writes dest_dir to temporary json-file (temp.json) stored in the current working directory. - To be executed by Master node in parallel mode. - """ - + def write_destdir_jsontmp(dest_dir, tmp_dir=None): + ''' + Writes dest_dir to temporary json-file (temp.json) stored in the current working directory. + To be executed by Master node only in parallel mode. + :param dest_dir: path to destination directory + :param tmp_dir: directory where to store temp.json (optional) + :return: - + ''' + if not tmp_dir: tmp_dir = os.getcwd() - - file_tmp = os.path.join(tmp_dir,"temp.json") + + file_tmp = os.path.join(tmp_dir, "temp.json") dict_tmp = {"destination_dir": dest_dir} - - with open(file_tmp,"w") as js_file: - print("Save destination_dir-variable in temporary json-file: '"+file_tmp+"'") - json.dump(dict_tmp,js_file) - + + with open(file_tmp, "w") as js_file: + print("Save destination_dir-variable in temporary json-file: '" + file_tmp + "'") + json.dump(dict_tmp, js_file) + @staticmethod - def get_destdir_jsontmp(tmp_dir = None): - """ - Retrieves dest_dir from temporary json-file which is expected to exist in the current working directory and returns it. 
- """ - - method_name = MetaData.get_destdir_jsontmp.__name__+" of Class "+MetaData.__name__ + def get_destdir_jsontmp(tmp_dir=None): + ''' + Retrieves path destination directory from temp.json file (to be created by write_destdir_jsontmp-method) + :param tmp_dir: directory where temp.json is stored (optional). If not provided, the working directory is used. + :return: string containing the path to the destination directory + ''' + + method_name = MetaData.get_destdir_jsontmp.__name__ + " of Class " + MetaData.__name__ if not tmp_dir: tmp_dir = os.getcwd() - - file_tmp = os.path.join(tmp_dir,"temp.json") - + + file_tmp = os.path.join(tmp_dir, "temp.json") + try: - with open(file_tmp,"r") as js_file: + with open(file_tmp, "r") as js_file: dict_tmp = json.load(js_file) except: - print(method_name+": Could not open requested json-file '"+file_tmp+"'") + print(method_name + ": Could not open requested json-file '" + file_tmp + "'") sys.exit(1) - + if not "destination_dir" in dict_tmp.keys(): - raise Exception(method_name+": Could not find 'destination_dir' in dictionary obtained from "+file_tmp) + raise Exception(method_name + ": Could not find 'destination_dir' in dictionary obtained from " + file_tmp) else: - return(dict_tmp.get("destination_dir")) - + return (dict_tmp.get("destination_dir")) + @staticmethod - def wait_for_jsontmp(tmp_dir = None, waittime = 10, delay=0.5): - """ - Waits at max. waittime (in sec) until temp.json-file becomes available - """ - - method_name = MetaData.wait_for_jsontmp.__name__+" of Class "+MetaData.__name__ - + def wait_for_jsontmp(tmp_dir=None, waittime=10, delay=0.5): + ''' + Waits until temp.json-file becomes available + :param tmp_dir: directory where temp.json is stored (optional). If not provided, the working directory is used. 
+ :param waittime: time to wait in seconds (default: 10 s) + :param delay: length of checkin intervall (default: 0.5 s) + :return: - + ''' + + method_name = MetaData.wait_for_jsontmp.__name__ + " of Class " + MetaData.__name__ + if not tmp_dir: tmp_dir = os.getcwd() - - file_tmp = os.path.join(tmp_dir,"temp.json") - - counter_max = waittime/delay + + file_tmp = os.path.join(tmp_dir, "temp.json") + + counter_max = waittime / delay counter = 0 - status = "not_ok" - + status = "not_ok" + while (counter <= counter_max): if os.path.isfile(file_tmp): status = "ok" break else: time.sleep(delay) - + counter += 1 - - if status != "ok": raise IOError(method_name+": '"+file_tmp+ \ - "' does not exist after waiting for "+str(waittime)+" sec.") - - + + if status != "ok": raise IOError(method_name + ": '" + file_tmp + \ + "' does not exist after waiting for " + str(waittime) + " sec.") + @staticmethod - def issubset(a,b): - """ + def issubset(a, b): + ''' Checks if all elements of a exist in b or vice versa (depends on the length of the corresponding lists/sets) - """ - + :param a: list 1 + :param b: list 2 + :return: True or False + ''' + if len(a) > len(b): - return(set(b).issubset(set(a))) + return (set(b).issubset(set(a))) elif len(b) >= len(a): - return(set(a).issubset(set(b))) - + return (set(a).issubset(set(b))) + @staticmethod - def check_datafile(datafile,varnames): + def check_datafile(datafile, varnames): + ''' + Checks if all variables whose names are given in varnames can be found in data-object (read in from a netCDF) + :param datafile: data-object + :param varnames: names of variables to be expected in data-object + :return: Raises a ValueError if any variable cannot be found + ''' """ Checks if all varnames can be found in datafile """ - - if not MetaData.issubset(varnames,datafile.variables.keys()): + + if not MetaData.issubset(varnames, datafile.variables.keys()): for i in range(len(varnames2check)): if not varnames2check[i] in f0.variables.keys(): - print("Variable '"+varnames2check[i]+"' not found in datafile '"+data_filenames[0]+"'.") + print("Variable '" + varnames2check[i] + "' not found in datafile '" + data_filenames[0] + "'.") raise ValueError("Could not find the above mentioned variables.") else: pass - - # ----------------------------------- end of class MetaData ----------------------------------- - - - - -
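As a side note for reviewers of the refactored `MetaData` class in `video_prediction_tools/utils/metadata.py`, the sketch below illustrates how its two construction paths (from a raw ERA5 netCDF file during preprocessing, and from an already written `metadata.json` in later workflow steps) are typically exercised. It is not part of the patch; all paths, the netCDF file name and the slice indices are placeholders, and it assumes the `utils` directory is on `PYTHONPATH` so that the module can be imported directly.

```python
# Hypothetical usage sketch for the MetaData class (not part of the patch).
# All paths and file names below are placeholders and must be adapted.
import os
from metadata import MetaData

# Region of interest as index slices into the ERA5 grid plus the predictor
# variables, using the keys expected by get_and_set_metadata_from_file.
slices = {"lat_s": 74, "lat_e": 202, "lon_s": 550, "lon_e": 710}
variables = ["T2", "MSL", "gph500"]

# 1) Preprocessing step: derive the metadata from a raw ERA5 netCDF file.
#    This sets nx/ny, the south-west corner, the experiment name etc. and
#    writes <expdir>/<expname>/metadata.json.
md = MetaData(suffix_indir="/path/to/preprocessedData/era5-Y2010toY2013M01to12/2010",
              exp_id="exp1",
              data_filename="/path/to/extractedData/era5_sample.nc",
              slices=slices,
              variables=variables)
print(md.expname, md.nx, md.ny, md.sw_c)

# 2) Later workflow steps: re-load the same information from the json-file
#    that was written in step 1.
md_again = MetaData(json_file=os.path.join(md.expdir, md.expname, "metadata.json"))
```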