diff --git a/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh b/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh
index f856eb55e47eb89fa9dbdba96e78dbe050ecdfab..518fe84998745905092c0d7a7e62e97119d8909e 100644
--- a/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh
+++ b/video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh
@@ -1,6 +1,6 @@
 #!/bin/bash -x
-## Controlling Batch-job
-#SBATCH --account=deepacf
+## Controlling Batch-job: Need input
+#SBATCH --account=<Project name>
 #SBATCH --nodes=1
 #SBATCH --ntasks=13
 ##SBATCH --ntasks-per-node=13
@@ -13,28 +13,28 @@
 #SBATCH --mail-type=ALL
 #SBATCH --mail-user=me@somewhere.com
 
-######### Template identifier (don't remove) #########
-echo "Do not run the template scripts"
-exit 99
-######### Template identifier (don't remove) #########
-
-jutil env activate -p deepacf
-
-# Name of virtual environment
-VIRT_ENV_NAME="my_venv"
-
-# Loading mouldes
-source ../env_setup/modules_preprocess+extract.sh
-# Activate virtual environment if needed (and possible)
-if [ -z ${VIRTUAL_ENV} ]; then
-  if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then
-    echo "Activating virtual environment..."
-    source ../${VIRT_ENV_NAME}/bin/activate
-  else
-    echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..."
-    exit 1
-  fi
-fi
+##Load basic Python module: Need input
+#module load Python
+
+
+##Create and activate a virtual environment: Need input
+#VENV_NAME=<my_venv>
+#python3 -m venv ../virtual_envs/${VENV_NAME}
+#source ../virtual_envs/${VENV_NAME}/bin/activate
+
+
+## Install required packages
+# set PYTHONPATH...
+BASE_DIR="$(pwd)"
+WORKING_DIR="$(dirname "${BASE_DIR}")"
+export PYTHONPATH=${WORKING_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH
+export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH
+export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH
+export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH
+export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH
+# ...
install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt + # Declare path-variables (dest_dir will be set and configured automatically via generate_runscript.py) source_dir=/my/path/to/era5 diff --git a/video_prediction_tools/HPC_scripts/meta_postprocess_era5_template.sh b/video_prediction_tools/HPC_scripts/meta_postprocess_era5_template.sh index ec8b6eb42c0f0bef9dbc1d70701408b6fabda7f0..7d9dcd10cabf0b44ae75ead14711059c5c167d3c 100644 --- a/video_prediction_tools/HPC_scripts/meta_postprocess_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/meta_postprocess_era5_template.sh @@ -1,25 +1,56 @@ #!/bin/bash -x -## Controlling Batch-job -#SBATCH --account=deepacf +## Controlling Batch-job: Need input +#SBATCH --account=<Project name> #SBATCH --nodes=1 -#SBATCH --ntasks=1 +#SBATCH --ntasks=13 ##SBATCH --ntasks-per-node=13 #SBATCH --cpus-per-task=1 -#SBATCH --output=meta_postprocess_era5-out.%j -#SBATCH --error=meta_postprocess_era5-err.%j -#SBATCH --time=00:20:00 +#SBATCH --output=Data_Preprocess_step1_era5-out.%j +#SBATCH --error=Data_Preprocess_step1era5-err.%j +#SBATCH --time=04:20:00 #SBATCH --partition=batch #SBATCH --gres=gpu:0 #SBATCH --mail-type=ALL #SBATCH --mail-user=me@somewhere.com -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### -jutil env activate -p deepacf +##Load basic Python module: Need input +#module load Python +##Create and activate a virtual environment: Need input +#VENV_NAME=<my_venv> +#Python -m venv ../virtual_envs/${VENV_NAME} +#source ../virtual_envs/${VENV_NAME}/bin/activate + +## Install required packages +# set PYTHONPATH... +WORKING_DIR="$(pwd)" +BASE_DIR=="$(WORKING_DIR "$dir")" +export PYTHONPATH=${BASE_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH +# ... install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt + +# Name of virtual environment +VENV_NAME=venv_hdfml +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" + +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 +fi + +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
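# Hedged note on the preamble introduced above: it defines WORKING_DIR/BASE_DIR, yet the
# container-image path above still references ${WORK_DIR}, which this version no longer sets,
# and VENV_NAME appears twice (the commented <my_venv> placeholder and venv_hdfml).
# A minimal sketch of a consistent variant, assuming the script is submitted from HPC_scripts:
WORK_DIR="$(pwd)"                                         # keeps the existing ${WORK_DIR} reference valid
BASE_DIR="$(dirname "${WORK_DIR}")"
VENV_NAME="my_venv"                                       # define once; placeholder name
CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif"
WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh"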
+ exit 1 +fi + # Declare input parameters root_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/ analysis_config=video_prediction_tools/meta_postprocess_config/meta_config.json diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh index 80d4de5266bc57c944bd57ffa5359512b4f23a4b..cc500654c49e4b619399aa8685d51b7299836d42 100644 --- a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh +++ b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh @@ -1,59 +1,64 @@ #!/bin/bash -x -## Controlling Batch-job -#SBATCH --account=deepacf +## Controlling Batch-job : Need input +#SBATCH --account=<Project name> #SBATCH --nodes=1 #SBATCH --ntasks=13 -##SBATCH --ntasks-per-node=12 +##SBATCH --ntasks-per-node=13 #SBATCH --cpus-per-task=1 -#SBATCH --output=DataPreprocess_era5_step1-out.%j -#SBATCH --error=DataPreprocess_era5_step1-err.%j +#SBATCH --output=Data_Preprocess_step1_era5-out.%j +#SBATCH --error=Data_Preprocess_step1era5-err.%j #SBATCH --time=04:20:00 -#SBATCH --gres=gpu:0 #SBATCH --partition=batch +#SBATCH --gres=gpu:0 #SBATCH --mail-type=ALL #SBATCH --mail-user=me@somewhere.com -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### - -# Name of virtual environment -VIRT_ENV_NAME="my_venv" - -# Activate virtual environment if needed (and possible) -if [ -z ${VIRTUAL_ENV} ]; then - if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then - echo "Activating virtual environment..." - source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate - else - echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." - exit 1 - fi -fi -# Loading mouldes -source ../env_setup/modules_preprocess+extract.sh - - -# select years and variables for dataset and define target domain -years=( "2015" ) -variables=( "t2" "t2" "t2" ) -sw_corner=( -999.9 -999.9) -nyx=( -999 -999 ) +##Load basic Python module: Need input +#module load Python + + +##Create and activate a virtual environment : Need input +#VENV_NAME=<my_venv> +#Python -m venv ../virtual_envs/${VENV_NAME} +#source ../virtual_envs/${VENV_NAME}/bin/activate + +## Install required packages +# set PYTHONPATH... +BASE_DIR="$(pwd)" +WORKING_DIR=="$(BASE_DIR "$dir")" +export PYTHONPATH=${WORKING_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${WORKING_DIR}/postprocess:$PYTHONPATH +# ... 
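# Hedged note: the commented bootstrap above invokes "Python -m venv"; the interpreter command
# is lower-case, so once uncommented the sequence would read roughly as follows (module name
# and Python version are site-specific assumptions):
module load Python
VENV_NAME="my_venv"
python3 -m venv ../virtual_envs/${VENV_NAME}
source ../virtual_envs/${VENV_NAME}/bin/activate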
install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt + + +# select years for dataset +declare -a years=( + "2017" + ) + +max_year=`echo "${years[*]}" | sort -nr | head -n1` +min_year=`echo "${years[*]}" | sort -nr | tail -n1` # set some paths -# note, that destination_dir is adjusted during runtime based on the data -source_dir=/my/path/to/extracted/data/ -destination_dir=/my/path/to/pickle/files +# note, that destination_dir is used during runtime to set a proper experiment directory +exp_id=xxx # experiment identifier is set by 'generate_workflow_runscripts.sh' +source_dir=${SAVE_DIR}/extractedData +destination_dir=${SAVE_DIR}/preprocessedData/era5-Y${min_year}to${max_year}M01to12 +script_dir=`pwd` + +for year in "${years[@]}"; + do + echo "Year $year" + echo "source_dir ${source_dir}/${year}" + mpirun -np 2 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \ + --source_dir ${source_dir} -scr_dir ${script_dir} -exp_dir ${exp_id} \ + --destination_dir ${destination_dir} --years ${years} --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710 + done + -# execute Python-scripts -for year in "${years[@]}"; do - echo "start preprocessing data for year ${year}" - srun python ../main_scripts/main_preprocess_data_step1.py \ - --source_dir ${source_dir} --destination_dir ${destination_dir} --years "${year}" \ - --vars "${variables[0]}" "${variables[1]}" "${variables[2]}" \ - --sw_corner "${sw_corner[0]}" "${sw_corner[1]}" --nyx "${nyx[0]}" "${nyx[1]}" -done -#srun python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500 diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh index e0440dff5ab507f0ba475485781ab63283f8f4dc..1afb89088c008666c974adfc1bebe96e0c68f169 100644 --- a/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh +++ b/video_prediction_tools/HPC_scripts/preprocess_data_era5_step2_template.sh @@ -1,27 +1,41 @@ #!/bin/bash -x -#SBATCH --account=deepacf +## Controlling Batch-job: Need input +#SBATCH --account=<Project name> #SBATCH --nodes=1 #SBATCH --ntasks=13 ##SBATCH --ntasks-per-node=13 #SBATCH --cpus-per-task=1 -#SBATCH --output=DataPreprocess_era5_step2-out.%j -#SBATCH --error=DataPreprocess_era5_step2-err.%j -#SBATCH --time=04:00:00 -#SBATCH --gres=gpu:0 +#SBATCH --output=Data_Preprocess_step1_era5-out.%j +#SBATCH --error=Data_Preprocess_step1era5-err.%j +#SBATCH --time=04:20:00 #SBATCH --partition=batch +#SBATCH --gres=gpu:0 #SBATCH --mail-type=ALL #SBATCH --mail-user=me@somewhere.com -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### +##Load basic Python module: Need input +#module load Python + + +##Create and activate a virtual environment: Need input +#VENV_NAME=<my_venv> +#Python -m venv ../virtual_envs/${VENV_NAME} +#source ../virtual_envs/${VENV_NAME}/bin/activate + +## Install required packages +# set PYTHONPATH... 
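# Hedged note: in the pair of assignments that follows, the second is written with "==" (so the
# value starts with a literal "=") and expands "$(WORKING_DIR ...)" as if WORKING_DIR were a
# command. What is presumably intended is the parent of the run directory:
WORKING_DIR="$(pwd)"                       # .../video_prediction_tools/HPC_scripts
BASE_DIR="$(dirname "${WORKING_DIR}")"     # repository root, used in the PYTHONPATH exports below
# Likewise, the per-year loop in the step1 hunk above passes "--years ${years}", which expands to
# the first array element only; "--years ${year}" is presumably what is meant inside the loop.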
+WORKING_DIR="$(pwd)" +BASE_DIR=="$(WORKING_DIR "$dir")" +export PYTHONPATH=${BASE_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH +# ... install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt -# auxiliary variables -WORK_DIR="$(pwd)" -BASE_DIR=$(dirname "$WORK_DIR") # Name of virtual environment -VIRT_ENV_NAME="my_venv" +VENV_NAME=venv_hdfml # Name of container image (must be available in working directory) CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" @@ -44,14 +58,14 @@ module purge source_dir=/my/path/to/pkl/files/ destination_dir=/my/path/to/tfrecords/files -sequence_length=20 +sequence_length=24 sequences_per_file=10 # run Preprocessing (step 2 where Tf-records are generated) # run postprocessing/generation of model results including evaluation metrics export CUDA_VISIBLE_DEVICES=0 ## One node, single GPU srun --mpi=pspmix --cpu-bind=none \ - singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VENV_NAME} \ python3 ../main_scripts/main_preprocess_data_step2.py -source_dir ${source_dir} -dest_dir ${destination_dir} \ -sequence_length ${sequence_length} -sequences_per_file ${sequences_per_file} diff --git a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh old mode 100755 new mode 100644 index 8d9d7d0e8780cc5152f8e22106b878caa6ee8e83..0f7b054908d09087dc266751157959f906e33fd8 --- a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh @@ -1,25 +1,41 @@ #!/bin/bash -x -#SBATCH --account=deepacf +## Controlling Batch-job: Need input +#SBATCH --account=<Project name> #SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --output=train_model_era5-out.%j -#SBATCH --error=train_model_era5-err.%j -#SBATCH --time=24:00:00 -#SBATCH --gres=gpu:1 -#SBATCH --partition=some_partition +#SBATCH --ntasks=13 +##SBATCH --ntasks-per-node=13 +#SBATCH --cpus-per-task=1 +#SBATCH --output=Data_Preprocess_step1_era5-out.%j +#SBATCH --error=Data_Preprocess_step1era5-err.%j +#SBATCH --time=04:20:00 +#SBATCH --partition=batch +#SBATCH --gres=gpu:0 #SBATCH --mail-type=ALL #SBATCH --mail-user=me@somewhere.com -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### +##Load basic Python module: Need input +#module load Python + + +##Create and activate a virtual environment: Need input +#VENV_NAME=<my_venv> +#Python -m venv ../virtual_envs/${VENV_NAME} +#source ../virtual_envs/${VENV_NAME}/bin/activate + +## Install required packages +# set PYTHONPATH... +WORKING_DIR="$(pwd)" +BASE_DIR=="$(WORKING_DIR "$dir")" +export PYTHONPATH=${BASE_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH +# ... 
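# Hedged note: the replacement SBATCH header of this training template reuses the preprocessing
# settings (13 tasks, gpu:0, 04:20 walltime, Data_Preprocess_* log names), while the job below
# runs training inside the TF1.15 container on a GPU. A sketch closer to the removed header
# (partition name remains a placeholder):
#SBATCH --ntasks=1
#SBATCH --output=train_model_era5-out.%j
#SBATCH --error=train_model_era5-err.%j
#SBATCH --time=24:00:00
#SBATCH --gres=gpu:1
#SBATCH --partition=some_partition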
install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt -# auxiliary variables -WORK_DIR="$(pwd)" -BASE_DIR=$(dirname "$WORK_DIR") # Name of virtual environment -VIRT_ENV_NAME="my_venv" +VENV_NAME=venv_hdfml # Name of container image (must be available in working directory) CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" @@ -38,6 +54,7 @@ fi # clean-up modules to avoid conflicts between host and container settings module purge + # declare directory-variables which will be modified by generate_runscript.py source_dir=/my/path/to/tfrecords/files destination_dir=/my/model/output/path diff --git a/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh b/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh index be3e67c03f8384de39e9d193ad206e44695282df..e7f169337b5bddb47fc62116bce6b2af96991d7d 100644 --- a/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/visualize_postprocess_era5_template.sh @@ -1,27 +1,41 @@ #!/bin/bash -x -#SBATCH --account=deepacf +## Controlling Batch-job: Need input +#SBATCH --account=<Project name> #SBATCH --nodes=1 -#SBATCH --ntasks=1 -##SBATCH --ntasks-per-node=1 +#SBATCH --ntasks=13 +##SBATCH --ntasks-per-node=13 #SBATCH --cpus-per-task=1 -#SBATCH --output=postprocess_era5-out.%j -#SBATCH --error=postprocess_era5-err.%j -#SBATCH --time=01:00:00 -#SBATCH --gres=gpu:1 -#SBATCH --partition=gpus +#SBATCH --output=Data_Preprocess_step1_era5-out.%j +#SBATCH --error=Data_Preprocess_step1era5-err.%j +#SBATCH --time=04:20:00 +#SBATCH --partition=batch +#SBATCH --gres=gpu:0 #SBATCH --mail-type=ALL #SBATCH --mail-user=me@somewhere.com -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### +##Load basic Python module: Need input +#module load Python -# auxiliary variables -WORK_DIR="$(pwd)" -BASE_DIR=$(dirname "$WORK_DIR") -# Name of virtual environment -VIRT_ENV_NAME="my_venv" + +##Create and activate a virtual environment: Need input +#VENV_NAME=<my_venv> +#Python -m venv ../virtual_envs/${VENV_NAME} +#source ../virtual_envs/${VENV_NAME}/bin/activate + +## Install required packages +# set PYTHONPATH... +WORKING_DIR="$(pwd)" +BASE_DIR=="$(WORKING_DIR "$dir")" +export PYTHONPATH=${BASE_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/utils:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/model_modules:$PYTHONPATH +export PYTHONPATH=${BASE_DIR}/postprocess:$PYTHONPATH +# ... install requirements +pip install --no-cache-dir -r ../env_setup/requirements.txt + +# Name of virtual environment +VENV_NAME=venv_hdfml # Name of container image (must be available in working directory) CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" @@ -37,8 +51,6 @@ if [[ ! 
-f ${WRAPPER} ]]; then exit 1 fi -# clean-up modules to avoid conflicts between host and container settings -module purge # declare directory-variables which will be modified by generate_runscript.py # Note: source_dir is only needed for retrieving the base-directory @@ -56,3 +68,4 @@ srun --mpi=pspmix --cpu-bind=none \ --results_dir ${results_dir} --batch_size 4 \ --num_stochastic_samples 1 ${lquick} \ > postprocess_era5-out_all."${SLURM_JOB_ID}" + diff --git a/video_prediction_tools/JSC_scripts/data_extraction_era5_template.sh b/video_prediction_tools/JSC_scripts/data_extraction_era5_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..f856eb55e47eb89fa9dbdba96e78dbe050ecdfab --- /dev/null +++ b/video_prediction_tools/JSC_scripts/data_extraction_era5_template.sh @@ -0,0 +1,51 @@ +#!/bin/bash -x +## Controlling Batch-job +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=13 +##SBATCH --ntasks-per-node=13 +#SBATCH --cpus-per-task=1 +#SBATCH --output=data_extraction_era5-out.%j +#SBATCH --error=data_extraction_era5-err.%j +#SBATCH --time=04:20:00 +#SBATCH --partition=batch +#SBATCH --gres=gpu:0 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +jutil env activate -p deepacf + +# Name of virtual environment +VIRT_ENV_NAME="my_venv" + +# Loading mouldes +source ../env_setup/modules_preprocess+extract.sh +# Activate virtual environment if needed (and possible) +if [ -z ${VIRTUAL_ENV} ]; then + if [[ -f ../${VIRT_ENV_NAME}/bin/activate ]]; then + echo "Activating virtual environment..." + source ../${VIRT_ENV_NAME}/bin/activate + else + echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." 
+ exit 1 + fi +fi + +# Declare path-variables (dest_dir will be set and configured automatically via generate_runscript.py) +source_dir=/my/path/to/era5 +destination_dir=/my/path/to/extracted/data +varmap_file=/my/path/to/varmapping/file + +years=( "2015" ) + +# Run data extraction +for year in "${years[@]}"; do + echo "Perform ERA5-data extraction for year ${year}" + srun python ../main_scripts/main_data_extraction.py --source_dir ${source_dir} --target_dir ${destination_dir} \ + --year ${year} --varslist_path ${varmap_file} +done diff --git a/video_prediction_tools/JSC_scripts/meta_postprocess_era5_template.sh b/video_prediction_tools/JSC_scripts/meta_postprocess_era5_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..ec8b6eb42c0f0bef9dbc1d70701408b6fabda7f0 --- /dev/null +++ b/video_prediction_tools/JSC_scripts/meta_postprocess_era5_template.sh @@ -0,0 +1,31 @@ +#!/bin/bash -x +## Controlling Batch-job +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +##SBATCH --ntasks-per-node=13 +#SBATCH --cpus-per-task=1 +#SBATCH --output=meta_postprocess_era5-out.%j +#SBATCH --error=meta_postprocess_era5-err.%j +#SBATCH --time=00:20:00 +#SBATCH --partition=batch +#SBATCH --gres=gpu:0 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### +jutil env activate -p deepacf + + +# Declare input parameters +root_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/ +analysis_config=video_prediction_tools/meta_postprocess_config/meta_config.json +metric=mse +exp_id=test +enable_skill_scores=True + +srun python ../main_scripts/main_meta_postprocess.py --root_dir ${root_dir} --analysis_config ${analysis_config} \ + --metric ${metric} --exp_id ${exp_id} --enable_skill_scores ${enable_skill_scores} diff --git a/video_prediction_tools/JSC_scripts/preprocess_data_era5_step1_template.sh b/video_prediction_tools/JSC_scripts/preprocess_data_era5_step1_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..80d4de5266bc57c944bd57ffa5359512b4f23a4b --- /dev/null +++ b/video_prediction_tools/JSC_scripts/preprocess_data_era5_step1_template.sh @@ -0,0 +1,59 @@ +#!/bin/bash -x +## Controlling Batch-job +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=13 +##SBATCH --ntasks-per-node=12 +#SBATCH --cpus-per-task=1 +#SBATCH --output=DataPreprocess_era5_step1-out.%j +#SBATCH --error=DataPreprocess_era5_step1-err.%j +#SBATCH --time=04:20:00 +#SBATCH --gres=gpu:0 +#SBATCH --partition=batch +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +# Name of virtual environment +VIRT_ENV_NAME="my_venv" + +# Activate virtual environment if needed (and possible) +if [ -z ${VIRTUAL_ENV} ]; then + if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then + echo "Activating virtual environment..." + source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate + else + echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..." 
+ exit 1 + fi +fi +# Loading mouldes +source ../env_setup/modules_preprocess+extract.sh + + +# select years and variables for dataset and define target domain +years=( "2015" ) +variables=( "t2" "t2" "t2" ) +sw_corner=( -999.9 -999.9) +nyx=( -999 -999 ) + +# set some paths +# note, that destination_dir is adjusted during runtime based on the data +source_dir=/my/path/to/extracted/data/ +destination_dir=/my/path/to/pickle/files + +# execute Python-scripts +for year in "${years[@]}"; do + echo "start preprocessing data for year ${year}" + srun python ../main_scripts/main_preprocess_data_step1.py \ + --source_dir ${source_dir} --destination_dir ${destination_dir} --years "${year}" \ + --vars "${variables[0]}" "${variables[1]}" "${variables[2]}" \ + --sw_corner "${sw_corner[0]}" "${sw_corner[1]}" --nyx "${nyx[0]}" "${nyx[1]}" +done + + +#srun python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500 diff --git a/video_prediction_tools/JSC_scripts/preprocess_data_era5_step2_template.sh b/video_prediction_tools/JSC_scripts/preprocess_data_era5_step2_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..e0440dff5ab507f0ba475485781ab63283f8f4dc --- /dev/null +++ b/video_prediction_tools/JSC_scripts/preprocess_data_era5_step2_template.sh @@ -0,0 +1,57 @@ +#!/bin/bash -x +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=13 +##SBATCH --ntasks-per-node=13 +#SBATCH --cpus-per-task=1 +#SBATCH --output=DataPreprocess_era5_step2-out.%j +#SBATCH --error=DataPreprocess_era5_step2-err.%j +#SBATCH --time=04:00:00 +#SBATCH --gres=gpu:0 +#SBATCH --partition=batch +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment +VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" + +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 +fi + +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + +# declare directory-variables which will be modified by config_runscript.py +source_dir=/my/path/to/pkl/files/ +destination_dir=/my/path/to/tfrecords/files + +sequence_length=20 +sequences_per_file=10 +# run Preprocessing (step 2 where Tf-records are generated) +# run postprocessing/generation of model results including evaluation metrics +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 ../main_scripts/main_preprocess_data_step2.py -source_dir ${source_dir} -dest_dir ${destination_dir} \ + -sequence_length ${sequence_length} -sequences_per_file ${sequences_per_file} + diff --git a/video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh b/video_prediction_tools/JSC_scripts/preprocess_data_moving_mnist_template.sh similarity index 100% rename from video_prediction_tools/HPC_scripts/preprocess_data_moving_mnist_template.sh rename to video_prediction_tools/JSC_scripts/preprocess_data_moving_mnist_template.sh diff --git a/video_prediction_tools/JSC_scripts/train_model_era5_template.sh b/video_prediction_tools/JSC_scripts/train_model_era5_template.sh new file mode 100755 index 0000000000000000000000000000000000000000..8d9d7d0e8780cc5152f8e22106b878caa6ee8e83 --- /dev/null +++ b/video_prediction_tools/JSC_scripts/train_model_era5_template.sh @@ -0,0 +1,57 @@ +#!/bin/bash -x +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --output=train_model_era5-out.%j +#SBATCH --error=train_model_era5-err.%j +#SBATCH --time=24:00:00 +#SBATCH --gres=gpu:1 +#SBATCH --partition=some_partition +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment +VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" + +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 +fi + +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + +# declare directory-variables which will be modified by generate_runscript.py +source_dir=/my/path/to/tfrecords/files +destination_dir=/my/model/output/path + +# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae +model=convLSTM +datasplit_dict=${destination_dir}/data_split.json +model_hparams=${destination_dir}/model_hparams.json + +# run training in container +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 "${BASE_DIR}"/main_scripts/main_train_models.py --input_dir ${source_dir} --datasplit_dict ${datasplit_dict} \ + --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/ + diff --git a/video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh b/video_prediction_tools/JSC_scripts/train_model_moving_mnist_template.sh similarity index 100% rename from video_prediction_tools/HPC_scripts/train_model_moving_mnist_template.sh rename to video_prediction_tools/JSC_scripts/train_model_moving_mnist_template.sh diff --git a/video_prediction_tools/JSC_scripts/visualize_postprocess_era5_template.sh b/video_prediction_tools/JSC_scripts/visualize_postprocess_era5_template.sh new file mode 100644 index 0000000000000000000000000000000000000000..be3e67c03f8384de39e9d193ad206e44695282df --- /dev/null +++ b/video_prediction_tools/JSC_scripts/visualize_postprocess_era5_template.sh @@ -0,0 +1,58 @@ +#!/bin/bash -x +#SBATCH --account=deepacf +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +##SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=1 +#SBATCH --output=postprocess_era5-out.%j +#SBATCH --error=postprocess_era5-err.%j +#SBATCH --time=01:00:00 +#SBATCH --gres=gpu:1 +#SBATCH --partition=gpus +#SBATCH --mail-type=ALL +#SBATCH --mail-user=me@somewhere.com + +######### Template identifier (don't remove) ######### +echo "Do not run the template scripts" +exit 99 +######### Template identifier (don't remove) ######### + +# auxiliary variables +WORK_DIR="$(pwd)" +BASE_DIR=$(dirname "$WORK_DIR") +# Name of virtual environment +VIRT_ENV_NAME="my_venv" +# Name of container image (must be available in working directory) +CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif" +WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh" + +# sanity checks +if [[ ! -f ${CONTAINER_IMG} ]]; then + echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'." + exit 1 +fi + +if [[ ! -f ${WRAPPER} ]]; then + echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image." 
+ exit 1 +fi + +# clean-up modules to avoid conflicts between host and container settings +module purge + +# declare directory-variables which will be modified by generate_runscript.py +# Note: source_dir is only needed for retrieving the base-directory +source_dir=/my/source/dir/ +checkpoint_dir=/my/trained/model/dir +results_dir=/my/results/dir +lquick="" + +# run postprocessing/generation of model results including evaluation metrics +export CUDA_VISIBLE_DEVICES=0 +## One node, single GPU +srun --mpi=pspmix --cpu-bind=none \ + singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \ + python3 ../main_scripts/main_visualize_postprocess.py --checkpoint ${checkpoint_dir} --mode test \ + --results_dir ${results_dir} --batch_size 4 \ + --num_stochastic_samples 1 ${lquick} \ + > postprocess_era5-out_all."${SLURM_JOB_ID}" diff --git a/video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh b/video_prediction_tools/JSC_scripts/visualize_postprocess_moving_mnist_template.sh similarity index 100% rename from video_prediction_tools/HPC_scripts/visualize_postprocess_moving_mnist_template.sh rename to video_prediction_tools/JSC_scripts/visualize_postprocess_moving_mnist_template.sh diff --git a/video_prediction_tools/env_setup/requirements.txt b/video_prediction_tools/env_setup/requirements.txt index 28b7c6f83865095745ccab685b08c60aba8a71f9..35f6eb2439047f0697b04a0bac87bfb361fa0790 100755 --- a/video_prediction_tools/env_setup/requirements.txt +++ b/video_prediction_tools/env_setup/requirements.txt @@ -6,4 +6,8 @@ basemap==1.3.0 numpy==1.17.3 # although this numpy-version is in the container, we set it here to avoid any further installation scikit-image==0.18.1 opencv-python-headless==4.2.0.34 -netcdf4 +netcdf4==1.5.8 +metadata==0.2 +normalization==0.4 +utils==1.0.1 + diff --git a/video_prediction_tools/env_setup/requirements_noHPC.txt b/video_prediction_tools/env_setup/requirements_noHPC.txt deleted file mode 100755 index 44060174992ed2035c5c8ccd31eedbefce296d02..0000000000000000000000000000000000000000 --- a/video_prediction_tools/env_setup/requirements_noHPC.txt +++ /dev/null @@ -1,7 +0,0 @@ -opencv-python==4.2.0.34 -hickle -mpi4py -netCDF4 -numpy -h5py -tensorflow-gpu==1.13.1 \ No newline at end of file diff --git a/video_prediction_tools/nonHPC_scripts/data_extraction_era5_template.sh b/video_prediction_tools/nonHPC_scripts/data_extraction_era5_template.sh deleted file mode 100644 index e208fd69322cb82938482c745e311c0eb0d4fe11..0000000000000000000000000000000000000000 --- a/video_prediction_tools/nonHPC_scripts/data_extraction_era5_template.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -x - -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### - -mpirun -np 4 python ../../workflow_parallel_frame_prediction/DataExtraction/mpi_stager_v2.py --source_dir /home/b.gong/data_era5/2017/ --destination_dir /home/${USER}/extractedData/2017 diff --git a/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step1_template.sh b/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step1_template.sh deleted file mode 100644 index 172763151557108664c8ba8c9b9028ea2aa5f4f0..0000000000000000000000000000000000000000 --- a/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step1_template.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -x - -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 
-######### Template identifier (don't remove) ######### - -# select years for dataset -declare -a years=( - "2017" - ) - -max_year=`echo "${years[*]}" | sort -nr | head -n1` -min_year=`echo "${years[*]}" | sort -nr | tail -n1` -# set some paths -# note, that destination_dir is used during runtime to set a proper experiment directory -exp_id=xxx # experiment identifier is set by 'generate_workflow_runscripts.sh' -source_dir=${SAVE_DIR}/extractedData -destination_dir=${SAVE_DIR}/preprocessedData/era5-Y${min_year}to${max_year}M01to12 -script_dir=`pwd` - -for year in "${years[@]}"; - do - echo "Year $year" - echo "source_dir ${source_dir}/${year}" - mpirun -np 2 python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_stager_v2_process_netCDF.py \ - --source_dir ${source_dir} -scr_dir ${script_dir} -exp_dir ${exp_id} \ - --destination_dir ${destination_dir} --years ${years} --vars T2 MSL gph500 --lat_s 74 --lat_e 202 --lon_s 550 --lon_e 710 - done -python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500 - - - - diff --git a/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step2_template.sh b/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step2_template.sh deleted file mode 100644 index fc92afd8fe7e8385162ba95a5e1ab3c6ceab599a..0000000000000000000000000000000000000000 --- a/video_prediction_tools/nonHPC_scripts/preprocess_data_era5_step2_template.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -x - -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### - -# declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) -source_dir=/home/${USER}/preprocessedData/ -destination_dir=/home/${USER}/preprocessedData/ - - -python ../video_prediction/datasets/era5_dataset_v2.py ${source_dir}/hickle/splits ${destination_dir}/tfrecords -vars T2 MSL gph500 -height 128 -width 160 -seq_length 20 diff --git a/video_prediction_tools/nonHPC_scripts/train_model_era5_template.sh b/video_prediction_tools/nonHPC_scripts/train_model_era5_template.sh deleted file mode 100644 index c8fcf81f717433a10a8dd1e8652a743e5e8b5332..0000000000000000000000000000000000000000 --- a/video_prediction_tools/nonHPC_scripts/train_model_era5_template.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -x - -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### - -# declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) -source_dir=/home/${USER}/preprocessedData/ -destination_dir=/home/${USER}/models/ - -# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae -model=mcnet -model_hparams=../hparams/era5/model_hparams.json -destination_dir=${destination_dir}/${model}/"$(date +"%Y%m%dT%H%M")_"$USER"/" - -# run training -python ../scripts/train_dummy.py --input_dir ${source_dir}/tfrecords/ --dataset era5 --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} - diff --git a/video_prediction_tools/nonHPC_scripts/visualize_postprocess_era5_template.sh b/video_prediction_tools/nonHPC_scripts/visualize_postprocess_era5_template.sh deleted file mode 100644 index 
72768e241ce5ce612a6775668610ab1456436201..0000000000000000000000000000000000000000 --- a/video_prediction_tools/nonHPC_scripts/visualize_postprocess_era5_template.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -x - -######### Template identifier (don't remove) ######### -echo "Do not run the template scripts" -exit 99 -######### Template identifier (don't remove) ######### - -# declare directory-variables which will be modified appropriately during Preprocessing (invoked by mpi_split_data_multi_years.py) -source_dir=/home/${USER}/preprocessedData/ -checkpoint_dir=/home/${USER}/models/ -results_dir=/home/${USER}/results/ - -# for choosing the model -model=mcnet - -# execute respective Python-script -python -u ../scripts/generate_transfer_learning_finetune.py \ ---input_dir ${source_dir}/tfrecords \ ---dataset_hparams sequence_length=20 --checkpoint ${checkpoint_dir}/${model} \ ---mode test --results_dir ${results_dir} \ ---batch_size 2 --dataset era5 > generate_era5-out.out - -#srun python scripts/train.py --input_dir data/era5 --dataset era5 --model savp --model_hparams_dict hparams/kth/ours_savp/model_hparams.json --output_dir logs/era5/ours_savp
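Taken together, the "Need input" preamble that the modified HPC_scripts templates introduce amounts to the same four steps in every script: load a Python module, create and activate a virtual environment, extend PYTHONPATH, and install the pinned requirements. A minimal, self-contained sketch of that preamble with the syntax slips fixed follows; the module name, the python3.8 path component and the venv name are assumptions that have to match the target site:

#!/bin/bash -x
## Load a site-provided Python module (name/version are placeholders)
module load Python

## Create and activate the virtual environment under ../virtual_envs
VENV_NAME="my_venv"
python3 -m venv ../virtual_envs/${VENV_NAME}
source ../virtual_envs/${VENV_NAME}/bin/activate

## Make the repository packages and the venv's site-packages importable
WORKING_DIR="$(pwd)"                      # .../video_prediction_tools/HPC_scripts
BASE_DIR="$(dirname "${WORKING_DIR}")"    # .../video_prediction_tools
export PYTHONPATH=${BASE_DIR}/virtual_envs/${VENV_NAME}/lib/python3.8/site-packages:$PYTHONPATH
export PYTHONPATH=${BASE_DIR}:${BASE_DIR}/utils:${BASE_DIR}/model_modules:${BASE_DIR}/postprocess:$PYTHONPATH

## Install the pinned requirements into the virtual environment
pip install --no-cache-dir -r ../env_setup/requirements.txt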