Skip to content
Snippets Groups Projects
Commit 2e2efc35 authored by Michael Langguth's avatar Michael Langguth
Browse files

Merge branch 'michael_issue#019_run_e4' into develop

parents 465bc8a5 3a160ed2
No related branches found
No related tags found
No related merge requests found
#!/bin/bash -x
#SBATCH --partition=casc-hw
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --time=00:10:00
#SBATCH --output=train_unet-model-out.%j
#SBATCH --error=train_unet-model-err.%j

# Slurm batch script: runs ../main_scripts/main_train.py inside a Singularity
# container, using the wrapper script to activate a virtual environment.
# All paths are derived from the current working directory (BASE_DIR is its
# parent), so the job has to be submitted from a directory whose parent holds
# env_setup/ and main_scripts/.

# auxiliary variables
WORK_DIR="$(pwd)"
BASE_DIR="$(dirname "${WORK_DIR}")"

# Name of virtual environment (passed as first argument to the wrapper)
VIRT_ENV_NAME="test"

CONTAINER_IMG="${BASE_DIR}/env_setup/tensorflow_22.02-tf2-py3.sif"
WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh"

# sanity checks: abort early (non-zero status, message on stderr) if the
# container image or the wrapper script is missing
if [[ ! -f "${CONTAINER_IMG}" ]]; then
  echo "ERROR: Cannot find required TF2.7.0 container image '${CONTAINER_IMG}'." >&2
  exit 1
fi

if [[ ! -f "${WRAPPER}" ]]; then
  echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF2.7.0 container image." >&2
  exit 1
fi

# purge modules to rely on settings in container
module purge
ml slurm
ml go-1.17.6/singularity-3.9.5

# declare directory-variables which will be modified by config_runscript.py
# (the two assignments below are deliberately left in their original form,
# since an external script rewrites them)
source_dir=/p/project/deepacf/maelstrom/data/downscaling_unet/
destination_dir=/p/project/deepacf/maelstrom/langguth1/downscaling_jsc_repo/downscaling_unet/trained_models/

# Launch the training. All expansions are quoted so that paths containing
# whitespace or glob characters reach main_train.py as single arguments.
#srun --mpi=pspmix --cpu-bind=none \
srun --cpu-bind=none \
  singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" "${VIRT_ENV_NAME}" \
  python3 ../main_scripts/main_train.py \
  -in "${source_dir}" -out "${destination_dir}" -id "${SLURM_JOBID}"
#!/bin/bash -x
#SBATCH --partition=ice-nc
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=40G
#SBATCH --time=01:00:00
#SBATCH --output=train_unet-model-out.%j
#SBATCH --error=train_unet-model-err.%j

# Slurm batch script: runs ../main_scripts/main_train.py directly on the
# node (no container), with the project directories added to PYTHONPATH.
# BASE_DIR is the parent of the current working directory, so the job has to
# be submitted from a directory whose parent holds the project packages.

# auxiliary variables
WORK_DIR="$(pwd)"
BASE_DIR="$(dirname "${WORK_DIR}")"

# Name of virtual environment (only referenced by the disabled block below)
VIRT_ENV_NAME="test"

# Loading modules
#source ../env_setup/modules_train.sh
ml slurm
ml nvidia/cuda-11.2

# Prepend the project directories to PYTHONPATH. The resulting search order
# is postprocess, models, handle_data, utils, BASE_DIR, followed by any
# pre-existing PYTHONPATH. The ${VAR:+...} form only appends the separator
# when PYTHONPATH was already set, which avoids a trailing ':' (i.e. an
# empty path entry) when it was not.
PYTHONPATH="${BASE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
PYTHONPATH="${BASE_DIR}/utils:${PYTHONPATH}"
PYTHONPATH="${BASE_DIR}/handle_data:${PYTHONPATH}"
PYTHONPATH="${BASE_DIR}/models:${PYTHONPATH}"
PYTHONPATH="${BASE_DIR}/postprocess:${PYTHONPATH}"
export PYTHONPATH

echo "${PYTHONPATH}"

# Activate virtual environment if needed (and possible)
#if [ -z ${VIRTUAL_ENV} ]; then
#   if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then
#      echo "Activating virtual environment..."
#      source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate
#   else
#      echo "ERROR: Requested virtual environment ${VIRT_ENV_NAME} not found..."
#      exit 1
#   fi
#fi

# declare directory-variables which will be modified by config_runscript.py
# (the two assignments below are deliberately left in their original form,
# since an external script rewrites them)
source_dir=/data/maelstrom/langguth1/
destination_dir=/data/maelstrom/langguth1/trained_models/

# Launch the training; expansions are quoted so each path is passed as a
# single argument.
srun python3 ../main_scripts/main_train.py \
  -in "${source_dir}" -out "${destination_dir}" -id "${SLURM_JOBID}"
File added
File added
#!/usr/bin/env bash
#
# Wrapper that activates a Python virtual environment and then runs the
# command given on the command line.
#
# Usage:   <this-script> <venv_name> <command> [args...]
#   $1     name of the virtual environment, looked up under
#          <parent of current working directory>/virtual_envs/<venv_name>
#   $2...  command (with arguments) to execute once the environment is active
#
# Exit status: 1 if the virtual environment directory does not exist,
# otherwise the status of the executed command.
# NOTE(review): presumably this is env_setup/wrapper_container.sh invoked via
# 'singularity exec' by the batch scripts -- confirm.

# basic directory variables (all derived from the current working directory)
ENV_SETUP_DIR="$(pwd)"
WORKING_DIR="$(dirname "${ENV_SETUP_DIR}")"
EXE_DIR="$(basename "${ENV_SETUP_DIR}")"   # not used further in this script
VENV_DIR="${WORKING_DIR}/virtual_envs/$1"

shift   # replaces $1 by $2, so that $@ does not include the name of the virtual environment anymore

if ! [[ -d "${VENV_DIR}" ]]; then
  echo "ERROR: Could not found virtual environment under ${VENV_DIR}!" >&2
  # explicit non-zero status: a bare 'exit' would return echo's status (0)
  # and the caller would treat the failed job step as successful
  exit 1
fi

# The original line read 'ml ml go-...', which requests a module literally
# named 'ml'; the duplicated token is dropped to match the batch script.
ml go-1.17.6/singularity-3.9.5

# unset PYTHONPATH and activate virtual environment
unset PYTHONPATH
source "${VENV_DIR}/bin/activate"

# Control
echo "****** Check PYTHONPATH *****"
echo "${PYTHONPATH}"

# MPI related environmental variables
export PMIX_SECURITY_MODE="native"   # default would include munge which is unavailable

# Run the forwarded command. "$@" keeps every argument as its own word, so
# arguments containing whitespace or glob characters are passed unchanged.
"$@"
......@@ -5,6 +5,8 @@ __update__ = "2022-02-01"
import os, sys
import argparse
from datetime import datetime as dt
print("Start with importing packages at {0}".format(dt.strftime(dt.now(), "%Y-%m-%d %H:%M:%S")))
from timeit import default_timer as timer
import json as js
import numpy as np
......@@ -37,6 +39,7 @@ def main(parser_args):
# initialize benchmarking object
bm_obj = BenchmarkCSV(os.path.join(os.getcwd(), "benchmark_training.csv"))
# read and normalize data for training
print("Start reading data from disk...")
data_obj = HandleUnetData(datadir, "training", purpose="train_aug")
data_obj.append_data("validation", purpose="val_aug")
......@@ -137,7 +140,13 @@ def main(parser_args):
js.dump(stat_info, jsf)
print("Finished job at {0}".format(dt.strftime(dt.now(), "%Y-%m-%d %H:%M:%S")))
if __name__ == "__main__":
# date format for logging (year-month-day hour:minute:second); the minute
# directive is %M -- the previous "%H:%:%S" contained a malformed "%:" directive
fmt_t = "%Y-%m-%d %H:%M:%S"
parser = argparse.ArgumentParser()
parser.add_argument("--input_dir", "-in", dest="input_dir", type=str, required=True,
help="Directory where input netCDF-files are stored.")
......@@ -155,6 +164,8 @@ if __name__ == "__main__":
"(see Sha et al., 2020)")
args = parser.parse_args()
print("Start running main-task at {0}".format(dt.strftime(dt.now(), fmt_t)))
main(args)
print("Ended running main-task at {0}".format(dt.strftime(dt.now(), fmt_t)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment