esde / machine-learning / AMBS / Commits

Commit 5b3d7a79 authored Dec 08, 2022 by Bing Gong

Merge branch 'bing_issue#188_restructure_ambs' into develop

Parents: 4ed09385, b445f890
Pipeline #121747 failed with stages in 17 seconds
Changes: 153
Pipelines: 1
Jupyter_Notebooks/Data_Preprocess_toy.ipynb
0 → 100644
This diff is collapsed.
foo.ipynb
0 → 100644
{
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"import xarray as xr\n",
"import numpy as np\n",
"\n",
"filenames_t850 = [\n",
" \"data_t850/temperature_850hPa_1979_5.625deg.nc\",\n",
" \"data_t850/temperature_850hPa_1980_5.625deg.nc\"\n",
"]\n",
"filenames_z500 = [\n",
" \"data_z500/geopotential_500hPa_1979_5.625deg.nc\",\n",
" \"data_z500/geopotential_500hPa_1980_5.625deg.nc\"\n",
"]\n",
"filenames = [*filenames_t850, *filenames_z500]\n",
"ds = xr.open_mfdataset(filenames, coords=\"minimal\", compat=\"override\")\n",
"ds = ds.drop_vars(\"level\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(32, 64, 2)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"da = ds.to_array(dim=\"variables\").squeeze()\n",
"\n",
"dims = [\"time\", \"lat\", \"lon\", \"variables\"]\n",
"da = da.transpose(*dims)\n",
"\n",
"def generator(iterable):\n",
" iterator = iter(iterable)\n",
" yield from iterator\n",
"\n",
"da.shape[1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ambs",
"language": "python",
"name": "ambs"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "341ba53bbba0a6f1cf5ae0d50bab29c5266302a4d2a8950e418cc5f54c6f95ff"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:

``` python
import xarray as xr
import numpy as np

filenames_t850 = [
    "data_t850/temperature_850hPa_1979_5.625deg.nc",
    "data_t850/temperature_850hPa_1980_5.625deg.nc"
]
filenames_z500 = [
    "data_z500/geopotential_500hPa_1979_5.625deg.nc",
    "data_z500/geopotential_500hPa_1980_5.625deg.nc"
]
filenames = [*filenames_t850, *filenames_z500]
ds = xr.open_mfdataset(filenames, coords="minimal", compat="override")
ds = ds.drop_vars("level")
```

%% Cell type:code id: tags:

``` python
da = ds.to_array(dim="variables").squeeze()

dims = ["time", "lat", "lon", "variables"]
da = da.transpose(*dims)

def generator(iterable):
    iterator = iter(iterable)
    yield from iterator

da.shape[1:]
```

%% Output

(32, 64, 2)

%% Cell type:code id: tags:

``` python
```
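For orientation, here is a minimal, self-contained sketch of the stacking step in the notebook above, using synthetic arrays in place of the WeatherBench NetCDF files (the 5.625° grid gives 32 latitudes by 64 longitudes; everything except the dimension layout is illustrative):

``` python
import numpy as np
import xarray as xr

# synthetic stand-ins for the t850/z500 fields on the 5.625-degree grid
time = np.arange(4)
lat = np.linspace(-87.1875, 87.1875, 32)
lon = np.arange(0.0, 360.0, 5.625)
ds = xr.Dataset(
    {
        "t": (("time", "lat", "lon"), np.random.rand(4, 32, 64)),
        "z": (("time", "lat", "lon"), np.random.rand(4, 32, 64)),
    },
    coords={"time": time, "lat": lat, "lon": lon},
)

# same stacking as in the notebook: variables become the trailing dimension
da = ds.to_array(dim="variables").squeeze()
da = da.transpose("time", "lat", "lon", "variables")
assert da.shape[1:] == (32, 64, 2)  # matches the cell output above

# the generator helper simply re-yields items, e.g. one field per time step
def generator(iterable):
    yield from iter(iterable)

first_sample = next(generator(da.values))
print(first_sample.shape)  # (32, 64, 2)
```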
test/run_pytest.sh
#!/bin/bash
# Name of virtual environment
#VIRT_ENV_NAME="vp_new_structure"
VIRT_ENV_NAME="env_hdfml"
# Name of virtual environment
VIRT_ENV_NAME="venv2_hdfml"

if [ -z ${VIRTUAL_ENV} ]; then
  if [[ -f ../video_prediction_tools/${VIRT_ENV_NAME}/bin/activate ]]; then
...
...
@@ -21,6 +20,7 @@ fi
#python -m pytest test_prepare_era5_data.py
##Test for preprocess_step1
#python -m pytest test_process_netCDF_v2.py
#source ../video_prediction_tools/env_setup/modules_preprocess+extract.sh
source ../video_prediction_tools/env_setup/modules_train.sh
##Test for preprocess moving mnist
#python -m pytest test_prepare_moving_mnist_data.py
...
...
@@ -33,5 +33,5 @@ source ../video_prediction_tools/env_setup/modules_train.sh
#rm /p/project/deepacf/deeprain/video_prediction_shared_folder/models/test/*
#python -m pytest test_train_model_era5.py
#python -m pytest test_vanilla_vae_model.py
python -m pytest test_visualize_postprocess.py
python -m pytest test_gzprcp_data.py
#python -m pytest test_meta_postprocess.py
test/run_pytest_era5_data_preprocess.sh
0 → 100755
# Name of virtual environment
VIRT_ENV_NAME="venv_hdfml"
CONTAINER_IMG="../video_prediction_tools/HPC_scripts/tensorflow_21.09-tf1-py3.sif"
WRAPPER="./wrapper_container.sh"

# sanity checks
if [[ ! -f ${CONTAINER_IMG} ]]; then
  echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'."
  exit 1
fi

if [[ ! -f ${WRAPPER} ]]; then
  echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image."
  exit 1
fi

#source ../video_prediction_tools/env_setup/modules_preprocess+extract.sh
singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} python3 -m pytest test_era5_data.py
test/test_era5_data.py
__email__ = "b.gong@fz-juelich.de"
__author__ = "Bing Gong, Scarlet Stadtler,Michael Langguth"
__author__ = "Bing Gong"

from video_prediction.datasets.era5_dataset import *
import pytest
import xarray as xr
import os
import tensorflow as tf
import numpy as np
import json
import datetime

input_dir = "/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/test"
datasplit_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/data_split/cv_test.json"
hparams_dict_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/hparams/era5/convLSTM/model_hparams.json"
sequences_per_file = 10
mode = "val"

input_dir = "/p/project/deepacf/deeprain/video_prediction_shared_folder/test_data_roshni"
datasplit_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/data_split/test/cv_test.json"
hparams_dict_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/hparams/era5/convLSTM/model_hparams_template.json"
mode = "test"
@pytest.fixture(scope="module")
def era5_dataset_case2():
    return ERA5Dataset(input_dir=input_dir, mode=mode, datasplit_config=datasplit_config,
                       hparams_dict_config=hparams_dict_config, seed=1234)


def test_init_era5_dataset(era5_dataset_case2):
    assert era5_dataset_case2.hparams.max_epochs == 20
    assert era5_dataset_case2.mode == mode


def era5_dataset_case1():
    return ERA5Dataset(input_dir=input_dir, datasplit_config=datasplit_config,
                       hparams_dict_config=hparams_dict_config, mode="test",
                       seed=1234, nsamples_ref=1000)


def test_get_tfrecords_filesnames(era5_dataset_case2):
    era5_dataset_case2.get_tfrecords_filesnames_base_datasplit()
    assert era5_dataset_case2.filenames[0] == os.path.join(input_dir, "tfrecords",
                                                           "sequence_Y_2017_M_2_0_to_9.tfrecords")
# def test_check_pkl_tfrecords_consistency(era5_dataset_case1):
def test_get_example_info(era5_dataset_case2):
    era5_dataset_case2.get_tfrecords_filesnames_base_datasplit()
    era5_dataset_case2.get_example_info()
    assert era5_dataset_case2.image_shape[0] == 160
    assert era5_dataset_case2.image_shape[1] == 128
    assert era5_dataset_case2.image_shape[2] == 3


def test_init_era5_dataset(era5_dataset_case1):
    era5_dataset_case1.get_hparams()
    assert era5_dataset_case1.max_epochs == 20
    assert era5_dataset_case1.mode == mode
    assert era5_dataset_case1.batch_size == 4


def test_get_filenames_from_datasplit(era5_dataset_case1):
    flname = os.path.join(era5_dataset_case1.input_dir, "era5_vars4ambs_201901.nc")
    n_files = len(era5_dataset_case1.filenames)
    check = flname in era5_dataset_case1.filenames
    assert check == True
    assert n_files == 12
def test_make_dataset(era5_dataset_case1):
    # Get the data from nc files directly
    data_arr = era5_dataset_case1.load_data_from_nc()
    assert len(data_arr) != 0
    ds = xr.open_mfdataset(era5_dataset_case1.filenames)
    len_dt = len(ds["time"].values)
    # count number of images/samples in the test dataset
    da = ds.to_array(dim="variables").squeeze()
    dims = ["time", "lat", "lon"]
    data_arr = np.squeeze(da.values)  # [vars, samples, lat, lon]
    max_vars, min_vars = da.max(dim=dims).values, da.min(dim=dims).values  # three dimensions
    print("data_arr shape", data_arr.shape)

    # normalise the data for the first variable
    def norm_var(x, min_value, max_value):
        return (x - min_value) / (max_value - min_value)

    assert np.max(data_arr[0]) == max_vars[0]
    # manually calculate the normalization of the data
    dt_norm = norm_var(data_arr[0], np.min(data_arr[0]), np.max(data_arr[0]))
    print("dt_norm", dt_norm.shape)
    s1 = dt_norm[0]   # the first sample, first timestamp
    s2 = dt_norm[23]  # the first sample, last timestamp
    s3 = dt_norm[1]   # the second sample, first timestamp
    s4 = dt_norm[24]  # the second sample, last timestamp

    # Get the data from the make_dataset function
    test_dataset = era5_dataset_case1.make_dataset()
    test_iterator = test_dataset.make_one_shot_iterator()
    # The `Iterator.string_handle()` method returns a tensor that can be evaluated
    # and used to feed the `handle` placeholder.
    test_handle = test_iterator.string_handle()
    iterator = tf.data.Iterator.from_string_handle(test_handle, test_dataset.output_types,
                                                   test_dataset.output_shapes)
    inputs = iterator.get_next()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # get batch_size samples from the dataset
        dt = sess.run(inputs)  # [batch_size, sequence_len, n_vars, lon, lat]
        dt.shape[0] == 4
        dt.shape[1] == 24
        print("shape of dt", dt.shape)
        s1t = dt[0, 0, 0]
        s2t = dt[0, 23, 0]
        # get the second sample from the dataset
        s3t = dt[1, 0, 0]
        s4t = dt[1, 23, 0]
        # s2t = sess.run(inputs)[0, :, 0]
        # compare the data from the nc files and make_dataset
        assert np.sum(s1 - s1t) < 0.0001
        assert np.sum(s2 - s2t) < 0.0001
        assert np.sum(s3 - s3t) < 0.0001
        assert np.sum(s4 - s4t) < 0.0001
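As a side note, the core of `test_make_dataset` is a min-max normalisation followed by an element-wise comparison between two routes to the same sample. Below is a small stand-alone sketch of that check on synthetic data (shapes and values here are made up, not taken from the ERA5 TFRecords):

``` python
import numpy as np

def norm_var(x, min_value, max_value):
    # min-max normalisation, as used in test_make_dataset above
    return (x - min_value) / (max_value - min_value)

# synthetic stand-in for one variable: 48 time steps on a 160 x 128 grid
data = np.random.rand(48, 160, 128) * 40.0 - 10.0
dt_norm = norm_var(data, np.min(data), np.max(data))

# normalised values are bounded in [0, 1] by construction
assert np.isclose(dt_norm.min(), 0.0) and np.isclose(dt_norm.max(), 1.0)

# the test's comparison style: two routes to the same sample should agree closely
reference = dt_norm[0]
candidate = reference + np.random.uniform(-1e-9, 1e-9, reference.shape)
assert np.sum(reference - candidate) < 0.0001
```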
test/test_gzprcp_data.py
0 → 100644
__email__ = "b.gong@fz-juelich.de"

from video_prediction.datasets.gzprcp_dataset import *
import pytest
import tensorflow as tf
import xarray as xr

input_dir = "/p/largedata/jjsc42/project/deeprain/project_data/10min_AWS_prcp"
datasplit_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/data_split/gzprcp/datasplit.json"
hparams_dict_config = "/p/project/deepacf/deeprain/bing/ambs/video_prediction_tools/hparams/gzprcp/convLSTM_gan/model_hparams_template.json"
sequences_per_file = 10
mode = "test"


@pytest.fixture(scope="module")
def gzprcp_dataset_case1():
    dataset = GzprcpDataset(input_dir=input_dir, datasplit_config=datasplit_config,
                            hparams_dict_config=hparams_dict_config, mode="test",
                            seed=1234, nsamples_ref=1000)
    dataset.get_hparams()
    dataset.get_filenames_from_datasplit()
    dataset.load_data_from_nc()
    return dataset


def test_init_gzprcp_dataset(gzprcp_dataset_case1):
    # gzprcp_dataset_case1.get_hparams()
    print('gzprcp_dataset_case1.max_epochs: {}'.format(gzprcp_dataset_case1.max_epochs))
    print('gzprcp_dataset_case1.mode: {}'.format(gzprcp_dataset_case1.mode))
    print('gzprcp_dataset_case1.batch_size: {}'.format(gzprcp_dataset_case1.batch_size))
    print('gzprcp_dataset_case1.k: {}'.format(gzprcp_dataset_case1.k))
    print('gzprcp_dataset_case1.filenames: {}'.format(gzprcp_dataset_case1.filenames))
    assert gzprcp_dataset_case1.max_epochs == 8
    assert gzprcp_dataset_case1.mode == mode
    assert gzprcp_dataset_case1.batch_size == 32
    assert gzprcp_dataset_case1.k == 0.01
    # assert gzprcp_dataset_case1.filenames[0] == 'GZ_prcp_2019.nc'


def test_load_data_from_nc(gzprcp_dataset_case1):
    train_tf_dataset = gzprcp_dataset_case1.make_dataset()
    train_iterator = train_tf_dataset.make_one_shot_iterator()
    # The `Iterator.string_handle()` method returns a tensor that can be evaluated
    # and used to feed the `handle` placeholder.
    train_handle = train_iterator.string_handle()
    iterator = tf.data.Iterator.from_string_handle(train_handle, train_tf_dataset.output_types,
                                                   train_tf_dataset.output_shapes)
    inputs = iterator.get_next()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for step in range(2):
            sess.run(inputs)

# df = xr.open_mfdataset(era5_dataset_case1.filenames)
# if __name__ == '__main__':
#     dataset = ERA5Dataset(input_dir: str = None, datasplit_config: str = None, hparams_dict_config: str = None,
#                           mode: str = "train", seed: int = None, nsamples_ref: int = None)
#     for next_element in dataset.take(2):
#         # time_s = time.time()
#         # tf.print(next_element.shape)
#         pass
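Both test modules rely on the TF1-style feedable-iterator pattern (a one-shot iterator plus `string_handle`). The following is a minimal sketch of that pattern with an in-memory dataset, assuming TensorFlow 1.x as in the tests above; names and shapes here are invented for illustration:

``` python
import numpy as np
import tensorflow as tf  # assumes TF 1.x, as in the tests above

# toy dataset standing in for the netCDF/TFRecord pipelines
data = np.random.rand(8, 24, 16, 16, 3).astype(np.float32)
dataset = tf.data.Dataset.from_tensor_slices(data).batch(4)

# one-shot iterator and its string handle
one_shot_iterator = dataset.make_one_shot_iterator()
handle_tensor = one_shot_iterator.string_handle()

# a generic iterator fed by whichever handle is passed at run time
handle_ph = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle_ph, dataset.output_types, dataset.output_shapes)
inputs = iterator.get_next()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    handle_value = sess.run(handle_tensor)
    for step in range(2):
        batch = sess.run(inputs, feed_dict={handle_ph: handle_value})
        print(step, batch.shape)  # (4, 24, 16, 16, 3)
```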
test/test_prepare_era5_data.py
...
...
@@ -4,11 +4,8 @@ __author__ = "Bing Gong"
__date__ = "2021-03-03"

from data_preprocess.prepare_era5_data import *
import pytest
import numpy as np
import json
import os

year = "2007"
...
...
@@ -23,8 +20,6 @@ def dataExtraction_case1(year=year,job_name=job_name,src_dir=src_dir,target_dir=
    return ERA5DataExtraction(year, job_name, src_dir, target_dir, varslist_json)


def test_init(dataExtraction_case1):
    assert dataExtraction_case1.job_name == 1
    assert dataExtraction_case1.src_dir == src_dir
...
...
video_prediction_tools/HPC_scripts/preprocess_data_era5_step1_template.sh → video_prediction_tools/HPC_scripts/data_extraction_weatherbench_template.sh
100644 → 100755
#!/bin/bash -x
## Controlling Batch-job
#SBATCH --account=<your_project>
#SBATCH --account=deepacf
#SBATCH --nodes=1
#SBATCH --ntasks=13
##SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=1
#SBATCH --output=DataPreprocess_era5_step1-out.%j
#SBATCH --error=DataPreprocess_era5_step1-err.%j
#SBATCH --time=04:20:00
#SBATCH --gres=gpu:0
#SBATCH --output=log_out.%j
#SBATCH --error=log_err.%j
#SBATCH --time=00:10:00
#SBATCH --partition=batch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@somewhere.com
######### Template identifier (don't remove) #########
echo "Do not run the template scripts"
exit 99
######### Template identifier (don't remove) #########
ml Stages/2022
ml GCCcore/.11.2.0
ml GCC/11.2.0
ml ParaStationMPI/5.5.0-1
ml Python/3.9.6
ml SciPy-bundle/2021.10
ml xarray/0.20.1
ml netcdf4-python/1.5.7
ml dask/2021.9.1
# Name of virtual environment
VIRT_ENV_NAME="my_venv"
# Activate virtual environment if needed (and possible)
"""
if [ -z
${
VIRTUAL_ENV
}
]; then
if [[ -f ../virtual_envs/
${
VIRT_ENV_NAME
}
/bin/activate ]]; then
echo "
Activating virtual environment...
"
...
...
@@ -33,27 +39,25 @@ if [ -z ${VIRTUAL_ENV} ]; then
fi
# Loading modules
source ../env_setup/modules_preprocess+extract.sh
"""
source_dir=/p/scratch/deepacf/inbound_data/weatherbench
destination_dir=/p/project/deepacf/deeprain/video_prediction_shared_folder/weatherbench_test/extracted
data_extraction_dir=/p/project/deepacf/deeprain/grasse/ambs/video_prediction_tools/data_preprocess
variables='[{"name":"temperature","lvl":[850],"interpolation":"p"},{"name":"geopotential","lvl":[500],"interpolation":"p"}]'
years=("2013" "2014" "2015" "2016" "2017")

# select years and variables for dataset and define target domain
years=("2015")
variables=("t2" "t2" "t2")
sw_corner=(-999.9 -999.9)
nyx=(-999 -999)

cd ${data_extraction_dir}

# set some paths
# note, that destination_dir is adjusted during runtime based on the data
source_dir=/my/path/to/extracted/data/
destination_dir=/my/path/to/pickle/files

# execute Python-scripts
for year in "${years[@]}"; do
  echo "start preprocessing data for year ${year}"
  srun python ../main_scripts/main_preprocess_data_step1.py \
       --source_dir ${source_dir} --destination_dir ${destination_dir} --years "${year}" \
       --vars "${variables[0]}" "${variables[1]}" "${variables[2]}" \
       --sw_corner "${sw_corner[0]}" "${sw_corner[1]}" --nyx "${nyx[0]}" "${nyx[1]}"
done

# Name of virtual environment
venv_dir=".venv"
python -m venv --system-site-packages ${venv_dir}
. ${venv_dir}/bin/activate
#pip3 install --no-cache-dir pytz
#pip3 install --no-cache-dir python-dateutil
export PYTHONPATH=${data_extraction_dir}:$PYTHONPATH
export PYTHONPATH="${data_extraction_dir}/..":$PYTHONPATH

python3 ../main_scripts/main_data_extraction.py ${source_dir} ${dest_dir} ${years[@]} ${variables}

#srun python ../../workflow_parallel_frame_prediction/DataPreprocess/mpi_split_data_multi_years.py --destination_dir ${destination_dir} --varnames T2 MSL gph500

rm -r ${venv_dir}
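The `variables` JSON string above encodes one entry per field together with a pressure level and an interpolation type. Below is a hypothetical sketch of how such a spec can be parsed on the Python side; the actual argument handling inside the AMBS preprocessing scripts may differ:

``` python
import json

# variable spec as defined in the runscript above
variables = ('[{"name":"temperature","lvl":[850],"interpolation":"p"},'
             '{"name":"geopotential","lvl":[500],"interpolation":"p"}]')

for var in json.loads(variables):
    # e.g. temperature at 850 hPa, interpolated on pressure levels ("p")
    name, levels, interp = var["name"], var["lvl"], var["interpolation"]
    print(f"{name}: levels={levels}, interpolation={interp}")
```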
video_prediction_tools/HPC_scripts/data_extraction_era5_template.sh → video_prediction_tools/HPC_scripts/era5_data_extraction_template.sh
...
...
@@ -3,13 +3,13 @@
#SBATCH --account=<your_project>
#SBATCH --nodes=1
#SBATCH --ntasks=13
##SBATCH --ntasks-per-node=13
##SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=1
#SBATCH --output=data_extraction_era5-out.%j
#SBATCH --error=data_extraction_era5-err.%j
#SBATCH --output=DataExtraction_era5_step1-out.%j
#SBATCH --error=DataExtraction_era5_step1-err.%j
#SBATCH --time=04:20:00
#SBATCH --partition=batch
#SBATCH --gres=gpu:0
#SBATCH --partition=batch
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@somewhere.com
...
...
@@ -22,7 +22,7 @@ exit 99
VIRT_ENV_NAME="my_venv"
# Activate virtual environment if needed (and possible)
if [ -z ${VIRTUAL_ENV} ]; then
if [ -z "${VIRTUAL_ENV}" ]; then
  if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then
    echo "Activating virtual environment..."
    source ../virtual_envs/${VIRT_ENV_NAME}/bin/activate
...
...
@@ -34,16 +34,21 @@ fi
# Loading modules
source ../env_setup/modules_preprocess+extract.sh
# Declare path-variables (dest_dir will be set and configured automatically via generate_runscript.py)
source_dir=/my/path/to/era5

# select years and variables for dataset and define target domain
years=(2017)
months=("all")
var_dict='{"2t": {"sf": ""}, "tcc": {"sf": ""}, "t": {"ml": "p85000."}}'
sw_corner=(38.4 0.0)
nyx=(56 92)

# set some paths
# note, that destination_dir is adjusted during runtime based on the data
source_dir=/my/path/to/era5/data
destination_dir=/my/path/to/extracted/data
varmap_file=/my/path/to/varmapping/file
years=("2015")

# execute Python-script
srun python ../main_scripts/main_era5_data_extraction.py -src_dir "${source_dir}" \
     -dest_dir "${destination_dir}" -y "${years[@]}" -m "${months[@]}" \
     -swc "${sw_corner[@]}" -nyx "${nyx[@]}" -v "${var_dict}"

# Run data extraction
for year in "${years[@]}"; do
  echo "Perform ERA5-data extraction for year ${year}"
  srun python ../main_scripts/main_data_extraction.py --source_dir ${source_dir} --target_dir ${destination_dir} \
       --year ${year} --varslist_path ${varmap_file}
done
video_prediction_tools/HPC_scripts/train_model_weatherbench_template.sh
0 → 100644
#!/bin/bash -x
#SBATCH --account=<your_project>
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --output=train_model_era5-out.%j
#SBATCH --error=train_model_era5-err.%j
#SBATCH --time=24:00:00
#SBATCH --gres=gpu:1
#SBATCH --partition=some_partition
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@somewhere.com
######### Template identifier (don't remove) #########
echo "Do not run the template scripts"
exit 99
######### Template identifier (don't remove) #########
# auxiliary variables
WORK_DIR="$(pwd)"
BASE_DIR=$(dirname "$WORK_DIR")
# Name of virtual environment
VIRT_ENV_NAME="my_venv"
# !!! ADAPT DEPENDING ON USAGE OF CONTAINER !!!
# For container usage, comment in the following lines
# Name of container image (must be available in working directory)
CONTAINER_IMG="${WORK_DIR}/tensorflow_21.09-tf1-py3.sif"
WRAPPER="${BASE_DIR}/env_setup/wrapper_container.sh"
# sanity checks
if [[ ! -f ${CONTAINER_IMG} ]]; then
  echo "ERROR: Cannot find required TF1.15 container image '${CONTAINER_IMG}'."
  exit 1
fi

if [[ ! -f ${WRAPPER} ]]; then
  echo "ERROR: Cannot find wrapper-script '${WRAPPER}' for TF1.15 container image."
  exit 1
fi
# clean-up modules to avoid conflicts between host and container settings
module purge
# declare directory-variables which will be modified by generate_runscript.py
source_dir=/my/path/to/tfrecords/files
destination_dir=/my/model/output/path

# valid identifiers for model-argument are: convLSTM, savp, mcnet and vae
model=convLSTM
datasplit_dict=${destination_dir}/data_split.json
model_hparams=${destination_dir}/model_hparams.json

# run training in container
export CUDA_VISIBLE_DEVICES=0
## One node, single GPU
srun --mpi=pspmix --cpu-bind=none \
     singularity exec --nv "${CONTAINER_IMG}" "${WRAPPER}" ${VIRT_ENV_NAME} \
     python3 "${BASE_DIR}"/main_scripts/main_train_models.py --input_dir ${source_dir} --datasplit_dict ${datasplit_dict} \
     --dataset weatherbench --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir}/
# WITHOUT container usage, comment in the following lines (and uncomment the lines above)
# Activate virtual environment if needed (and possible)
#if [ -z ${VIRTUAL_ENV} ]; then
# if [[ -f ../virtual_envs/${VIRT_ENV_NAME}/bin/activate ]]; then