esde / machine-learning / MLAir · Merge request !514
Resolve "load era5 data from toar db"
Merged · Ghost User requested to merge lukas_issue449_refac_load-era5-data-from-toar-db into develop (2 years ago)
Overview 0 · Commits 4 · Pipelines 5 · Changes 13
Closes #449 (closed)
Compared: develop (base) and latest version 7b9eb391 (4 commits, 1 year ago) · version 3 de954f6a (3 commits) · version 2 9b22c484 (2 commits) · version 1 2bb2a975 (1 commit)
13 files changed: +190 −172
mlair/data_handler/data_handler_single_station.py
+5 −64
@@ -336,7 +336,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
                 os.remove(file_name)
             if os.path.exists(meta_file):
                 os.remove(meta_file)
-            data, meta = self.download_data(file_name, meta_file, station, statistics_per_var, sampling,
+            data, meta = data_sources.download_data(file_name, meta_file, station, statistics_per_var, sampling,
                                             store_data_locally=store_data_locally, data_origin=data_origin,
                                             time_dim=self.time_dim, target_dim=self.target_dim, iter_dim=self.iter_dim)
             logging.debug(f"loaded new data")
@@ -350,75 +350,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
             except FileNotFoundError as e:
                 logging.debug(e)
                 logging.debug(f"load new data")
-                data, meta = self.download_data(file_name, meta_file, station, statistics_per_var, sampling,
-                                                store_data_locally=store_data_locally, data_origin=data_origin,
-                                                time_dim=self.time_dim, target_dim=self.target_dim, iter_dim=self.iter_dim)
+                data, meta = data_sources.download_data(file_name, meta_file, station, statistics_per_var, sampling,
+                                                        store_data_locally=store_data_locally, data_origin=data_origin,
+                                                        time_dim=self.time_dim, target_dim=self.target_dim, iter_dim=self.iter_dim)
                 logging.debug("loading finished")
         # create slices and check for negative concentration.
         data = self._slice_prep(data, start=start, end=end)
         data = self.check_for_negative_concentrations(data)
         return data, meta
 
-    def download_data(self, file_name: str, meta_file: str, station, statistics_per_var, sampling,
-                      store_data_locally=True, data_origin: Dict = None, time_dim=DEFAULT_TIME_DIM,
-                      target_dim=DEFAULT_TARGET_DIM, iter_dim=DEFAULT_ITER_DIM) -> [xr.DataArray, pd.DataFrame]:
-        """
-        Download data from TOAR database using the JOIN interface or load local era5 data.
-
-        Data is transformed to a xarray dataset. If class attribute store_data_locally is true, data is additionally
-        stored locally using given names for file and meta file.
-
-        :param file_name: name of file to save data to (containing full path)
-        :param meta_file: name of the meta data file (also containing full path)
-
-        :return: downloaded data and its meta data
-        """
-        df_all = {}
-        df_era5, df_toar = None, None
-        meta_era5, meta_toar = None, None
-        if data_origin is not None:
-            era5_origin = filter_dict_by_value(data_origin, "era5", True)
-            era5_stats = select_from_dict(statistics_per_var, era5_origin.keys())
-            toar_origin = filter_dict_by_value(data_origin, "era5", False)
-            toar_stats = select_from_dict(statistics_per_var, era5_origin.keys(), filter_cond=False)
-            assert len(era5_origin) + len(toar_origin) == len(data_origin)
-            assert len(era5_stats) + len(toar_stats) == len(statistics_per_var)
-        else:
-            era5_origin, toar_origin = None, None
-            era5_stats, toar_stats = statistics_per_var, statistics_per_var
-
-        # load data
-        if era5_origin is not None and len(era5_stats) > 0:
-            # load era5 data
-            df_era5, meta_era5 = data_sources.era5.load_era5(station_name=station, stat_var=era5_stats,
-                                                             sampling=sampling, data_origin=era5_origin)
-        if toar_origin is None or len(toar_stats) > 0:
-            # load combined data from toar-data (v2 & v1)
-            df_toar, meta_toar = data_sources.toar_data.download_toar(station=station, toar_stats=toar_stats,
-                                                                      sampling=sampling, data_origin=toar_origin)
-
-        if df_era5 is None and df_toar is None:
-            raise data_sources.toar_data.EmptyQueryResult(f"No data available for era5 and toar-data")
-        df = pd.concat([df_era5, df_toar], axis=1, sort=True)
-        if meta_era5 is not None and meta_toar is not None:
-            meta = meta_era5.combine_first(meta_toar)
-        else:
-            meta = meta_era5 if meta_era5 is not None else meta_toar
-        meta.loc["data_origin"] = str(data_origin)
-        meta.loc["statistics_per_var"] = str(statistics_per_var)
-
-        df_all[station[0]] = df
-        # convert df_all to xarray
-        xarr = {k: xr.DataArray(v, dims=[time_dim, target_dim]) for k, v in df_all.items()}
-        xarr = xr.Dataset(xarr).to_array(dim=iter_dim)
-        if store_data_locally is True:
-            # save locally as nc/csv file
-            xarr.to_netcdf(path=file_name)
-            meta.to_csv(meta_file)
-        return xarr, meta
-
     @staticmethod
     def check_station_meta(meta, station, data_origin, statistics_per_var):
         """
         Loading
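
For orientation, a minimal usage sketch of the module-level helper that this MR switches the data handler to. It is not part of the diff: the import path, station ID, variable statistics, data origins and sampling value below are assumptions chosen for illustration; only the argument list and the (xr.DataArray, pd.DataFrame) return type follow the signature shown in the removed class method above.

# Hypothetical usage sketch; names marked as assumptions are not taken from the MR.
from mlair.helpers import data_sources  # assumed package location of the refactored download_data()

station = ["DEBW107"]                                       # assumed example station id (list form, station[0] is used as key)
statistics_per_var = {"o3": "dma8eu", "temp": "maximum"}    # assumed variable-to-statistics mapping
data_origin = {"o3": "", "temp": "era5"}                    # variables mapped to "era5" are read from local era5 files,
                                                            # the rest is queried from toar-data (v2 & v1)

# time_dim, target_dim and iter_dim keep their DEFAULT_* values from the signature.
data, meta = data_sources.download_data(
    "DEBW107_test.nc",        # file_name: assumed path for the netCDF written when store_data_locally is True
    "DEBW107_test_meta.csv",  # meta_file: assumed path for the csv meta data
    station,
    statistics_per_var,
    "daily",                  # sampling; assumed value
    store_data_locally=True,
    data_origin=data_origin,
)
print(data.dims)    # xr.DataArray spanning the iter, time and target dimensions
print(meta.head())  # pd.DataFrame with station meta data plus data_origin / statistics_per_var rows

Splitting variables between the era5 and toar-data branches via the "era5" marker in data_origin is the behaviour visible in the removed method body above; the refactoring moves that logic out of DataHandlerSingleStation without changing the call arguments.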