Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Commits
ad30d036
Commit
ad30d036
authored
4 years ago
by
Felix Kleinert
Browse files
Options
Downloads
Patches
Plain Diff
Draft: inh. from DataHandlerSingleStation
parent
ebd31407
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!259
Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline
#61724
passed
4 years ago
Stage: test
Stage: docs
Stage: pages
Stage: deploy
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
mlair/data_handler/data_handler_wrf_chem.py
+90
-20
90 additions, 20 deletions
mlair/data_handler/data_handler_wrf_chem.py
with
90 additions
and
20 deletions
mlair/data_handler/data_handler_wrf_chem.py
+
90
−
20
View file @
ad30d036
...
@@ -10,16 +10,18 @@ import dask.array as da
...
@@ -10,16 +10,18 @@ import dask.array as da
import
os
import
os
from
mlair.helpers.geofunctions
import
haversine_dist
from
mlair.helpers.geofunctions
import
haversine_dist
from
mlair.helpers.helpers
import
convert2xrda
,
remove_items
from
mlair.helpers.helpers
import
convert2xrda
,
remove_items
from
mlair.helpers
import
TimeTrackingWrapper
from
typing
import
Tuple
,
Union
,
List
,
Dict
from
typing
import
Tuple
,
Union
,
List
,
Dict
from
mlair.data_handler.abstract_data_handler
import
AbstractDataHandler
from
mlair.data_handler.abstract_data_handler
import
AbstractDataHandler
from
mlair.data_handler.data_handler_single_station
import
DataHandlerSingleStation
import
logging
import
logging
import
cartopy.crs
as
ccrs
import
cartopy.crs
as
ccrs
float_np_xr
=
Union
[
float
,
np
.
ndarray
,
xr
.
DataArray
,
xr
.
Dataset
]
float_np_xr
=
Union
[
float
,
np
.
ndarray
,
xr
.
DataArray
,
xr
.
Dataset
]
class
WrfChemData
Handler
(
AbstractDataHandl
er
)
:
class
Base
WrfChemData
Load
er
:
DEFAULT_LOGICAL_TIME_COORD_NAME
=
'
Time
'
DEFAULT_LOGICAL_TIME_COORD_NAME
=
'
Time
'
DEFAULT_LOGICAL_X_COORD_NAME
=
'
x
'
DEFAULT_LOGICAL_X_COORD_NAME
=
'
x
'
DEFAULT_LOGICAL_Y_COORD_NAME
=
'
y
'
DEFAULT_LOGICAL_Y_COORD_NAME
=
'
y
'
...
@@ -32,11 +34,13 @@ class WrfChemDataHandler(AbstractDataHandler):
...
@@ -32,11 +34,13 @@ class WrfChemDataHandler(AbstractDataHandler):
DEFAULT_RECHUNK
=
{
"
XTIME
"
:
1
,
"
y
"
:
36
,
"
x
"
:
40
}
DEFAULT_RECHUNK
=
{
"
XTIME
"
:
1
,
"
y
"
:
36
,
"
x
"
:
40
}
def
__init__
(
self
,
data_path
:
str
,
common_file_starter
:
str
,
time_dim_name
:
str
=
DEFAULT_LOGICAL_TIME_COORD_NAME
,
def
__init__
(
self
,
data_path
:
str
,
common_file_starter
:
str
,
time_dim_name
:
str
=
DEFAULT_LOGICAL_TIME_COORD_NAME
,
rechunk_values
:
Dict
=
None
,
logical_x_coord_name
:
str
=
DEFAULT_LOGICAL_X_COORD_NAME
,
rechunk_values
:
Dict
=
None
,
logical_x_coord_name
:
str
=
DEFAULT_LOGICAL_X_COORD_NAME
,
logical_y_coord_name
:
str
=
DEFAULT_LOGICAL_Y_COORD_NAME
,
logical_y_coord_name
:
str
=
DEFAULT_LOGICAL_Y_COORD_NAME
,
logical_z_coord_name
:
str
=
DEFAULT_LOGICAL_Z_COORD_NAME
,
logical_z_coord_name
:
str
=
DEFAULT_LOGICAL_Z_COORD_NAME
,
physical_x_coord_name
:
str
=
DEFAULT_PHYSICAL_X_COORD_NAME
,
physical_x_coord_name
:
str
=
DEFAULT_PHYSICAL_X_COORD_NAME
,
physical_y_coord_name
:
str
=
DEFAULT_PHYSICAL_Y_COORD_NAME
physical_y_coord_name
:
str
=
DEFAULT_PHYSICAL_Y_COORD_NAME
,
physical_t_coord_name
:
str
=
DEFAULT_PHYSICAL_TIME_COORD_NAME
):
):
super
().
__init__
()
super
().
__init__
()
self
.
data_path
=
data_path
self
.
data_path
=
data_path
...
@@ -49,6 +53,7 @@ class WrfChemDataHandler(AbstractDataHandler):
...
@@ -49,6 +53,7 @@ class WrfChemDataHandler(AbstractDataHandler):
self
.
physical_x_coord_name
=
physical_x_coord_name
self
.
physical_x_coord_name
=
physical_x_coord_name
self
.
physical_y_coord_name
=
physical_y_coord_name
self
.
physical_y_coord_name
=
physical_y_coord_name
self
.
physical_t_coord_name
=
physical_t_coord_name
if
rechunk_values
is
None
:
if
rechunk_values
is
None
:
self
.
rechunk_values
=
{
self
.
time_dim_name
:
1
}
self
.
rechunk_values
=
{
self
.
time_dim_name
:
1
}
...
@@ -75,6 +80,13 @@ class WrfChemDataHandler(AbstractDataHandler):
...
@@ -75,6 +80,13 @@ class WrfChemDataHandler(AbstractDataHandler):
# data = data.assign_coords({'XTIME': data.XTIME.values})
# data = data.assign_coords({'XTIME': data.XTIME.values})
self
.
_data
=
data
self
.
_data
=
data
def assign_coords(self, coords, **coords_kwargs):
    """
    Assign coordinates to the data held in ``self._data``.

    The call is forwarded unchanged to ``self._data.assign_coords`` and the object returned by that call replaces
    ``self._data``. Nothing is returned to the caller.

    :param coords: coordinate specification, passed through positionally
    :param coords_kwargs: additional coordinates given as keyword arguments, passed through as-is
    """
    # presumably self._data is an xarray object whose assign_coords returns a new object - verify against open_data
    updated = self._data.assign_coords(coords, **coords_kwargs)
    self._data = updated
def
rechunk_data
(
self
,
chunks
=
None
,
name_prefix
=
'
xarray-
'
,
token
=
None
,
lock
=
False
):
def
rechunk_data
(
self
,
chunks
=
None
,
name_prefix
=
'
xarray-
'
,
token
=
None
,
lock
=
False
):
self
.
_data
=
self
.
_data
.
chunk
(
chunks
=
chunks
,
name_prefix
=
name_prefix
,
token
=
token
,
lock
=
lock
)
self
.
_data
=
self
.
_data
.
chunk
(
chunks
=
chunks
,
name_prefix
=
name_prefix
,
token
=
token
,
lock
=
lock
)
...
@@ -114,7 +126,7 @@ class WrfChemDataHandler(AbstractDataHandler):
...
@@ -114,7 +126,7 @@ class WrfChemDataHandler(AbstractDataHandler):
return
dist
.
argmin
(
dim
)
return
dist
.
argmin
(
dim
)
class
DataHandler
SingleGridCo
u
lumn
(
WrfChemData
Handl
er
):
class
SingleGridColumn
WrfChemDataLoader
(
Base
WrfChemData
Load
er
):
DEFAULT_MODEL
=
"
WRF-Chem
"
DEFAULT_MODEL
=
"
WRF-Chem
"
DEFAULT_VAR_ALL_DICT
=
{
'
o3
'
:
'
dma8eu
'
,
'
relhum
'
:
'
average_values
'
,
'
temp
'
:
'
maximum
'
,
'
u
'
:
'
average_values
'
,
DEFAULT_VAR_ALL_DICT
=
{
'
o3
'
:
'
dma8eu
'
,
'
relhum
'
:
'
average_values
'
,
'
temp
'
:
'
maximum
'
,
'
u
'
:
'
average_values
'
,
'
v
'
:
'
average_values
'
,
'
no
'
:
'
dma8eu
'
,
'
no2
'
:
'
dma8eu
'
,
'
cloudcover
'
:
'
average_values
'
,
'
v
'
:
'
average_values
'
,
'
no
'
:
'
dma8eu
'
,
'
no2
'
:
'
dma8eu
'
,
'
cloudcover
'
:
'
average_values
'
,
...
@@ -146,6 +158,8 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
...
@@ -146,6 +158,8 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
self
.
external_coords_file
=
external_coords_file
self
.
external_coords_file
=
external_coords_file
self
.
open_data
()
self
.
open_data
()
self
.
assign_coords
(
{
self
.
physical_t_coord_name
:
(
self
.
time_dim_name
,
self
.
_data
[
self
.
physical_t_coord_name
].
values
)})
if
external_coords_file
is
not
None
:
if
external_coords_file
is
not
None
:
self
.
_apply_external_coordinates
()
self
.
_apply_external_coordinates
()
self
.
rechunk_data
(
self
.
rechunk_values
)
self
.
rechunk_data
(
self
.
rechunk_values
)
...
@@ -212,6 +226,62 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
...
@@ -212,6 +226,62 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
return
{
k
:
list
(
v
.
values
)
for
k
,
v
in
self
.
_nearest_coords
.
items
()}
return
{
k
:
list
(
v
.
values
)
for
k
,
v
in
self
.
_nearest_coords
.
items
()}
class DataHandlerSingleGridCoulumn2(SingleGridColumnWrfChemDataLoader, DataHandlerSingleStation):
    """
    Draft data handler combining SingleGridColumnWrfChemDataLoader with DataHandlerSingleStation.

    This class is work in progress: load_data, set_inputs_and_targets, make_samples and setup_samples are
    placeholders that raise NotImplementedError. The commented code inside them sketches the intended steps.
    """

    def __init__(self, common_file_starter, wrf_kwargs=None, **kwargs):
        """
        Initialise the handler.

        :param common_file_starter: currently unused
        :param wrf_kwargs: currently unused
        :param kwargs: currently unused
        """
        # NOTE(review): none of the arguments are forwarded, and super().__init__() is called without any
        # arguments. The parent signatures are not fully visible here - confirm that they can be called like this
        # before relying on this constructor.
        super().__init__()

    def load_data(self, path, station, statistics_per_var, sampling, station_type=None, network=None,
                  store_data_locally=False, data_origin: Dict = None, start=None, end=None):
        """
        Load data and meta data for a single grid column (not implemented yet).

        All parameters are accepted but not used so far. Once implemented, the method is meant to return a tuple
        ``(data, meta)``.

        :raises NotImplementedError: always
        """
        # ToDo: build the loader from this handler's own configuration and return (data, meta).
        # Fail right away: the former prototype opened a data set from hard-coded paths and used lat_xr / lon_xr,
        # which are not defined in this method or class, only to discard the result and raise anyway.
        # Prototype call kept for reference:
        # SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr),
        #                                   data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009_?/',
        #                                   common_file_starter='wrfout_d0',
        #                                   time_dim_name='Time',
        #                                   logical_x_coord_name='west_east',
        #                                   logical_y_coord_name='south_north',
        #                                   logical_z_coord_name='bottom_top',
        #                                   rechunk_values={'Time': 1, 'bottom_top': 2},
        #                                   external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc',
        #                                   )
        raise NotImplementedError

    def set_inputs_and_targets(self):
        """
        Select input and target data (not implemented yet).

        :raises NotImplementedError: always
        """
        # inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)})
        # targets = self._data.sel(
        #     {self.target_dim: helpers.to_list(self.target_var)})  # ToDo: is it right to expand this dim??
        # self.input_data = inputs
        # self.target_data = targets
        raise NotImplementedError

    def make_samples(self):
        """
        Create history window, labels and observations (not implemented yet).

        :raises NotImplementedError: always
        """
        # self.make_history_window(self.target_dim, self.window_history_size, self.time_dim)
        # self.make_labels(self.target_dim, self.target_var, self.time_dim, self.window_lead_time)
        # self.make_observation(self.target_dim, self.target_var, self.time_dim)
        # self.remove_nan(self.time_dim)
        raise NotImplementedError

    @TimeTrackingWrapper
    def setup_samples(self):
        """
        Setup samples. This method prepares and creates samples X, and labels Y.

        Not implemented yet, the commented code below outlines the intended order of steps.

        :raises NotImplementedError: always
        """
        # data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
        #                                  self.station_type, self.network, self.store_data_locally, self.data_origin,
        #                                  self.start, self.end)
        # self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
        #                               limit=self.interpolation_limit)
        # self.set_inputs_and_targets()
        # if self.do_transformation is True:
        #     self.call_transform()
        # self.make_samples()
        raise NotImplementedError
if
__name__
==
'
__main__
'
:
if
__name__
==
'
__main__
'
:
def
plot_map_proj
(
data
,
xlim
=
None
,
ylim
=
None
,
filename
=
None
,
point
=
None
):
def
plot_map_proj
(
data
,
xlim
=
None
,
ylim
=
None
,
filename
=
None
,
point
=
None
):
...
@@ -246,8 +316,8 @@ if __name__ == '__main__':
...
@@ -246,8 +316,8 @@ if __name__ == '__main__':
use_first_dummy_dataset
=
True
use_first_dummy_dataset
=
True
if
use_first_dummy_dataset
:
if
use_first_dummy_dataset
:
wrf_new
=
DataHandler
SingleGridCo
u
lumn
((
lat_xr
,
lon_xr
),
wrf_new
=
SingleGridColumn
WrfChemDataLoader
((
lat_xr
,
lon_xr
),
data_path
=
'
/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009/
'
,
data_path
=
'
/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009
_?
/
'
,
common_file_starter
=
'
wrfout_d0
'
,
common_file_starter
=
'
wrfout_d0
'
,
time_dim_name
=
'
Time
'
,
time_dim_name
=
'
Time
'
,
logical_x_coord_name
=
'
west_east
'
,
logical_x_coord_name
=
'
west_east
'
,
...
@@ -257,7 +327,7 @@ if __name__ == '__main__':
...
@@ -257,7 +327,7 @@ if __name__ == '__main__':
external_coords_file
=
'
/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc
'
,
external_coords_file
=
'
/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc
'
,
)
)
# wrf_gridcol =
DataHandler
SingleGridCo
u
lumn((lat_xr, lon_xr),
# wrf_gridcol = SingleGridColumn
WrfChemDataLoader
((lat_xr, lon_xr),
# data_path='/home/felix/Data/WRF-Chem/',
# data_path='/home/felix/Data/WRF-Chem/',
# common_file_starter='wrfout_d01_2010-',
# common_file_starter='wrfout_d01_2010-',
# time_dim_name='Time',
# time_dim_name='Time',
...
@@ -300,7 +370,7 @@ if __name__ == '__main__':
...
@@ -300,7 +370,7 @@ if __name__ == '__main__':
######################### # Larger 4D data
######################### # Larger 4D data
use_second_dummy_dataset
=
False
use_second_dummy_dataset
=
False
if
use_second_dummy_dataset
:
if
use_second_dummy_dataset
:
wrf_dh_4d
=
WrfChemData
Handl
er
(
data_path
=
'
/home/felix/Data/WRF-Chem/upload_aura/2009/2009
'
,
wrf_dh_4d
=
Base
WrfChemData
Load
er
(
data_path
=
'
/home/felix/Data/WRF-Chem/upload_aura/2009/2009
'
,
common_file_starter
=
'
wrfout_d01_2009
'
,
common_file_starter
=
'
wrfout_d01_2009
'
,
time_dim_name
=
'
Time
'
,
time_dim_name
=
'
Time
'
,
)
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment