Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Commits
1754830c
Commit
1754830c
authored
5 years ago
by
lukas leufen
Browse files
Options
Downloads
Patches
Plain Diff
update on data prep tests
parent
af1ecb8a
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!37
include new development
,
!33
Lukas issue036 feat local temp data storage
Pipeline
#29107
passed
5 years ago
Stage: test
Stage: pages
Stage: deploy
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/data_handling/data_preparation.py
+1
-1
1 addition, 1 deletion
src/data_handling/data_preparation.py
test/test_data_handling/test_data_preparation.py
+96
-10
96 additions, 10 deletions
test/test_data_handling/test_data_preparation.py
with
97 additions
and
11 deletions
src/data_handling/data_preparation.py
+
1
−
1
View file @
1754830c
...
@@ -108,7 +108,7 @@ class DataPrep(object):
...
@@ -108,7 +108,7 @@ class DataPrep(object):
check_dict
=
{
"
station_type
"
:
self
.
station_type
,
"
network_name
"
:
self
.
network
}
check_dict
=
{
"
station_type
"
:
self
.
station_type
,
"
network_name
"
:
self
.
network
}
for
(
k
,
v
)
in
check_dict
.
items
():
for
(
k
,
v
)
in
check_dict
.
items
():
if
self
.
meta
.
at
[
k
,
self
.
station
[
0
]]
!=
v
:
if
self
.
meta
.
at
[
k
,
self
.
station
[
0
]]
!=
v
:
logging
.
debug
(
f
"
meta data does not agree w
hic
h given request for
{
k
}
:
{
v
}
(requested) !=
"
logging
.
debug
(
f
"
meta data does not agree w
it
h given request for
{
k
}
:
{
v
}
(requested) !=
"
f
"
{
self
.
meta
.
at
[
k
,
self
.
station
[
0
]]
}
(local). Raise FileNotFoundError to trigger new
"
f
"
{
self
.
meta
.
at
[
k
,
self
.
station
[
0
]]
}
(local). Raise FileNotFoundError to trigger new
"
f
"
grapping from web.
"
)
f
"
grapping from web.
"
)
raise
FileNotFoundError
raise
FileNotFoundError
...
...
This diff is collapsed.
Click to expand it.
test/test_data_handling/test_data_preparation.py
+
96
−
10
View file @
1754830c
...
@@ -7,6 +7,8 @@ import xarray as xr
...
@@ -7,6 +7,8 @@ import xarray as xr
import
datetime
as
dt
import
datetime
as
dt
import
pandas
as
pd
import
pandas
as
pd
from
operator
import
itemgetter
from
operator
import
itemgetter
import
logging
from
src.helpers
import
PyTestRegex
class
TestDataPrep
:
class
TestDataPrep
:
...
@@ -17,6 +19,17 @@ class TestDataPrep:
...
@@ -17,6 +19,17 @@ class TestDataPrep:
station_type
=
'
background
'
,
test
=
'
testKWARGS
'
,
station_type
=
'
background
'
,
test
=
'
testKWARGS
'
,
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
})
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
})
@pytest.fixture
def
data_prep_no_init
(
self
):
d
=
object
.
__new__
(
DataPrep
)
d
.
path
=
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
)),
'
data
'
)
d
.
network
=
'
UBA
'
d
.
station
=
[
'
DEBW107
'
]
d
.
variables
=
[
'
o3
'
,
'
temp
'
]
d
.
station_type
=
"
background
"
d
.
kwargs
=
None
return
d
def
test_init
(
self
,
data
):
def
test_init
(
self
,
data
):
assert
data
.
path
==
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
)),
'
data
'
)
assert
data
.
path
==
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
)),
'
data
'
)
assert
data
.
network
==
'
AIRBASE
'
assert
data
.
network
==
'
AIRBASE
'
...
@@ -31,16 +44,79 @@ class TestDataPrep:
...
@@ -31,16 +44,79 @@ class TestDataPrep:
with
pytest
.
raises
(
NotImplementedError
):
with
pytest
.
raises
(
NotImplementedError
):
DataPrep
(
'
data/
'
,
'
dummy
'
,
'
DEBW107
'
,
[
'
o3
'
,
'
temp
'
])
DataPrep
(
'
data/
'
,
'
dummy
'
,
'
DEBW107
'
,
[
'
o3
'
,
'
temp
'
])
def
test_repr
(
self
):
def
test_download_data
(
self
,
data_prep_no_init
):
d
=
object
.
__new__
(
DataPrep
)
file_name
=
data_prep_no_init
.
_set_file_name
()
d
.
path
=
'
data/test
'
meta_file
=
data_prep_no_init
.
_set_meta_file_name
()
d
.
network
=
'
dummy
'
data_prep_no_init
.
kwargs
=
{
"
store_data_locally
"
:
False
}
d
.
station
=
[
'
DEBW107
'
]
data_prep_no_init
.
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
}
d
.
variables
=
[
'
o3
'
,
'
temp
'
]
data_prep_no_init
.
download_data
(
file_name
,
meta_file
)
d
.
station_type
=
"
traffic
"
assert
isinstance
(
data_prep_no_init
.
data
,
xr
.
DataArray
)
d
.
kwargs
=
None
assert
d
.
__repr__
().
rstrip
()
==
"
Dataprep(path=
'
data/test
'
, network=
'
dummy
'
, station=[
'
DEBW107
'
],
"
\
def
test_download_data_from_join
(
self
,
data_prep_no_init
):
"
variables=[
'
o3
'
,
'
temp
'
], station_type=traffic, **None)
"
.
rstrip
()
file_name
=
data_prep_no_init
.
_set_file_name
()
meta_file
=
data_prep_no_init
.
_set_meta_file_name
()
data_prep_no_init
.
kwargs
=
{
"
store_data_locally
"
:
False
}
data_prep_no_init
.
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
}
xarr
,
meta
=
data_prep_no_init
.
download_data_from_join
(
file_name
,
meta_file
)
assert
isinstance
(
xarr
,
xr
.
DataArray
)
assert
isinstance
(
meta
,
pd
.
DataFrame
)
def
test_check_station_meta
(
self
,
caplog
,
data_prep_no_init
):
caplog
.
set_level
(
logging
.
DEBUG
)
file_name
=
data_prep_no_init
.
_set_file_name
()
meta_file
=
data_prep_no_init
.
_set_meta_file_name
()
data_prep_no_init
.
kwargs
=
{
"
store_data_locally
"
:
False
}
data_prep_no_init
.
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
}
data_prep_no_init
.
download_data
(
file_name
,
meta_file
)
assert
data_prep_no_init
.
check_station_meta
()
is
None
data_prep_no_init
.
station_type
=
"
traffic
"
with
pytest
.
raises
(
FileNotFoundError
)
as
e
:
data_prep_no_init
.
check_station_meta
()
msg
=
"
meta data does not agree with given request for station_type: traffic (requested) != background (local)
"
assert
caplog
.
record_tuples
[
-
1
][:
-
1
]
==
(
'
root
'
,
10
)
assert
msg
in
caplog
.
record_tuples
[
-
1
][
-
1
]
def
test_load_data_overwrite_local_data
(
self
,
data_prep_no_init
):
data_prep_no_init
.
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
}
file_path
=
data_prep_no_init
.
_set_file_name
()
meta_file_path
=
data_prep_no_init
.
_set_meta_file_name
()
os
.
remove
(
file_path
)
os
.
remove
(
meta_file_path
)
assert
not
os
.
path
.
exists
(
file_path
)
assert
not
os
.
path
.
exists
(
meta_file_path
)
data_prep_no_init
.
kwargs
=
{
"
overwrite_local_data
"
:
True
}
data_prep_no_init
.
load_data
()
assert
os
.
path
.
exists
(
file_path
)
assert
os
.
path
.
exists
(
meta_file_path
)
t
=
os
.
stat
(
file_path
).
st_ctime
tm
=
os
.
stat
(
meta_file_path
).
st_ctime
data_prep_no_init
.
load_data
()
assert
os
.
path
.
exists
(
file_path
)
assert
os
.
path
.
exists
(
meta_file_path
)
assert
os
.
stat
(
file_path
).
st_ctime
>
t
assert
os
.
stat
(
meta_file_path
).
st_ctime
>
tm
assert
isinstance
(
data_prep_no_init
.
data
,
xr
.
DataArray
)
assert
isinstance
(
data_prep_no_init
.
meta
,
pd
.
DataFrame
)
def
test_load_data_keep_local_data
(
self
,
data_prep_no_init
):
data_prep_no_init
.
statistics_per_var
=
{
'
o3
'
:
'
dma8eu
'
,
'
temp
'
:
'
maximum
'
}
data_prep_no_init
.
station_type
=
None
data_prep_no_init
.
kwargs
=
{}
file_path
=
data_prep_no_init
.
_set_file_name
()
data_prep_no_init
.
load_data
()
assert
os
.
path
.
exists
(
file_path
)
t
=
os
.
stat
(
file_path
).
st_ctime
data_prep_no_init
.
load_data
()
assert
os
.
path
.
exists
(
data_prep_no_init
.
_set_file_name
())
assert
os
.
stat
(
file_path
).
st_ctime
==
t
assert
isinstance
(
data_prep_no_init
.
data
,
xr
.
DataArray
)
assert
isinstance
(
data_prep_no_init
.
meta
,
pd
.
DataFrame
)
def
test_repr
(
self
,
data_prep_no_init
):
path
=
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
)),
'
data
'
)
assert
data_prep_no_init
.
__repr__
().
rstrip
()
==
f
"
Dataprep(path=
'
{
path
}
'
, network=
'
UBA
'
,
"
\
f
"
station=[
'
DEBW107
'
], variables=[
'
o3
'
,
'
temp
'
],
"
\
f
"
station_type=background, **None)
"
.
rstrip
()
def
test_set_file_name_and_meta
(
self
):
def
test_set_file_name_and_meta
(
self
):
d
=
object
.
__new__
(
DataPrep
)
d
=
object
.
__new__
(
DataPrep
)
...
@@ -133,6 +209,16 @@ class TestDataPrep:
...
@@ -133,6 +209,16 @@ class TestDataPrep:
with
pytest
.
raises
(
NotImplementedError
):
with
pytest
.
raises
(
NotImplementedError
):
data
.
inverse_transform
()
data
.
inverse_transform
()
def
test_get_transformation_information
(
self
,
data
):
assert
(
None
,
None
,
None
)
==
data
.
get_transformation_information
(
"
o3
"
)
mean_test
=
data
.
data
.
mean
(
"
datetime
"
).
sel
(
variables
=
'
o3
'
).
values
std_test
=
data
.
data
.
std
(
"
datetime
"
).
sel
(
variables
=
'
o3
'
).
values
data
.
transform
(
'
datetime
'
)
mean
,
std
,
info
=
data
.
get_transformation_information
(
"
o3
"
)
assert
np
.
testing
.
assert_almost_equal
(
mean
,
mean_test
)
is
None
assert
np
.
testing
.
assert_almost_equal
(
std
,
std_test
)
is
None
assert
info
==
"
standardise
"
def
test_nan_remove_no_hist_or_label
(
self
,
data
):
def
test_nan_remove_no_hist_or_label
(
self
,
data
):
assert
data
.
history
is
None
assert
data
.
history
is
None
assert
data
.
label
is
None
assert
data
.
label
is
None
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment