Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Commits
b9092b37
Commit
b9092b37
authored
Feb 27, 2020
by
lukas leufen
Browse files
Options
Downloads
Plain Diff
use bootstrap skill score plot
parents
82a25fc8
8cdf9429
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!59
Develop
,
!52
implemented bootstraps
Pipeline
#30621
passed
Feb 27, 2020
Stage: test
Stage: pages
Stage: deploy
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/plotting/postprocessing_plotting.py
+70
-0
70 additions, 0 deletions
src/plotting/postprocessing_plotting.py
src/run_modules/post_processing.py
+20
-14
20 additions, 14 deletions
src/run_modules/post_processing.py
with
90 additions
and
14 deletions
src/plotting/postprocessing_plotting.py
+
70
−
0
View file @
b9092b37
...
@@ -479,6 +479,76 @@ class PlotCompetitiveSkillScore(RunEnvironment):
...
@@ -479,6 +479,76 @@ class PlotCompetitiveSkillScore(RunEnvironment):
plt
.
close
()
plt
.
close
()
class
PlotBootstrapSkillScore
(
RunEnvironment
):
"""
Create plot of climatological skill score after Murphy (1988) as box plot over all stations. A forecast time step
(called
"
ahead
"
) is separately shown to highlight the differences for each prediction time step. Either each single
term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed (score_only=True,
default). Y-axis is adjusted following the data and not hard coded. The plot is saved under plot_folder path with
name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi.
"""
def
__init__
(
self
,
data
:
Dict
,
plot_folder
:
str
=
"
.
"
,
model_setup
:
str
=
""
):
"""
Sets attributes and create plot
:param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
:param plot_folder: path to save the plot (default: current directory)
:param model_setup: architecture type to specify plot name (default
"
CNN
"
)
"""
super
().
__init__
()
self
.
_labels
=
None
self
.
_data
=
self
.
_prepare_data
(
data
)
self
.
_plot
(
plot_folder
,
model_setup
)
def
_prepare_data
(
self
,
data
:
Dict
)
->
pd
.
DataFrame
:
"""
Shrink given data, if only scores are relevant. In any case, transform data to a plot friendly format. Also set
plot labels depending on the lead time dimensions.
:param data: dictionary with station names as keys and 2D xarrays as values
:return: pre-processed data set
"""
data
=
helpers
.
dict_to_xarray
(
data
,
"
station
"
)
self
.
_labels
=
[
str
(
i
)
+
"
d
"
for
i
in
data
.
coords
[
"
ahead
"
].
values
]
return
data
.
to_dataframe
(
"
data
"
).
reset_index
(
level
=
[
0
,
1
,
2
])
def
_label_add
(
self
,
score_only
:
bool
):
"""
Adds the phrase
"
terms and
"
if score_only is disabled or empty string (if score_only=True).
:param score_only: if false all terms are relevant, otherwise only CASE I to IV
:return: additional label
"""
return
""
if
score_only
else
"
terms and
"
def
_plot
(
self
,
plot_folder
,
model_setup
):
"""
Main plot function to plot climatological skill score.
:param plot_folder: path to save the plot
:param model_setup: architecture type to specify plot name
"""
fig
,
ax
=
plt
.
subplots
()
sns
.
boxplot
(
x
=
"
boot_var
"
,
y
=
"
data
"
,
hue
=
"
ahead
"
,
data
=
self
.
_data
,
ax
=
ax
,
whis
=
1.
,
palette
=
"
Blues_d
"
,
showmeans
=
True
,
meanprops
=
{
"
markersize
"
:
1
,
"
markeredgecolor
"
:
"
k
"
},
flierprops
=
{
"
marker
"
:
"
.
"
})
ax
.
axhline
(
y
=
0
,
color
=
"
grey
"
,
linewidth
=
.
5
)
ax
.
set
(
ylabel
=
f
"
skill score
"
,
xlabel
=
""
,
title
=
"
summary of all stations
"
)
handles
,
_
=
ax
.
get_legend_handles_labels
()
ax
.
legend
(
handles
,
self
.
_labels
)
plt
.
tight_layout
()
self
.
_save
(
plot_folder
,
model_setup
)
@staticmethod
def
_save
(
plot_folder
,
model_setup
):
"""
Standard save method to store plot locally. The name of this plot is dynamic. It includes the model setup like
'
CNN
'
and can additionally be adjusted using an extra name tag.
:param plot_folder: path to save the plot
:param model_setup: architecture type to specify plot name
"""
plot_name
=
os
.
path
.
join
(
plot_folder
,
f
"
skill_score_bootstrap_
{
model_setup
}
.pdf
"
)
logging
.
debug
(
f
"
... save plot to
{
plot_name
}
"
)
plt
.
savefig
(
plot_name
,
dpi
=
500
)
plt
.
close
(
'
all
'
)
class
PlotTimeSeries
(
RunEnvironment
):
class
PlotTimeSeries
(
RunEnvironment
):
def
__init__
(
self
,
stations
:
List
,
data_path
:
str
,
name
:
str
,
window_lead_time
:
int
=
None
,
plot_folder
:
str
=
"
.
"
,
def
__init__
(
self
,
stations
:
List
,
data_path
:
str
,
name
:
str
,
window_lead_time
:
int
=
None
,
plot_folder
:
str
=
"
.
"
,
...
...
This diff is collapsed.
Click to expand it.
src/run_modules/post_processing.py
+
20
−
14
View file @
b9092b37
...
@@ -18,7 +18,7 @@ from src.datastore import NameNotFoundInDataStore
...
@@ -18,7 +18,7 @@ from src.datastore import NameNotFoundInDataStore
from
src.helpers
import
TimeTracking
from
src.helpers
import
TimeTracking
from
src.model_modules.linear_model
import
OrdinaryLeastSquaredModel
from
src.model_modules.linear_model
import
OrdinaryLeastSquaredModel
from
src.plotting.postprocessing_plotting
import
PlotMonthlySummary
,
PlotStationMap
,
PlotClimatologicalSkillScore
,
\
from
src.plotting.postprocessing_plotting
import
PlotMonthlySummary
,
PlotStationMap
,
PlotClimatologicalSkillScore
,
\
PlotCompetitiveSkillScore
,
PlotTimeSeries
PlotCompetitiveSkillScore
,
PlotTimeSeries
,
PlotBootstrapSkillScore
from
src.plotting.postprocessing_plotting
import
plot_conditional_quantiles
from
src.plotting.postprocessing_plotting
import
plot_conditional_quantiles
from
src.run_modules.run_environment
import
RunEnvironment
from
src.run_modules.run_environment
import
RunEnvironment
...
@@ -38,6 +38,7 @@ class PostProcessing(RunEnvironment):
...
@@ -38,6 +38,7 @@ class PostProcessing(RunEnvironment):
self
.
target_var
=
self
.
data_store
.
get
(
"
target_var
"
,
"
general
"
)
self
.
target_var
=
self
.
data_store
.
get
(
"
target_var
"
,
"
general
"
)
self
.
_sampling
=
self
.
data_store
.
get
(
"
sampling
"
,
"
general
"
)
self
.
_sampling
=
self
.
data_store
.
get
(
"
sampling
"
,
"
general
"
)
self
.
skill_scores
=
None
self
.
skill_scores
=
None
self
.
bootstrap_skill_scores
=
None
self
.
_run
()
self
.
_run
()
def
_run
(
self
):
def
_run
(
self
):
...
@@ -49,9 +50,9 @@ class PostProcessing(RunEnvironment):
...
@@ -49,9 +50,9 @@ class PostProcessing(RunEnvironment):
self
.
make_prediction
()
self
.
make_prediction
()
logging
.
info
(
"
take a look on the next reported time measure. If this increases a lot, one should think to
"
logging
.
info
(
"
take a look on the next reported time measure. If this increases a lot, one should think to
"
"
skip make_prediction() whenever it is possible to save time.
"
)
"
skip make_prediction() whenever it is possible to save time.
"
)
#
self.skill_scores = self.c
alculate_skill_score
s()
self
.
bootstrap_
skill_scores
=
self
.
c
reate_boot_strap
s
()
#
self.
plot
()
self
.
skill_scores
=
self
.
calculate_skill_scores
()
self
.
create_boot_straps
()
self
.
plot
()
def
create_boot_straps
(
self
):
def
create_boot_straps
(
self
):
...
@@ -60,7 +61,7 @@ class PostProcessing(RunEnvironment):
...
@@ -60,7 +61,7 @@ class PostProcessing(RunEnvironment):
bootstrap_path
=
self
.
data_store
.
get
(
"
bootstrap_path
"
,
"
general
"
)
bootstrap_path
=
self
.
data_store
.
get
(
"
bootstrap_path
"
,
"
general
"
)
forecast_path
=
self
.
data_store
.
get
(
"
forecast_path
"
,
"
general
"
)
forecast_path
=
self
.
data_store
.
get
(
"
forecast_path
"
,
"
general
"
)
window_lead_time
=
self
.
data_store
.
get
(
"
window_lead_time
"
,
"
general
"
)
window_lead_time
=
self
.
data_store
.
get
(
"
window_lead_time
"
,
"
general
"
)
bootstraps
=
BootStraps
(
self
.
test_data
,
bootstrap_path
,
2
)
bootstraps
=
BootStraps
(
self
.
test_data
,
bootstrap_path
,
2
0
)
with
TimeTracking
(
name
=
"
boot predictions
"
):
with
TimeTracking
(
name
=
"
boot predictions
"
):
bootstrap_predictions
=
self
.
model
.
predict_generator
(
generator
=
bootstraps
.
boot_strap_generator
(),
bootstrap_predictions
=
self
.
model
.
predict_generator
(
generator
=
bootstraps
.
boot_strap_generator
(),
steps
=
bootstraps
.
get_boot_strap_generator_length
())
steps
=
bootstraps
.
get_boot_strap_generator_length
())
...
@@ -72,18 +73,19 @@ class PostProcessing(RunEnvironment):
...
@@ -72,18 +73,19 @@ class PostProcessing(RunEnvironment):
ind
=
np
.
all
(
bootstrap_meta
==
[
boot
,
station
],
axis
=
1
)
ind
=
np
.
all
(
bootstrap_meta
==
[
boot
,
station
],
axis
=
1
)
length
=
sum
(
ind
)
length
=
sum
(
ind
)
sel
=
bootstrap_predictions
[
ind
].
reshape
((
length
,
window_lead_time
,
1
))
sel
=
bootstrap_predictions
[
ind
].
reshape
((
length
,
window_lead_time
,
1
))
coords
=
(
range
(
length
),
range
(
window_lead_time
))
coords
=
(
range
(
length
),
range
(
1
,
window_lead_time
+
1
))
tmp
=
xr
.
DataArray
(
sel
,
coords
=
(
*
coords
,
[
boot
]),
dims
=
[
"
index
"
,
"
window
"
,
"
type
"
])
tmp
=
xr
.
DataArray
(
sel
,
coords
=
(
*
coords
,
[
boot
]),
dims
=
[
"
index
"
,
"
ahead
"
,
"
type
"
])
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_
{
boot
}
_
{
station
}
.nc
"
)
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_
{
boot
}
_
{
station
}
.nc
"
)
tmp
.
to_netcdf
(
file_name
)
tmp
.
to_netcdf
(
file_name
)
labels
=
bootstraps
.
get_labels
(
station
).
reshape
((
length
,
window_lead_time
,
1
))
labels
=
bootstraps
.
get_labels
(
station
).
reshape
((
length
,
window_lead_time
,
1
))
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_labels_
{
station
}
.nc
"
)
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_labels_
{
station
}
.nc
"
)
labels
=
xr
.
DataArray
(
labels
,
coords
=
(
*
coords
,
[
"
obs
"
]),
dims
=
[
"
index
"
,
"
window
"
,
"
type
"
])
labels
=
xr
.
DataArray
(
labels
,
coords
=
(
*
coords
,
[
"
obs
"
]),
dims
=
[
"
index
"
,
"
ahead
"
,
"
type
"
])
labels
.
to_netcdf
(
file_name
)
labels
.
to_netcdf
(
file_name
)
# file_name = os.path.join(forecast_path, f"bootstraps_orig.nc")
# file_name = os.path.join(forecast_path, f"bootstraps_orig.nc")
# orig = xr.open_dataarray(file_name)
# orig = xr.open_dataarray(file_name)
# calc skill scores
# calc skill scores
skill_scores
=
statistics
.
SkillScores
(
None
)
skill_scores
=
statistics
.
SkillScores
(
None
)
score
=
{}
score
=
{}
...
@@ -92,17 +94,20 @@ class PostProcessing(RunEnvironment):
...
@@ -92,17 +94,20 @@ class PostProcessing(RunEnvironment):
labels
=
xr
.
open_dataarray
(
file_name
)
labels
=
xr
.
open_dataarray
(
file_name
)
shape
=
labels
.
shape
shape
=
labels
.
shape
orig
=
bootstraps
.
get_orig_prediction
(
forecast_path
,
f
"
forecasts_norm_
{
station
}
_test.nc
"
).
reshape
(
shape
)
orig
=
bootstraps
.
get_orig_prediction
(
forecast_path
,
f
"
forecasts_norm_
{
station
}
_test.nc
"
).
reshape
(
shape
)
orig
=
xr
.
DataArray
(
orig
,
coords
=
(
range
(
shape
[
0
]),
range
(
shape
[
1
]),
[
"
orig
"
]),
dims
=
[
"
index
"
,
"
window
"
,
"
type
"
])
orig
=
xr
.
DataArray
(
orig
,
coords
=
(
range
(
shape
[
0
]),
range
(
1
,
shape
[
1
]
+
1
),
[
"
orig
"
]),
dims
=
[
"
index
"
,
"
ahead
"
,
"
type
"
])
s
core
[
station
]
=
{}
s
kill
=
pd
.
DataFrame
(
columns
=
range
(
1
,
window_lead_time
+
1
))
for
boot
in
variables
:
for
boot
in
variables
:
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_
{
boot
}
_
{
station
}
.nc
"
)
file_name
=
os
.
path
.
join
(
forecast_path
,
f
"
bootstraps_
{
boot
}
_
{
station
}
.nc
"
)
boot_data
=
xr
.
open_dataarray
(
file_name
)
boot_data
=
xr
.
open_dataarray
(
file_name
)
boot_data
=
boot_data
.
combine_first
(
labels
)
boot_data
=
boot_data
.
combine_first
(
labels
)
boot_data
=
boot_data
.
combine_first
(
orig
)
boot_data
=
boot_data
.
combine_first
(
orig
)
score
[
station
][
boot
]
=
skill_scores
.
general_skill_score
(
boot_data
,
forecast_name
=
boot
,
reference_name
=
"
orig
"
)
boot_scores
=
[]
for
iahead
in
range
(
window_lead_time
):
# plot
data
=
boot_data
.
sel
(
ahead
=
iahead
+
1
)
boot_scores
.
append
(
skill_scores
.
general_skill_score
(
data
,
forecast_name
=
boot
,
reference_name
=
"
orig
"
))
skill
.
loc
[
boot
]
=
np
.
array
(
boot_scores
)
score
[
station
]
=
xr
.
DataArray
(
skill
,
dims
=
[
"
boot_var
"
,
"
ahead
"
])
return
score
def
_load_model
(
self
):
def
_load_model
(
self
):
try
:
try
:
...
@@ -128,6 +133,7 @@ class PostProcessing(RunEnvironment):
...
@@ -128,6 +133,7 @@ class PostProcessing(RunEnvironment):
PlotClimatologicalSkillScore
(
self
.
skill_scores
[
1
],
plot_folder
=
self
.
plot_path
,
score_only
=
False
,
PlotClimatologicalSkillScore
(
self
.
skill_scores
[
1
],
plot_folder
=
self
.
plot_path
,
score_only
=
False
,
extra_name_tag
=
"
all_terms_
"
,
model_setup
=
"
CNN
"
)
extra_name_tag
=
"
all_terms_
"
,
model_setup
=
"
CNN
"
)
PlotCompetitiveSkillScore
(
self
.
skill_scores
[
0
],
plot_folder
=
self
.
plot_path
,
model_setup
=
"
CNN
"
)
PlotCompetitiveSkillScore
(
self
.
skill_scores
[
0
],
plot_folder
=
self
.
plot_path
,
model_setup
=
"
CNN
"
)
PlotBootstrapSkillScore
(
self
.
bootstrap_skill_scores
,
plot_folder
=
self
.
plot_path
,
model_setup
=
"
CNN
"
)
PlotTimeSeries
(
self
.
test_data
.
stations
,
path
,
r
"
forecasts_%s_test.nc
"
,
plot_folder
=
self
.
plot_path
,
sampling
=
self
.
_sampling
)
PlotTimeSeries
(
self
.
test_data
.
stations
,
path
,
r
"
forecasts_%s_test.nc
"
,
plot_folder
=
self
.
plot_path
,
sampling
=
self
.
_sampling
)
def
calculate_test_score
(
self
):
def
calculate_test_score
(
self
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment