Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Commits
9973cc26
Commit
9973cc26
authored
4 years ago
by
leufen1
Browse files
Options
Downloads
Patches
Plain Diff
competitive skill scores can now be calculated for an arbitrary number of models
parent
1a3d9079
Branches
Branches containing commit
Tags
Tags containing commit
3 merge requests
!253
include current develop
,
!252
Resolve "release v1.3.0"
,
!196
Resolve "Competitor Models"
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
mlair/helpers/statistics.py
+62
-33
62 additions, 33 deletions
mlair/helpers/statistics.py
with
62 additions
and
33 deletions
mlair/helpers/statistics.py
+
62
−
33
View file @
9973cc26
...
...
@@ -8,8 +8,9 @@ __date__ = '2019-10-23'
import
numpy
as
np
import
xarray
as
xr
import
pandas
as
pd
from
typing
import
Union
,
Tuple
,
Dict
from
typing
import
Union
,
Tuple
,
Dict
,
List
from
matplotlib
import
pyplot
as
plt
import
itertools
from
mlair.helpers
import
to_list
,
remove_items
...
...
@@ -178,10 +179,33 @@ class SkillScores:
skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
"""
models_default
=
[
"
cnn
"
,
"
persi
"
,
"
ols
"
]
def
__init__
(
self
,
internal_data
:
Data
):
def
__init__
(
self
,
internal_data
:
Data
,
models
=
None
,
observation_name
=
"
obs
"
):
"""
Set internal data.
"""
self
.
internal_data
=
internal_data
self
.
models
=
self
.
set_model_names
(
models
)
self
.
observation_name
=
observation_name
def
set_model_names
(
self
,
models
:
List
[
str
])
->
List
[
str
]:
"""
Either use given models or use defaults.
"""
if
models
is
None
:
models
=
self
.
models_default
return
self
.
_reorder
(
models
)
@staticmethod
def
_reorder
(
model_list
:
List
[
str
])
->
List
[
str
]:
"""
Set elements persi and obs at the very end of given list.
"""
for
element
in
[
"
persi
"
,
"
obs
"
]:
if
element
in
model_list
:
model_list
.
append
(
model_list
.
pop
(
model_list
.
index
(
element
)))
return
model_list
def
get_model_name_combinations
(
self
):
"""
Return all combinations of two models as tuple and string.
"""
combinations
=
list
(
itertools
.
combinations
(
self
.
models
,
2
))
combination_strings
=
[
f
"
{
first
}
-
{
second
}
"
for
(
first
,
second
)
in
combinations
]
return
combinations
,
combination_strings
def
skill_scores
(
self
,
window_lead_time
:
int
)
->
pd
.
DataFrame
:
"""
...
...
@@ -192,16 +216,19 @@ class SkillScores:
:return: skill score for each comparison and forecast step
"""
ahead_names
=
list
(
range
(
1
,
window_lead_time
+
1
))
skill_score
=
pd
.
DataFrame
(
index
=
[
'
cnn-persi
'
,
'
ols-persi
'
,
'
cnn-ols
'
,
'
cnn-competitor
'
])
combinations
,
combination_strings
=
self
.
get_model_name_combinations
()
skill_score
=
pd
.
DataFrame
(
index
=
combination_strings
)
for
iahead
in
ahead_names
:
data
=
self
.
internal_data
.
sel
(
ahead
=
iahead
)
skill_score
[
iahead
]
=
[
self
.
general_skill_score
(
data
,
forecast_name
=
"
CNN
"
,
reference_name
=
"
persi
"
),
self
.
general_skill_score
(
data
,
forecast_name
=
"
OLS
"
,
reference_name
=
"
persi
"
),
self
.
general_skill_score
(
data
,
forecast_name
=
"
CNN
"
,
reference_name
=
"
OLS
"
),
self
.
general_skill_score
(
data
,
forecast_name
=
"
CNN
"
,
reference_name
=
"
competitor
"
)]
skill_score
[
iahead
]
=
[
self
.
general_skill_score
(
data
,
forecast_name
=
first
,
reference_name
=
second
,
observation_name
=
self
.
observation_name
)
for
(
first
,
second
)
in
combinations
]
return
skill_score
def
climatological_skill_scores
(
self
,
external_data
:
Data
,
window_lead_time
:
int
)
->
xr
.
DataArray
:
def
climatological_skill_scores
(
self
,
external_data
:
Data
,
window_lead_time
:
int
,
forecast_name
:
str
=
"
cnn
"
)
->
xr
.
DataArray
:
"""
Calculate climatological skill scores according to Murphy (1988).
...
...
@@ -210,6 +237,7 @@ class SkillScores:
:param external_data: external data
:param window_lead_time: interested time step of forecast horizon to select data
:param forecast_name: name of the forecast to use for this calculation (must be available in `data`)
:return: all CASES as well as all terms
"""
...
...
@@ -225,30 +253,28 @@ class SkillScores:
data
=
self
.
internal_data
.
sel
(
ahead
=
iahead
)
skill_score
.
loc
[[
"
CASE I
"
,
"
AI
"
,
"
BI
"
,
"
CI
"
],
iahead
]
=
np
.
stack
(
self
.
_climatological_skill_score
(
data
,
mu_type
=
1
,
forecast_name
=
"
CNN
"
).
values
.
flatten
())
data
,
mu_type
=
1
,
forecast_name
=
forecast_name
,
observation_name
=
self
.
observation_name
).
values
.
flatten
())
skill_score
.
loc
[[
"
CASE II
"
,
"
AII
"
,
"
BII
"
],
iahead
]
=
np
.
stack
(
self
.
_climatological_skill_score
(
data
,
mu_type
=
2
,
forecast_name
=
"
CNN
"
).
values
.
flatten
())
data
,
mu_type
=
2
,
forecast_name
=
forecast_name
,
observation_name
=
self
.
observation_name
).
values
.
flatten
())
if
external_data
is
not
None
:
skill_score
.
loc
[[
"
CASE III
"
,
"
AIII
"
],
iahead
]
=
np
.
stack
(
self
.
_climatological_skill_score
(
data
,
mu_type
=
3
,
forecast_name
=
"
CNN
"
,
data
,
mu_type
=
3
,
forecast_name
=
forecast_name
,
observation_name
=
self
.
observation_name
,
external_data
=
external_data
).
values
.
flatten
())
skill_score
.
loc
[[
"
CASE IV
"
,
"
AIV
"
,
"
BIV
"
,
"
CIV
"
],
iahead
]
=
np
.
stack
(
self
.
_climatological_skill_score
(
data
,
mu_type
=
4
,
forecast_name
=
"
CNN
"
,
data
,
mu_type
=
4
,
forecast_name
=
forecast_name
,
observation_name
=
self
.
observation_name
,
external_data
=
external_data
).
values
.
flatten
())
return
skill_score
def
_climatological_skill_score
(
self
,
data
,
mu_type
=
1
,
observation_name
=
"
obs
"
,
forecast_name
=
"
CNN
"
,
external_data
=
None
):
def
_climatological_skill_score
(
self
,
data
,
observation_name
,
forecast_name
,
mu_type
=
1
,
external_data
=
None
):
kwargs
=
{
"
external_data
"
:
external_data
}
if
external_data
is
not
None
else
{}
return
self
.
__getattribute__
(
f
"
skill_score_mu_case_
{
mu_type
}
"
)(
data
,
observation_name
,
forecast_name
,
**
kwargs
)
@staticmethod
def
general_skill_score
(
data
:
Data
,
observation_name
:
str
=
"
obs
"
,
forecast_name
:
str
=
"
CNN
"
,
reference_name
:
str
=
"
persi
"
)
->
np
.
ndarray
:
def
general_skill_score
(
data
:
Data
,
observation_name
:
str
,
forecast_name
:
str
,
reference_name
:
str
)
->
np
.
ndarray
:
r
"""
Calculate general skill score based on mean squared error.
...
...
@@ -285,49 +311,51 @@ class SkillScores:
:returns: Terms AI, BI, and CI, internal data without nans and mean, variance, correlation and its p-value
"""
data
=
data
.
loc
[...,
[
observation_name
,
forecast_name
]
]
.
drop
(
"
ahead
"
)
data
=
data
.
sel
(
type
=
[
observation_name
,
forecast_name
]
)
.
drop
(
"
ahead
"
)
data
=
data
.
dropna
(
"
index
"
)
mean
=
data
.
mean
(
"
index
"
)
sigma
=
np
.
sqrt
(
data
.
var
(
"
index
"
))
r
,
p
=
stats
.
pearsonr
(
data
.
loc
[...,
forecast_name
],
data
.
loc
[...
,
observation_name
])
r
,
p
=
stats
.
pearsonr
(
*
[
data
.
sel
(
type
=
n
).
values
.
flatten
()
for
n
in
[
forecast_name
,
observation_name
]
]
)
AI
=
np
.
array
(
r
**
2
)
BI
=
((
r
-
(
sigma
.
loc
[...,
forecast_name
]
/
sigma
.
loc
[...,
observation_name
]))
**
2
).
values
CI
=
(((
mean
.
loc
[...,
forecast_name
]
-
mean
.
loc
[...,
observation_name
])
/
sigma
.
loc
[
...,
observation_name
])
**
2
).
values
BI
=
((
r
-
(
sigma
.
sel
(
type
=
forecast_name
,
drop
=
True
)
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
)))
**
2
).
values
CI
=
(((
mean
.
sel
(
type
=
forecast_name
,
drop
=
True
)
-
mean
.
sel
(
type
=
observation_name
,
drop
=
True
))
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
))
**
2
).
values
suffix
=
{
"
mean
"
:
mean
,
"
sigma
"
:
sigma
,
"
r
"
:
r
,
"
p
"
:
p
}
return
AI
,
BI
,
CI
,
data
,
suffix
def
skill_score_mu_case_1
(
self
,
data
,
observation_name
=
"
obs
"
,
forecast_name
=
"
CNN
"
):
def
skill_score_mu_case_1
(
self
,
data
,
observation_name
,
forecast_name
):
"""
Calculate CASE I.
"""
AI
,
BI
,
CI
,
data
,
_
=
self
.
skill_score_pre_calculations
(
data
,
observation_name
,
forecast_name
)
skill_score
=
np
.
array
(
AI
-
BI
-
CI
)
return
pd
.
DataFrame
({
"
skill_score
"
:
[
skill_score
],
"
AI
"
:
[
AI
],
"
BI
"
:
[
BI
],
"
CI
"
:
[
CI
]}).
to_xarray
().
to_array
()
def
skill_score_mu_case_2
(
self
,
data
,
observation_name
=
"
obs
"
,
forecast_name
=
"
CNN
"
):
def
skill_score_mu_case_2
(
self
,
data
,
observation_name
,
forecast_name
):
"""
Calculate CASE II.
"""
AI
,
BI
,
CI
,
data
,
suffix
=
self
.
skill_score_pre_calculations
(
data
,
observation_name
,
forecast_name
)
monthly_mean
=
self
.
create_monthly_mean_from_daily_data
(
data
)
data
=
xr
.
concat
([
data
,
monthly_mean
],
dim
=
"
type
"
)
sigma
=
suffix
[
"
sigma
"
]
sigma_monthly
=
np
.
sqrt
(
monthly_mean
.
var
())
r
,
p
=
stats
.
pearsonr
(
data
.
loc
[...,
observation_name
],
data
.
loc
[...
,
observation_name
+
"
X
"
])
r
,
p
=
stats
.
pearsonr
(
*
[
data
.
sel
(
type
=
n
).
values
.
flatten
()
for
n
in
[
observation_name
,
observation_name
+
"
X
"
]
]
)
AII
=
np
.
array
(
r
**
2
)
BII
=
((
r
-
sigma_monthly
/
sigma
.
loc
[
observation_name
]
)
**
2
).
values
BII
=
((
r
-
sigma_monthly
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
)
)
**
2
).
values
skill_score
=
np
.
array
((
AI
-
BI
-
CI
-
AII
+
BII
)
/
(
1
-
AII
+
BII
))
return
pd
.
DataFrame
({
"
skill_score
"
:
[
skill_score
],
"
AII
"
:
[
AII
],
"
BII
"
:
[
BII
]}).
to_xarray
().
to_array
()
def
skill_score_mu_case_3
(
self
,
data
,
observation_name
=
"
obs
"
,
forecast_name
=
"
CNN
"
,
external_data
=
None
):
def
skill_score_mu_case_3
(
self
,
data
,
observation_name
,
forecast_name
,
external_data
=
None
):
"""
Calculate CASE III.
"""
AI
,
BI
,
CI
,
data
,
suffix
=
self
.
skill_score_pre_calculations
(
data
,
observation_name
,
forecast_name
)
mean
,
sigma
=
suffix
[
"
mean
"
],
suffix
[
"
sigma
"
]
AIII
=
(((
external_data
.
mean
().
values
-
mean
.
loc
[
observation_name
])
/
sigma
.
loc
[
observation_name
])
**
2
).
values
AIII
=
(((
external_data
.
mean
().
values
-
mean
.
sel
(
type
=
observation_name
,
drop
=
True
))
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
))
**
2
).
values
skill_score
=
np
.
array
((
AI
-
BI
-
CI
+
AIII
)
/
1
+
AIII
)
return
pd
.
DataFrame
({
"
skill_score
"
:
[
skill_score
],
"
AIII
"
:
[
AIII
]}).
to_xarray
().
to_array
()
def
skill_score_mu_case_4
(
self
,
data
,
observation_name
=
"
obs
"
,
forecast_name
=
"
CNN
"
,
external_data
=
None
):
def
skill_score_mu_case_4
(
self
,
data
,
observation_name
,
forecast_name
,
external_data
=
None
):
"""
Calculate CASE IV.
"""
AI
,
BI
,
CI
,
data
,
suffix
=
self
.
skill_score_pre_calculations
(
data
,
observation_name
,
forecast_name
)
monthly_mean_external
=
self
.
create_monthly_mean_from_daily_data
(
external_data
,
columns
=
data
.
type
.
values
,
...
...
@@ -338,12 +366,13 @@ class SkillScores:
mean_external
=
monthly_mean_external
.
mean
()
sigma_external
=
np
.
sqrt
(
monthly_mean_external
.
var
())
#
r_mu, p_mu = stats.
s
pear
manr(data.loc[..., [observation_name, observation_name+'X']])
r_mu
,
p_mu
=
stats
.
pearsonr
(
data
.
loc
[...,
observation_name
],
data
.
loc
[...
,
observation_name
+
"
X
"
])
r_mu
,
p_mu
=
stats
.
pear
sonr
(
*
[
data
.
sel
(
type
=
n
).
values
.
flatten
()
for
n
in
[
observation_name
,
observation_name
+
"
X
"
]
]
)
AIV
=
np
.
array
(
r_mu
**
2
)
BIV
=
((
r_mu
-
sigma_external
/
sigma
.
loc
[
observation_name
])
**
2
).
values
CIV
=
(((
mean_external
-
mean
.
loc
[
observation_name
])
/
sigma
.
loc
[
observation_name
])
**
2
).
values
BIV
=
((
r_mu
-
sigma_external
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
))
**
2
).
values
CIV
=
(((
mean_external
-
mean
.
sel
(
type
=
observation_name
,
drop
=
True
))
/
sigma
.
sel
(
type
=
observation_name
,
drop
=
True
))
**
2
).
values
skill_score
=
np
.
array
((
AI
-
BI
-
CI
-
AIV
+
BIV
+
CIV
)
/
(
1
-
AIV
+
BIV
+
CIV
))
return
pd
.
DataFrame
(
{
"
skill_score
"
:
[
skill_score
],
"
AIV
"
:
[
AIV
],
"
BIV
"
:
[
BIV
],
"
CIV
"
:
CIV
}).
to_xarray
().
to_array
()
...
...
@@ -369,7 +398,7 @@ class SkillScores:
mu
=
data
.
groupby
(
"
index.month
"
).
mean
()
for
month
in
mu
.
month
:
monthly_mean
[
monthly_mean
.
index
.
dt
.
month
==
month
,
:]
=
mu
[
mu
.
month
==
month
].
values
monthly_mean
[
monthly_mean
.
index
.
dt
.
month
==
month
,
:]
=
mu
[
mu
.
month
==
month
].
values
.
flatten
()
return
monthly_mean
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment