Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Merge requests
!318
Resolve "release v1.4.0"
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Resolve "release v1.4.0"
release_v1.4.0
into
master
Overview
0
Commits
229
Pipelines
3
Changes
4
Merged
Ghost User
requested to merge
release_v1.4.0
into
master
3 years ago
Overview
0
Commits
229
Pipelines
3
Changes
4
Expand
Closes
#317 (closed)
Edited
3 years ago
by
Ghost User
0
0
Merge request reports
Viewing commit
54c5f1c1
Prev
Next
Show latest version
4 files
+
285
−
103
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
4
Search (e.g. *.vue) (Ctrl+P)
54c5f1c1
added new bootstrap method "zero mean" and type "single input" and "variable"
· 54c5f1c1
leufen1
authored
3 years ago
mlair/data_handler/bootstraps.py
+
136
−
36
Options
@@ -15,69 +15,156 @@ __date__ = '2020-02-07'
import
os
from
collections
import
Iterator
,
Iterable
from
itertools
import
chain
from
typing
import
Union
,
List
import
numpy
as
np
import
xarray
as
xr
from
mlair.data_handler.abstract_data_handler
import
AbstractDataHandler
from
mlair.helpers.helpers
import
to_list
class
BootstrapIterator
(
Iterator
):
_position
:
int
=
None
def
__init__
(
self
,
data
:
"
BootStraps
"
):
def
__init__
(
self
,
data
:
"
BootStraps
"
,
method
):
assert
isinstance
(
data
,
BootStraps
)
self
.
_data
=
data
self
.
_dimension
=
data
.
bootstrap_dimension
self
.
_collection
=
self
.
_data
.
bootstraps
()
self
.
boot_dim
=
"
boots
"
self
.
_method
=
method
self
.
_collection
=
self
.
create_collection
(
self
.
_data
.
data
,
self
.
_dimension
)
self
.
_position
=
0
def
__next__
(
self
):
"""
Return next element or stop iteration.
"""
raise
NotImplementedError
@classmethod
def
create_collection
(
cls
,
data
,
dim
):
raise
NotImplementedError
def
_reshape
(
self
,
d
):
if
isinstance
(
d
,
list
):
return
list
(
map
(
lambda
x
:
self
.
_reshape
(
x
),
d
))
# return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d))
else
:
shape
=
d
.
shape
return
np
.
rollaxis
(
d
,
-
1
,
0
).
reshape
(
shape
[
0
]
*
shape
[
-
1
],
*
shape
[
1
:
-
1
])
def
_to_numpy
(
self
,
d
):
if
isinstance
(
d
,
list
):
return
list
(
map
(
lambda
x
:
self
.
_to_numpy
(
x
),
d
))
else
:
return
d
.
values
def
apply_bootstrap_method
(
self
,
data
:
np
.
ndarray
)
->
Union
[
np
.
ndarray
,
List
[
np
.
ndarray
]]:
"""
Apply predefined bootstrap method from given data.
:param data: data to apply bootstrap method on
:return: processed data as numpy array
"""
if
isinstance
(
data
,
list
):
return
list
(
map
(
lambda
x
:
self
.
apply_bootstrap_method
(
x
.
values
),
data
))
else
:
return
self
.
_method
.
apply
(
data
)
class
BootstrapIteratorSingleInput
(
BootstrapIterator
):
_position
:
int
=
None
def
__init__
(
self
,
*
args
):
super
().
__init__
(
*
args
)
def
__next__
(
self
):
"""
Return next element or stop iteration.
"""
try
:
index
,
dimension
=
self
.
_collection
[
self
.
_position
]
nboot
=
self
.
_data
.
number_of_bootstraps
_X
,
_Y
=
self
.
_data
.
data
.
get_data
(
as_numpy
=
False
)
_X
=
list
(
map
(
lambda
x
:
x
.
expand_dims
({
'
boots
'
:
range
(
nboot
)},
axis
=-
1
),
_X
))
_Y
=
_Y
.
expand_dims
({
"
boots
"
:
range
(
nboot
)},
axis
=-
1
)
_X
=
list
(
map
(
lambda
x
:
x
.
expand_dims
({
self
.
boot_dim
:
range
(
nboot
)},
axis
=-
1
),
_X
))
_Y
=
_Y
.
expand_dims
({
self
.
boot_dim
:
range
(
nboot
)},
axis
=-
1
)
single_variable
=
_X
[
index
].
sel
({
self
.
_dimension
:
[
dimension
]})
shuffled_variable
=
self
.
shuffle
(
single_variable
.
values
)
shuffled_data
=
xr
.
DataArray
(
shuffled_variable
,
coords
=
single_variable
.
coords
,
dims
=
single_variable
.
dims
)
_X
[
index
]
=
shuffled_data
.
combine_first
(
_X
[
index
]).
reindex_like
(
_X
[
index
])
bootstrapped_variable
=
self
.
apply_bootstrap_method
(
single_variable
.
values
)
bootstrapped_data
=
xr
.
DataArray
(
bootstrapped_variable
,
coords
=
single_variable
.
coords
,
dims
=
single_variable
.
dims
)
_X
[
index
]
=
bootstrapped_data
.
combine_first
(
_X
[
index
]).
reindex_like
(
_X
[
index
])
self
.
_position
+=
1
except
IndexError
:
raise
StopIteration
()
_X
,
_Y
=
self
.
_to_numpy
(
_X
),
self
.
_to_numpy
(
_Y
)
return
self
.
_reshape
(
_X
),
self
.
_reshape
(
_Y
),
(
index
,
dimension
)
@staticmethod
def
_reshape
(
d
):
if
isinstance
(
d
,
list
):
return
list
(
map
(
lambda
x
:
np
.
rollaxis
(
x
,
-
1
,
0
).
reshape
(
x
.
shape
[
0
]
*
x
.
shape
[
-
1
],
*
x
.
shape
[
1
:
-
1
]),
d
))
else
:
shape
=
d
.
shape
return
np
.
rollaxis
(
d
,
-
1
,
0
).
reshape
(
shape
[
0
]
*
shape
[
-
1
],
*
shape
[
1
:
-
1
])
@classmethod
def
create_collection
(
cls
,
data
,
dim
):
l
=
[]
for
i
,
x
in
enumerate
(
data
.
get_X
(
as_numpy
=
False
)):
l
.
append
(
list
(
map
(
lambda
y
:
(
i
,
y
),
x
.
indexes
[
dim
])))
return
list
(
chain
(
*
l
))
@staticmethod
def
_to_numpy
(
d
):
if
isinstance
(
d
,
list
):
return
list
(
map
(
lambda
x
:
x
.
values
,
d
))
else
:
return
d
.
values
@staticmethod
def
shuffle
(
data
:
np
.
ndarray
)
->
np
.
ndarray
:
"""
Shuffle randomly from given data (draw elements with replacement).
class
BootstrapIteratorVariable
(
BootstrapIterator
):
:param data: data to shuffle
:return: shuffled data as numpy array
"""
def
__init__
(
self
,
*
args
):
super
().
__init__
(
*
args
)
def
__next__
(
self
):
"""
Return next element or stop iteration.
"""
try
:
dimension
=
self
.
_collection
[
self
.
_position
]
nboot
=
self
.
_data
.
number_of_bootstraps
_X
,
_Y
=
self
.
_data
.
data
.
get_data
(
as_numpy
=
False
)
_X
=
list
(
map
(
lambda
x
:
x
.
expand_dims
({
self
.
boot_dim
:
range
(
nboot
)},
axis
=-
1
),
_X
))
_Y
=
_Y
.
expand_dims
({
self
.
boot_dim
:
range
(
nboot
)},
axis
=-
1
)
for
index
in
range
(
len
(
_X
)):
single_variable
=
_X
[
index
].
sel
({
self
.
_dimension
:
[
dimension
]})
bootstrapped_variable
=
self
.
apply_bootstrap_method
(
single_variable
.
values
)
bootstrapped_data
=
xr
.
DataArray
(
bootstrapped_variable
,
coords
=
single_variable
.
coords
,
dims
=
single_variable
.
dims
)
_X
[
index
]
=
bootstrapped_data
.
combine_first
(
_X
[
index
]).
transpose
(
*
_X
[
index
].
dims
)
self
.
_position
+=
1
except
IndexError
:
raise
StopIteration
()
_X
,
_Y
=
self
.
_to_numpy
(
_X
),
self
.
_to_numpy
(
_Y
)
return
self
.
_reshape
(
_X
),
self
.
_reshape
(
_Y
),
(
None
,
dimension
)
@classmethod
def
create_collection
(
cls
,
data
,
dim
):
l
=
set
()
for
i
,
x
in
enumerate
(
data
.
get_X
(
as_numpy
=
False
)):
l
.
update
(
x
.
indexes
[
dim
].
to_list
())
return
to_list
(
l
)
class
BootstrapIteratorBranch
(
BootstrapIterator
):
def
__init__
(
self
,
*
args
):
super
().
__init__
(
*
args
)
def
__next__
(
self
):
pass
# TODO: implement here: permute entire branch at once
class
ShuffleBootstraps
:
@staticmethod
def
apply
(
data
):
size
=
data
.
shape
return
np
.
random
.
choice
(
data
.
reshape
(
-
1
,
),
size
=
size
)
class
MeanBootstraps
:
def
__init__
(
self
,
mean
):
self
.
_mean
=
mean
def
apply
(
self
,
data
):
return
np
.
ones_like
(
data
)
*
self
.
_mean
class
BootStraps
(
Iterable
):
"""
Main class to perform bootstrap operations.
@@ -89,10 +176,19 @@ class BootStraps(Iterable):
this variable). The tuple is interesting if X consists of multiple input streams X_i (e.g. two or more stations)
because it shows which variable of which input X_i has been bootstrapped. All bootstrap combinations can be
retrieved by calling the .bootstraps() method. Furthermore, by calling the .get_orig_prediction() method, this class
imitates according to the set number of bootstraps the original prediction
imitates according to the set number of bootstraps the original prediction.
As bootstrap method, this class can currently make use of the ShuffleBootstraps class that uses drawing with
replacement to destroy the variable's information while keeping its statistical properties. Use `bootstrap=
"
shuffle
"
` to
call this method. Another method is the zero mean bootstrapping triggered by `bootstrap=
"
zero_mean
"
` and performed
by the MeanBootstraps class. This method destroys the variable
'
s information by a mode collapse to a constant value of
zero. If the variable is normalized to zero mean, this is equivalent to a mode collapse to the variable
'
s
mean value. In general, statistics are not conserved in this case, with the exception of the mean value. A custom mean value
for bootstrapping is currently not supported.
"""
def
__init__
(
self
,
data
:
AbstractDataHandler
,
number_of_bootstraps
:
int
=
10
,
bootstrap_dimension
:
str
=
"
variables
"
):
bootstrap_dimension
:
str
=
"
variables
"
,
bootstrap_type
=
"
singleinput
"
,
bootstrap_method
=
"
shuffle
"
):
"""
Create iterable class to be ready to iter.
@@ -100,20 +196,24 @@ class BootStraps(Iterable):
:param number_of_bootstraps: the number of bootstrap realisations
"""
self
.
data
=
data
self
.
number_of_bootstraps
=
number_of_bootstraps
self
.
number_of_bootstraps
=
number_of_bootstraps
if
bootstrap_method
==
"
shuffle
"
else
1
self
.
bootstrap_dimension
=
bootstrap_dimension
self
.
bootstrap_method
=
{
"
shuffle
"
:
ShuffleBootstraps
(),
"
zero_mean
"
:
MeanBootstraps
(
mean
=
0
)}.
get
(
bootstrap_method
)
# todo adjust number of bootstraps if mean bootstrapping
self
.
BootstrapIterator
=
{
"
singleinput
"
:
BootstrapIteratorSingleInput
,
"
branch
"
:
BootstrapIteratorBranch
,
"
variable
"
:
BootstrapIteratorVariable
}.
get
(
bootstrap_type
,
BootstrapIteratorSingleInput
)
def
__iter__
(
self
):
return
BootstrapIterator
(
self
)
return
self
.
BootstrapIterator
(
self
,
self
.
bootstrap_method
)
def
__len__
(
self
):
return
len
(
self
.
bootstraps
(
))
return
len
(
self
.
BootstrapIterator
.
create_collection
(
self
.
data
,
self
.
bootstrap_dimension
))
def
bootstraps
(
self
):
l
=
[]
for
i
,
x
in
enumerate
(
self
.
data
.
get_X
(
as_numpy
=
False
)):
l
.
append
(
list
(
map
(
lambda
y
:
(
i
,
y
),
x
.
indexes
[
'
variables
'
])))
return
list
(
chain
(
*
l
))
return
self
.
BootstrapIterator
.
create_collection
(
self
.
data
,
self
.
bootstrap_dimension
)
def
get_orig_prediction
(
self
,
path
:
str
,
file_name
:
str
,
prediction_name
:
str
=
"
CNN
"
)
->
np
.
ndarray
:
"""
Loading