Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
MLAir
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
MLAir
Commits
3dd830af
Commit
3dd830af
authored
5 years ago
by
lukas leufen
Browse files
Options
Downloads
Patches
Plain Diff
more tests and little doc
parent
b7bdbd4b
No related branches found
No related tags found
3 merge requests
!59
Develop
,
!52
implemented bootstraps
,
!34
Pair issue048 feat create shuffled data
Pipeline
#29244
passed
5 years ago
Stage: test
Stage: pages
Stage: deploy
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/data_handling/bootstraps.py
+30
-13
30 additions, 13 deletions
src/data_handling/bootstraps.py
test/test_data_handling/test_bootstraps.py
+23
-5
23 additions, 5 deletions
test/test_data_handling/test_bootstraps.py
with
53 additions
and
18 deletions
src/data_handling/bootstraps.py
+
30
−
13
View file @
3dd830af
...
...
@@ -22,17 +22,22 @@ class BootStraps(RunEnvironment):
self
.
create_shuffled_data
()
def
create_shuffled_data
(
self
):
"""
Create shuffled data. Use original test data, add dimension
'
boots
'
with length number of bootstraps and insert
randomly selected variables. If there is a suitable local file for requested window size and number of
bootstraps, no additional file will be created inside this function.
"""
variables_str
=
'
_
'
.
join
(
sorted
(
self
.
test_data
.
variables
))
window
=
self
.
test_data
.
window_history_size
for
station
in
self
.
test_data
.
stations
:
valid
,
max_
nboot
=
self
.
valid_bootstrap_file
(
station
,
variables_str
,
window
)
valid
,
nboot
=
self
.
valid_bootstrap_file
(
station
,
variables_str
,
window
)
if
not
valid
:
logging
.
info
(
f
'
create bootstap data for
{
station
}
'
)
hist
,
_
=
self
.
test_data
[
station
]
data
=
hist
.
copy
()
file_name
=
f
"
{
station
}
_
{
variables_str
}
_hist
{
window
}
_nboots
{
max_
nboot
}
_shuffled.nc
"
file_name
=
f
"
{
station
}
_
{
variables_str
}
_hist
{
window
}
_nboots
{
nboot
}
_shuffled.nc
"
file_path
=
os
.
path
.
join
(
self
.
bootstrap_path
,
file_name
)
data
=
data
.
expand_dims
({
'
boots
'
:
range
(
max_
nboot
)},
axis
=-
1
)
data
=
data
.
expand_dims
({
'
boots
'
:
range
(
nboot
)},
axis
=-
1
)
shuffled_variable
=
np
.
full
(
data
.
shape
,
np
.
nan
)
for
i
,
var
in
enumerate
(
data
.
coords
[
'
variables
'
]):
single_variable
=
data
.
sel
(
variables
=
var
).
values
...
...
@@ -41,25 +46,37 @@ class BootStraps(RunEnvironment):
shuffled_data
.
to_netcdf
(
file_path
)
def
valid_bootstrap_file
(
self
,
station
,
variables
,
window
):
str_re
=
re
.
compile
(
f
"
{
station
}
_
{
variables
}
_hist(\d+)_nboots(\d+)_shuffled*
"
)
dir_list
=
os
.
listdir
(
self
.
bootstrap_path
)
"""
Compare local bootstrap file with given settings for station, variables, window and number of bootstraps. If a
match was found, this method returns a tuple (True, None). In any other case, it returns (False, max_nboot),
where max_nboot is the highest number of bootstraps found in the local storage (but never less than the number of bootstraps of this class). A match is defined so that the window
length is greater than or equal to the given window size from args and the number of boots is also greater than or equal to the given number of boots
from this class. Furthermore, this function deletes local files if they match the station pattern but don
'
t fit
the window and bootstrap condition. This is done because it is assumed that the corresponding file will
be created with a longer or at least the same window size and number of bootstraps.
:param station:
:param variables:
:param window:
:return:
"""
regex
=
re
.
compile
(
rf
"
{
station
}
_
{
variables
}
_hist(\d+)_nboots(\d+)_shuffled*
"
)
max_nboot
=
self
.
number_bootstraps
for
file
in
dir_list
:
match
=
str_re
.
match
(
file
)
for
file
in
os
.
listdir
(
self
.
bootstrap_path
)
:
match
=
regex
.
match
(
file
)
if
match
:
window_
existing
=
int
(
match
.
group
(
1
))
nboot_
existing
=
int
(
match
.
group
(
2
))
max_nboot
=
max
([
max_nboot
,
nboot_
existing
])
if
(
window_
existing
>=
window
)
and
(
nboot_
existing
>=
self
.
number_bootstraps
):
window_
file
=
int
(
match
.
group
(
1
))
nboot_
file
=
int
(
match
.
group
(
2
))
max_nboot
=
max
([
max_nboot
,
nboot_
file
])
if
(
window_
file
>=
window
)
and
(
nboot_
file
>=
self
.
number_bootstraps
):
return
True
,
None
else
:
os
.
remove
(
os
.
path
.
join
(
self
.
bootstrap_path
,
file
))
return
False
,
max_nboot
def
shuffle_single_variable
(
self
,
data
):
@staticmethod
def
shuffle_single_variable
(
data
:
np
.
ndarray
)
->
np
.
ndarray
:
orig_shape
=
data
.
shape
size
=
orig_shape
# size = (*orig_shape, self.number_bootstraps)
return
np
.
random
.
choice
(
data
.
reshape
(
-
1
,),
size
=
size
)
...
...
This diff is collapsed.
Click to expand it.
test/test_data_handling/test_bootstraps.py
+
23
−
5
View file @
3dd830af
...
...
@@ -4,20 +4,27 @@ from src.data_handling.bootstraps import BootStraps
import
pytest
import
os
import
numpy
as
np
class
TestBootstraps
:
@pytest.fixture
def
boot_no_init
(
self
):
def
path
(
self
):
path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"
data
"
)
if
not
os
.
path
.
exists
(
path
):
os
.
makedirs
(
path
)
return
path
@pytest.fixture
def
boot_no_init
(
self
,
path
):
obj
=
object
.
__new__
(
BootStraps
)
super
(
BootStraps
,
obj
).
__init__
()
obj
.
number_bootstraps
=
50
obj
.
bootstrap_path
=
path
return
obj
def
test_valid_bootstrap_file
(
self
,
boot_no_init
):
path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"
data
"
)
os
.
makedirs
(
path
)
boot_no_init
.
bootstrap_path
=
path
def
test_valid_bootstrap_file
(
self
,
path
,
boot_no_init
):
station
=
"
TESTSTATION
"
variables
=
"
var1_var2_var3
"
window
=
5
...
...
@@ -44,3 +51,14 @@ class TestBootstraps:
os
.
mknod
(
os
.
path
.
join
(
path
,
f
"
{
station
}
_
{
variables
}
_hist5_nboots60_shuffled.dat
"
))
boot_no_init
.
number_bootstraps
=
50
assert
boot_no_init
.
valid_bootstrap_file
(
station
,
variables
,
20
)
==
(
False
,
60
)
def
test_shuffle_single_variale
(
self
,
boot_no_init
):
data
=
np
.
array
([[
1
,
2
,
3
],
[
1
,
2
,
3
],
[
1
,
2
,
3
],
[
1
,
2
,
3
]])
res
=
boot_no_init
.
shuffle_single_variable
(
data
)
assert
res
.
shape
==
data
.
shape
assert
res
.
max
()
==
data
.
max
()
assert
res
.
min
()
==
data
.
min
()
assert
set
(
np
.
unique
(
res
)).
issubset
({
1
,
2
,
3
})
def
test_create_shuffled_data
(
self
):
pass
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment