Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ozone-imputation
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
machine-learning
ozone-imputation
Commits
d5eaccae
Commit
d5eaccae
authored
3 years ago
by
Clara Betancourt
Browse files
Options
Downloads
Plain Diff
Merge branch 'clara_issue09_gather_materials_for_presentation_before_DLR' into devel
parents
81ca62f6
48664dd8
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
source/experiments/cs_time_resolved_ozone.py
+10
-12
10 additions, 12 deletions
source/experiments/cs_time_resolved_ozone.py
source/visualizations/preanalysis_plots.py
+92
-25
92 additions, 25 deletions
source/visualizations/preanalysis_plots.py
with
102 additions
and
37 deletions
source/experiments/cs_time_resolved_ozone.py
+
10
−
12
View file @
d5eaccae
...
@@ -35,35 +35,33 @@ x_train = data.x[data.train_mask].numpy()
...
@@ -35,35 +35,33 @@ x_train = data.x[data.train_mask].numpy()
y_train
=
data
.
y
[
data
.
train_mask
].
numpy
().
reshape
(
-
1
)
y_train
=
data
.
y
[
data
.
train_mask
].
numpy
().
reshape
(
-
1
)
x_val
=
data
.
x
[
data
.
val_mask
].
numpy
()
x_val
=
data
.
x
[
data
.
val_mask
].
numpy
()
y_val
=
data
.
y
[
data
.
val_mask
].
numpy
().
reshape
(
-
1
)
y_val
=
data
.
y
[
data
.
val_mask
].
numpy
().
reshape
(
-
1
)
# model = pkl.load(open(tro.rf_path, 'rb'))
model
=
pkl
.
load
(
open
(
tro
.
rf_path
,
'
rb
'
))
# pdb.set_trace()
y_val_hat
=
model
.
predict
(
x_val
)
# y_val_hat = model.predict(x_val)
# y_val_hat = np.full_like(y_val, fill_value=np.mean(y_train))
y_val_hat
=
np
.
full_like
(
y_val
,
fill_value
=
np
.
mean
(
y_train
))
rmse
=
(
mean_squared_error
(
y_val
,
y_val_hat
))
**
.
5
rmse
=
(
mean_squared_error
(
y_val
,
y_val_hat
))
**
.
5
r2
=
r2_score
(
y_val
,
y_val_hat
)
r2
=
r2_score
(
y_val
,
y_val_hat
)
print
(
'
======================
'
)
print
(
'
======================
'
)
print
(
'
Baseline results:
'
)
print
(
'
Baseline results:
'
)
print
(
f
'
RMSE:
{
rmse
:
.
2
f
}
, R2:
{
r2
:
.
2
f
}
'
)
print
(
f
'
RMSE:
{
rmse
:
.
3
f
}
, R2:
{
r2
:
.
3
f
}
'
)
(
'
======================
'
)
(
'
======================
'
)
print
(
'
Correct and smooth
'
)
print
(
'
Correct and smooth
'
)
cs
=
CorrectAndSmooth
(
num_correction_layers
=
1
0
,
correction_alpha
=
.
75
,
cs
=
CorrectAndSmooth
(
num_correction_layers
=
2
0
,
correction_alpha
=
.
75
,
num_smoothing_layers
=
1
0
,
smoothing_alpha
=
0.
4
,
num_smoothing_layers
=
2
0
,
smoothing_alpha
=
0.
1
,
autoscale
=
True
)
# autoscale is misleading...
autoscale
=
True
)
# autoscale is misleading...
x
=
data
.
x
.
numpy
()
x
=
data
.
x
.
numpy
()
#
y_hat = model.predict(x)
y_hat
=
model
.
predict
(
x
)
y_hat
=
np
.
full_like
(
data
.
y
.
numpy
(),
fill_value
=
np
.
mean
(
data
.
y
[
data
.
train_mask
].
numpy
()))
#
y_hat = np.full_like(data.y.numpy(), fill_value=np.mean(data.y[data.train_mask].numpy()))
y_hat
=
torch
.
tensor
(
y_hat
,
dtype
=
torch
.
float32
).
view
(
-
1
,
1
)
y_hat
=
torch
.
tensor
(
y_hat
,
dtype
=
torch
.
float32
).
view
(
-
1
,
1
)
y_soft
=
cs
.
correct
(
y_soft
=
y_hat
,
y_true
=
data
.
y
[
data
.
train_mask
],
y_soft
=
cs
.
correct
(
y_soft
=
y_hat
,
y_true
=
data
.
y
[
data
.
train_mask
],
mask
=
data
.
train_mask
,
edge_index
=
data
.
edge_index
,
mask
=
data
.
train_mask
,
edge_index
=
data
.
edge_index
,
edge_weight
=
data
.
edge_weight
)
edge_weight
=
data
.
edge_weight
)
y_val_soft
=
y_soft
[
data
.
val_mask
].
numpy
()
y_val_soft
=
y_soft
[
data
.
val_mask
].
numpy
()
# pdb.set_trace()
rmse
=
(
mean_squared_error
(
y_val
,
y_val_soft
))
**
.
5
rmse
=
(
mean_squared_error
(
y_val
,
y_val_soft
))
**
.
5
r2
=
r2_score
(
y_val
,
y_val_soft
)
r2
=
r2_score
(
y_val
,
y_val_soft
)
print
(
f
'
After correct:
'
)
print
(
f
'
After correct:
'
)
print
(
f
'
RMSE:
{
rmse
:
.
2
f
}
, R2:
{
r2
:
.
2
f
}
'
)
print
(
f
'
RMSE:
{
rmse
:
.
3
f
}
, R2:
{
r2
:
.
3
f
}
'
)
y_soft2
=
cs
.
smooth
(
y_soft
=
y_soft
,
y_true
=
data
.
y
[
data
.
train_mask
],
y_soft2
=
cs
.
smooth
(
y_soft
=
y_soft
,
y_true
=
data
.
y
[
data
.
train_mask
],
mask
=
data
.
train_mask
,
edge_index
=
data
.
edge_index
,
mask
=
data
.
train_mask
,
edge_index
=
data
.
edge_index
,
...
@@ -72,7 +70,7 @@ y_val_soft2 = y_soft2[data.val_mask].numpy()
...
@@ -72,7 +70,7 @@ y_val_soft2 = y_soft2[data.val_mask].numpy()
rmse
=
(
mean_squared_error
(
y_val
,
y_val_soft2
))
**
.
5
rmse
=
(
mean_squared_error
(
y_val
,
y_val_soft2
))
**
.
5
r2
=
r2_score
(
y_val
,
y_val_soft2
)
r2
=
r2_score
(
y_val
,
y_val_soft2
)
print
(
f
'
After smooth:
'
)
print
(
f
'
After smooth:
'
)
print
(
f
'
RMSE:
{
rmse
:
.
2
f
}
, R2:
{
r2
:
.
2
f
}
'
)
print
(
f
'
RMSE:
{
rmse
:
.
3
f
}
, R2:
{
r2
:
.
3
f
}
'
)
exit
()
exit
()
print
(
'
Incoming node degree vs. error in test set:
'
)
print
(
'
Incoming node degree vs. error in test set:
'
)
...
...
This diff is collapsed.
Click to expand it.
source/visualizations/preanalysis_plots.py
+
92
−
25
View file @
d5eaccae
...
@@ -16,8 +16,8 @@ import matplotlib.pyplot as plt
...
@@ -16,8 +16,8 @@ import matplotlib.pyplot as plt
# own
# own
import
settings
import
settings
# from utils import query_db
from
preprocessing.aqbench
import
AQBenchGraph
from
preprocessing.aqbench
import
AQBenchGraph
from
preprocessing.time_resolved
import
TimeResolvedOzone
def
time_series_lenght
():
def
time_series_lenght
():
...
@@ -95,19 +95,34 @@ def missing_values():
...
@@ -95,19 +95,34 @@ def missing_values():
"""
"""
color the time series according to missing values
color the time series according to missing values
"""
"""
# find all time series
print
(
'
missing values...
'
)
file_list
=
[
f
for
f
in
os
.
listdir
(
settings
.
resources_dir
)
if
f
.
startswith
(
'
hourly_
'
)]
# read in data
tro
=
TimeResolvedOzone
()
x_df
=
pd
.
read_csv
(
tro
.
x_path
,
index_col
=
0
)
y_df
=
pd
.
read_csv
(
tro
.
y_path
,
index_col
=
0
)
reg_df
=
pd
.
read_csv
(
tro
.
reg_path
,
index_col
=
0
)
print
(
x_df
.
columns
)
# reshape to 2d field
n_stations
=
len
(
np
.
unique
(
reg_df
.
station_id
))
n_timesteps
=
len
(
np
.
unique
(
reg_df
.
datetime
))
y_2d
=
y_df
.
values
.
reshape
(
n_stations
,
n_timesteps
)
print
(
f
'
stations:
{
n_stations
}
'
)
print
(
f
'
timesteps:
{
n_timesteps
}
'
)
print
(
f
'
min:
{
np
.
nanmin
(
y_2d
)
}
'
)
print
(
f
'
max:
{
np
.
nanmax
(
y_2d
)
}
'
)
print
(
f
'
mean:
{
np
.
nanmean
(
y_2d
)
}
'
)
print
(
f
'
missing:
{
np
.
count_nonzero
(
np
.
isnan
(
y_2d
))
/
(
n_stations
*
n_timesteps
)
*
100
}
'
)
for
f
in
file_list
:
print
(
f
'
Plotting missing values for
{
f
}
...
'
)
# info
# info
var
=
f
.
lstrip
(
'
hourly_
'
).
strip
(
'
.csv
'
)
var
=
'
o3
'
df
=
pd
.
read_csv
(
settings
.
resources_dir
+
f
,
index_col
=
0
)
# plot the data
# plot the data
plt
.
figure
()
z
=
int
(
n_timesteps
/
n_stations
)
plt
.
imshow
(
df
.
values
.
T
,
aspect
=
100
,
interpolation
=
'
none
'
)
plt
.
figure
(
figsize
=
(
z
*
3
,
3
))
plt
.
imshow
(
y_2d
[
0
:
n_stations
,
0
:
z
*
n_stations
],
interpolation
=
'
none
'
)
plt
.
yticks
([])
plt
.
yticks
([])
plt
.
xticks
([])
plt
.
xticks
([])
ax
=
plt
.
gca
()
ax
=
plt
.
gca
()
...
@@ -116,9 +131,59 @@ def missing_values():
...
@@ -116,9 +131,59 @@ def missing_values():
ax
.
spines
[
'
top
'
].
set_visible
(
False
)
ax
.
spines
[
'
top
'
].
set_visible
(
False
)
ax
.
spines
[
'
bottom
'
].
set_visible
(
False
)
ax
.
spines
[
'
bottom
'
].
set_visible
(
False
)
plt
.
savefig
(
settings
.
output_dir
+
f
'
missing_
{
var
}
.pdf
'
)
path
=
f
'
{
settings
.
output_dir
}
missing_
{
var
}
.png
'
plt
.
savefig
(
path
,
dpi
=
250
,
bbox_inches
=
'
tight
'
,
pad_inches
=
0
)
print
(
f
'
saved to
{
path
}
'
)
plt
.
close
()
plt
.
close
()
bins
=
np
.
arange
(
-
5
,
120
,
5
,
dtype
=
int
)
plt
.
hist
(
y_df
.
values
,
bins
=
bins
,
log
=
True
)
plt
.
grid
()
path
=
f
'
{
settings
.
output_dir
}
hist_
{
var
}
.png
'
plt
.
savefig
(
path
)
print
(
f
'
saved to
{
path
}
'
)
def
visualize_masks
():
"""
Showing the data split.
"""
# read in data
tro
=
TimeResolvedOzone
()
mask_df
=
pd
.
read_csv
(
tro
.
mask_path
,
index_col
=
0
)
reg_df
=
pd
.
read_csv
(
tro
.
reg_path
,
index_col
=
0
)
# prepare data
n_stations
=
len
(
np
.
unique
(
reg_df
.
station_id
))
n_timesteps
=
len
(
np
.
unique
(
reg_df
.
datetime
))
missing_o3_mask
=
mask_df
.
missing_o3_mask
.
values
.
reshape
(
n_stations
,
n_timesteps
)
val_mask
=
mask_df
.
val_mask
.
values
.
reshape
(
n_stations
,
n_timesteps
)
test_mask
=
mask_df
.
test_mask
.
values
.
reshape
(
n_stations
,
n_timesteps
)
data
=
np
.
zeros
((
n_stations
,
n_timesteps
))
data
[
missing_o3_mask
]
=
3.
data
[
val_mask
]
=
2.
data
[
test_mask
]
=
1.
# plot
z
=
int
(
n_timesteps
/
n_stations
)
plt
.
figure
(
figsize
=
(
z
*
3
,
3
))
plt
.
imshow
(
data
[
0
:
n_stations
,
0
:
z
*
n_stations
],
interpolation
=
'
none
'
,
cmap
=
'
Accent
'
)
plt
.
yticks
([])
plt
.
xticks
([])
ax
=
plt
.
gca
()
ax
.
spines
[
'
right
'
].
set_visible
(
False
)
ax
.
spines
[
'
left
'
].
set_visible
(
False
)
ax
.
spines
[
'
top
'
].
set_visible
(
False
)
ax
.
spines
[
'
bottom
'
].
set_visible
(
False
)
# save
path
=
f
'
{
settings
.
output_dir
}
masks.png
'
plt
.
savefig
(
path
,
dpi
=
250
,
bbox_inches
=
'
tight
'
,
pad_inches
=
0
)
print
(
f
'
saved to
{
path
}
'
)
plt
.
close
()
def
visualize_graph
():
def
visualize_graph
():
"""
"""
...
@@ -160,10 +225,12 @@ if __name__ == '__main__':
...
@@ -160,10 +225,12 @@ if __name__ == '__main__':
"""
"""
time_series_lenght_
=
False
time_series_lenght_
=
False
missing_values_
=
True
missing_values_
=
True
visualize_masks_
=
False
visualize_graph_
=
False
visualize_graph_
=
False
if
time_series_lenght_
:
time_series_lenght
()
if
time_series_lenght_
:
time_series_lenght
()
if
missing_values_
:
missing_values
()
if
missing_values_
:
missing_values
()
if
visualize_masks_
:
visualize_masks
()
if
visualize_graph_
:
visualize_graph
()
if
visualize_graph_
:
visualize_graph
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment