Skip to content
Snippets Groups Projects
Commit d5eaccae authored by Clara Betancourt
Browse files

Merge branch 'clara_issue09_gather_materials_for_presentation_before_DLR' into devel

parents 81ca62f6 48664dd8
No related branches found
No related tags found
No related merge requests found
...@@ -35,35 +35,33 @@ x_train = data.x[data.train_mask].numpy() ...@@ -35,35 +35,33 @@ x_train = data.x[data.train_mask].numpy()
y_train = data.y[data.train_mask].numpy().reshape(-1) y_train = data.y[data.train_mask].numpy().reshape(-1)
x_val = data.x[data.val_mask].numpy() x_val = data.x[data.val_mask].numpy()
y_val = data.y[data.val_mask].numpy().reshape(-1) y_val = data.y[data.val_mask].numpy().reshape(-1)
# model = pkl.load(open(tro.rf_path, 'rb')) model = pkl.load(open(tro.rf_path, 'rb'))
# pdb.set_trace() y_val_hat = model.predict(x_val)
# y_val_hat = model.predict(x_val) # y_val_hat = np.full_like(y_val, fill_value=np.mean(y_train))
y_val_hat = np.full_like(y_val, fill_value=np.mean(y_train))
rmse = (mean_squared_error(y_val, y_val_hat))**.5 rmse = (mean_squared_error(y_val, y_val_hat))**.5
r2 = r2_score(y_val, y_val_hat) r2 = r2_score(y_val, y_val_hat)
print('======================') print('======================')
print('Baseline results:') print('Baseline results:')
print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}') print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
('======================') ('======================')
print('Correct and smooth') print('Correct and smooth')
cs = CorrectAndSmooth(num_correction_layers=10, correction_alpha=.75, cs = CorrectAndSmooth(num_correction_layers=20, correction_alpha=.75,
num_smoothing_layers=10, smoothing_alpha=0.4, num_smoothing_layers=20, smoothing_alpha=0.1,
autoscale=True) # autoscale is misleading... autoscale=True) # autoscale is misleading...
x = data.x.numpy() x = data.x.numpy()
# y_hat = model.predict(x) y_hat = model.predict(x)
y_hat = np.full_like(data.y.numpy(), fill_value=np.mean(data.y[data.train_mask].numpy())) # y_hat = np.full_like(data.y.numpy(), fill_value=np.mean(data.y[data.train_mask].numpy()))
y_hat = torch.tensor(y_hat, dtype=torch.float32).view(-1, 1) y_hat = torch.tensor(y_hat, dtype=torch.float32).view(-1, 1)
y_soft = cs.correct(y_soft=y_hat, y_true=data.y[data.train_mask], y_soft = cs.correct(y_soft=y_hat, y_true=data.y[data.train_mask],
mask=data.train_mask, edge_index=data.edge_index, mask=data.train_mask, edge_index=data.edge_index,
edge_weight=data.edge_weight) edge_weight=data.edge_weight)
y_val_soft = y_soft[data.val_mask].numpy() y_val_soft = y_soft[data.val_mask].numpy()
# pdb.set_trace()
rmse = (mean_squared_error(y_val, y_val_soft))**.5 rmse = (mean_squared_error(y_val, y_val_soft))**.5
r2 = r2_score(y_val, y_val_soft) r2 = r2_score(y_val, y_val_soft)
print(f'After correct:') print(f'After correct:')
print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}') print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
y_soft2 = cs.smooth(y_soft=y_soft, y_true=data.y[data.train_mask], y_soft2 = cs.smooth(y_soft=y_soft, y_true=data.y[data.train_mask],
mask=data.train_mask, edge_index=data.edge_index, mask=data.train_mask, edge_index=data.edge_index,
...@@ -72,7 +70,7 @@ y_val_soft2 = y_soft2[data.val_mask].numpy() ...@@ -72,7 +70,7 @@ y_val_soft2 = y_soft2[data.val_mask].numpy()
rmse = (mean_squared_error(y_val, y_val_soft2))**.5 rmse = (mean_squared_error(y_val, y_val_soft2))**.5
r2 = r2_score(y_val, y_val_soft2) r2 = r2_score(y_val, y_val_soft2)
print(f'After smooth:') print(f'After smooth:')
print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}') print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
exit() exit()
print('Incoming node degree vs. error in test set:') print('Incoming node degree vs. error in test set:')
......
...@@ -16,8 +16,8 @@ import matplotlib.pyplot as plt ...@@ -16,8 +16,8 @@ import matplotlib.pyplot as plt
# own # own
import settings import settings
# from utils import query_db
from preprocessing.aqbench import AQBenchGraph from preprocessing.aqbench import AQBenchGraph
from preprocessing.time_resolved import TimeResolvedOzone
def time_series_lenght(): def time_series_lenght():
...@@ -95,19 +95,34 @@ def missing_values(): ...@@ -95,19 +95,34 @@ def missing_values():
""" """
color the time series according to missing values color the time series according to missing values
""" """
# find all time series print('missing values...')
file_list = [f for f in os.listdir(settings.resources_dir)
if f.startswith('hourly_')] # read in data
tro = TimeResolvedOzone()
x_df = pd.read_csv(tro.x_path, index_col=0)
y_df = pd.read_csv(tro.y_path, index_col=0)
reg_df = pd.read_csv(tro.reg_path, index_col=0)
print(x_df.columns)
# reshape to 2d field
n_stations = len(np.unique(reg_df.station_id))
n_timesteps = len(np.unique(reg_df.datetime))
y_2d = y_df.values.reshape(n_stations, n_timesteps)
print(f'stations: {n_stations}')
print(f'timesteps: {n_timesteps}')
print(f'min: {np.nanmin(y_2d)}')
print(f'max: {np.nanmax(y_2d)}')
print(f'mean: {np.nanmean(y_2d)}')
print(f'missing: {np.count_nonzero(np.isnan(y_2d))/(n_stations*n_timesteps)*100}')
for f in file_list:
print(f'Plotting missing values for {f}...')
# info # info
var = f.lstrip('hourly_').strip('.csv') var = 'o3'
df = pd.read_csv(settings.resources_dir+f, index_col=0)
# plot the data # plot the data
plt.figure() z = int(n_timesteps/n_stations)
plt.imshow(df.values.T, aspect=100, interpolation='none') plt.figure(figsize=(z*3, 3))
plt.imshow(y_2d[0:n_stations,0:z*n_stations], interpolation='none')
plt.yticks([]) plt.yticks([])
plt.xticks([]) plt.xticks([])
ax = plt.gca() ax = plt.gca()
...@@ -116,9 +131,59 @@ def missing_values(): ...@@ -116,9 +131,59 @@ def missing_values():
ax.spines['top'].set_visible(False) ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False) ax.spines['bottom'].set_visible(False)
plt.savefig(settings.output_dir+f'missing_{var}.pdf') path = f'{settings.output_dir}missing_{var}.png'
plt.savefig(path, dpi=250, bbox_inches='tight', pad_inches=0)
print(f'saved to {path}')
plt.close() plt.close()
bins = np.arange(-5, 120, 5, dtype=int)
plt.hist(y_df.values, bins=bins, log=True)
plt.grid()
path = f'{settings.output_dir}hist_{var}.png'
plt.savefig(path)
print(f'saved to {path}')
def visualize_masks():
    """
    Showing the data split.

    Reads the mask and registry tables referenced by TimeResolvedOzone,
    reshapes the `missing_o3_mask`, `val_mask` and `test_mask` columns to
    (n_stations, n_timesteps) fields and renders them as one categorical
    image, saved to `{settings.output_dir}masks.png`.

    Cell codes in the image (later assignments override earlier ones):
        0 -- not selected by any of the three masks
        3 -- missing_o3_mask
        2 -- val_mask
        1 -- test_mask
    """
    # read in data
    tro = TimeResolvedOzone()
    mask_df = pd.read_csv(tro.mask_path, index_col=0)
    reg_df = pd.read_csv(tro.reg_path, index_col=0)

    # prepare data
    n_stations = len(np.unique(reg_df.station_id))
    n_timesteps = len(np.unique(reg_df.datetime))
    shape = (n_stations, n_timesteps)
    data = np.zeros(shape)
    # Order matters: test overrides val, val overrides missing.
    mask_codes = (
        ('missing_o3_mask', 3.),
        ('val_mask', 2.),
        ('test_mask', 1.),
    )
    for column, code in mask_codes:
        # Coerce to bool so that masks stored as 0/1 are applied as boolean
        # masks instead of being interpreted as integer (fancy) indices.
        mask = mask_df[column].values.astype(bool).reshape(shape)
        data[mask] = code

    # plot
    # The image is cropped to a width that is a whole multiple of
    # n_stations so it matches the (z*3, 3) figure. Clamp z to 1: with
    # fewer timesteps than stations the ratio would be 0, which gives an
    # invalid zero-width figure and an empty slice.
    z = max(1, n_timesteps // n_stations)
    plt.figure(figsize=(z*3, 3))
    plt.imshow(data[0:n_stations, 0:z*n_stations], interpolation='none',
               cmap='Accent')
    plt.yticks([])
    plt.xticks([])
    ax = plt.gca()
    for side in ('right', 'left', 'top', 'bottom'):
        ax.spines[side].set_visible(False)

    # save
    path = f'{settings.output_dir}masks.png'
    plt.savefig(path, dpi=250, bbox_inches='tight', pad_inches=0)
    print(f'saved to {path}')
    plt.close()
def visualize_graph(): def visualize_graph():
""" """
...@@ -160,10 +225,12 @@ if __name__ == '__main__': ...@@ -160,10 +225,12 @@ if __name__ == '__main__':
""" """
time_series_lenght_ = False time_series_lenght_ = False
missing_values_ = True missing_values_ = True
visualize_masks_ = False
visualize_graph_ = False visualize_graph_ = False
if time_series_lenght_: time_series_lenght() if time_series_lenght_: time_series_lenght()
if missing_values_: missing_values() if missing_values_: missing_values()
if visualize_masks_: visualize_masks()
if visualize_graph_: visualize_graph() if visualize_graph_: visualize_graph()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment