Commit 48664dd8 authored by Clara Betancourt

meeting finished

parent 81ca62f6
@@ -35,35 +35,33 @@
 x_train = data.x[data.train_mask].numpy()
 y_train = data.y[data.train_mask].numpy().reshape(-1)
 x_val = data.x[data.val_mask].numpy()
 y_val = data.y[data.val_mask].numpy().reshape(-1)
-# model = pkl.load(open(tro.rf_path, 'rb'))
-# pdb.set_trace()
-# y_val_hat = model.predict(x_val)
-y_val_hat = np.full_like(y_val, fill_value=np.mean(y_train))
+model = pkl.load(open(tro.rf_path, 'rb'))
+y_val_hat = model.predict(x_val)
+# y_val_hat = np.full_like(y_val, fill_value=np.mean(y_train))
 rmse = (mean_squared_error(y_val, y_val_hat))**.5
 r2 = r2_score(y_val, y_val_hat)
 print('======================')
 print('Baseline results:')
-print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}')
+print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
 print('======================')
 print('Correct and smooth')
-cs = CorrectAndSmooth(num_correction_layers=10, correction_alpha=.75,
-                      num_smoothing_layers=10, smoothing_alpha=0.4,
+cs = CorrectAndSmooth(num_correction_layers=20, correction_alpha=.75,
+                      num_smoothing_layers=20, smoothing_alpha=0.1,
                       autoscale=True)  # autoscale is misleading...
 x = data.x.numpy()
-# y_hat = model.predict(x)
-y_hat = np.full_like(data.y.numpy(), fill_value=np.mean(data.y[data.train_mask].numpy()))
+y_hat = model.predict(x)
+# y_hat = np.full_like(data.y.numpy(), fill_value=np.mean(data.y[data.train_mask].numpy()))
 y_hat = torch.tensor(y_hat, dtype=torch.float32).view(-1, 1)
 y_soft = cs.correct(y_soft=y_hat, y_true=data.y[data.train_mask],
                     mask=data.train_mask, edge_index=data.edge_index,
                     edge_weight=data.edge_weight)
 y_val_soft = y_soft[data.val_mask].numpy()
-# pdb.set_trace()
 rmse = (mean_squared_error(y_val, y_val_soft))**.5
 r2 = r2_score(y_val, y_val_soft)
 print(f'After correct:')
-print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}')
+print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
 y_soft2 = cs.smooth(y_soft=y_soft, y_true=data.y[data.train_mask],
                     mask=data.train_mask, edge_index=data.edge_index,
@@ -72,7 +70,7 @@
 y_val_soft2 = y_soft2[data.val_mask].numpy()
 rmse = (mean_squared_error(y_val, y_val_soft2))**.5
 r2 = r2_score(y_val, y_val_soft2)
 print(f'After smooth:')
-print(f'RMSE: {rmse:.2f}, R2: {r2:.2f}')
+print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')
 exit()
 print('Incoming node degree vs. error in test set:')
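
Note: a minimal, self-contained sketch of the pattern this diff switches to: fit a plain random forest on the training nodes, then refine its node-level predictions with torch_geometric's CorrectAndSmooth in the same correct-then-smooth order as above. The toy graph, shapes, and random data below are illustrative assumptions, not part of the repository.

import numpy as np
import torch
from sklearn.ensemble import RandomForestRegressor
from torch_geometric.nn import CorrectAndSmooth

# toy data standing in for the ozone graph (shapes are made up)
n_nodes, n_feats = 200, 8
rng = np.random.default_rng(0)
x = rng.random((n_nodes, n_feats)).astype(np.float32)
y = rng.random(n_nodes).astype(np.float32)
train_mask = torch.zeros(n_nodes, dtype=torch.bool)
train_mask[:120] = True
edge_index = torch.randint(0, n_nodes, (2, 800))  # random toy edges

# baseline regressor trained outside the graph
idx = train_mask.numpy()
rf = RandomForestRegressor(n_estimators=50, random_state=0).fit(x[idx], y[idx])
y_hat = torch.tensor(rf.predict(x), dtype=torch.float32).view(-1, 1)

# propagate the residuals (correct), then re-inject the known labels (smooth)
cs = CorrectAndSmooth(num_correction_layers=20, correction_alpha=.75,
                      num_smoothing_layers=20, smoothing_alpha=0.1,
                      autoscale=True)
y_true = torch.tensor(y[idx]).view(-1, 1)
y_soft = cs.correct(y_soft=y_hat, y_true=y_true, mask=train_mask,
                    edge_index=edge_index)
y_soft = cs.smooth(y_soft=y_soft, y_true=y_true, mask=train_mask,
                   edge_index=edge_index)
print(y_soft.shape)  # (n_nodes, 1) refined predictions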
@@ -16,8 +16,8 @@
 import matplotlib.pyplot as plt
 # own
 import settings
-# from utils import query_db
 from preprocessing.aqbench import AQBenchGraph
+from preprocessing.time_resolved import TimeResolvedOzone


 def time_series_lenght():
@@ -95,19 +95,34 @@
 def missing_values():
     """
     color the time series according to missing values
     """
-    # find all time series
-    file_list = [f for f in os.listdir(settings.resources_dir)
-                 if f.startswith('hourly_')]
-    for f in file_list:
-        print(f'Plotting missing values for {f}...')
+    print('missing values...')
+
+    # read in data
+    tro = TimeResolvedOzone()
+    x_df = pd.read_csv(tro.x_path, index_col=0)
+    y_df = pd.read_csv(tro.y_path, index_col=0)
+    reg_df = pd.read_csv(tro.reg_path, index_col=0)
+    print(x_df.columns)
+
+    # reshape to 2d field
+    n_stations = len(np.unique(reg_df.station_id))
+    n_timesteps = len(np.unique(reg_df.datetime))
+    y_2d = y_df.values.reshape(n_stations, n_timesteps)
+    print(f'stations: {n_stations}')
+    print(f'timesteps: {n_timesteps}')
+    print(f'min: {np.nanmin(y_2d)}')
+    print(f'max: {np.nanmax(y_2d)}')
+    print(f'mean: {np.nanmean(y_2d)}')
+    print(f'missing: {np.count_nonzero(np.isnan(y_2d))/(n_stations*n_timesteps)*100}')

     # info
-    var = f.lstrip('hourly_').strip('.csv')
-    df = pd.read_csv(settings.resources_dir+f, index_col=0)
+    var = 'o3'

     # plot the data
-    plt.figure()
-    plt.imshow(df.values.T, aspect=100, interpolation='none')
+    z = int(n_timesteps/n_stations)
+    plt.figure(figsize=(z*3, 3))
+    plt.imshow(y_2d[0:n_stations,0:z*n_stations], interpolation='none')
     plt.yticks([])
     plt.xticks([])
     ax = plt.gca()
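
Note: a small sketch of the reshape this hunk relies on, assuming the y table is long-format with one row per station-timestep pair, ordered station-major, so it can be viewed as an (n_stations x n_timesteps) field and the missing share read off with np.isnan. The tiny frames below are illustrative only.

import numpy as np
import pandas as pd

# two stations, three timesteps, station-major ordering (assumed layout)
reg_df = pd.DataFrame({'station_id': [0, 0, 0, 1, 1, 1],
                       'datetime': ['t0', 't1', 't2'] * 2})
y_df = pd.DataFrame({'o3': [30., np.nan, 42., 28., 35., np.nan]})

n_stations = len(np.unique(reg_df.station_id))
n_timesteps = len(np.unique(reg_df.datetime))
y_2d = y_df.values.reshape(n_stations, n_timesteps)  # 2 x 3 field
missing = np.count_nonzero(np.isnan(y_2d)) / (n_stations * n_timesteps) * 100
print(f'missing: {missing:.1f} %')  # 33.3 %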
@@ -116,9 +131,59 @@ def missing_values():
     ax.spines['top'].set_visible(False)
     ax.spines['bottom'].set_visible(False)
-    plt.savefig(settings.output_dir+f'missing_{var}.pdf')
+    path = f'{settings.output_dir}missing_{var}.png'
+    plt.savefig(path, dpi=250, bbox_inches='tight', pad_inches=0)
+    print(f'saved to {path}')
     plt.close()
+
+    bins = np.arange(-5, 120, 5, dtype=int)
+    plt.hist(y_df.values, bins=bins, log=True)
+    plt.grid()
+    path = f'{settings.output_dir}hist_{var}.png'
+    plt.savefig(path)
+    print(f'saved to {path}')
+
+
+def visualize_masks():
+    """
+    Showing the data split.
+    """
+    # read in data
+    tro = TimeResolvedOzone()
+    mask_df = pd.read_csv(tro.mask_path, index_col=0)
+    reg_df = pd.read_csv(tro.reg_path, index_col=0)
+
+    # prepare data
+    n_stations = len(np.unique(reg_df.station_id))
+    n_timesteps = len(np.unique(reg_df.datetime))
+    missing_o3_mask = mask_df.missing_o3_mask.values.reshape(n_stations, n_timesteps)
+    val_mask = mask_df.val_mask.values.reshape(n_stations, n_timesteps)
+    test_mask = mask_df.test_mask.values.reshape(n_stations, n_timesteps)
+    data = np.zeros((n_stations, n_timesteps))
+    data[missing_o3_mask] = 3.
+    data[val_mask] = 2.
+    data[test_mask] = 1.
+
+    # plot
+    z = int(n_timesteps/n_stations)
+    plt.figure(figsize=(z*3, 3))
+    plt.imshow(data[0:n_stations,0:z*n_stations], interpolation='none',
+               cmap='Accent')
+    plt.yticks([])
+    plt.xticks([])
+    ax = plt.gca()
+    ax.spines['right'].set_visible(False)
+    ax.spines['left'].set_visible(False)
+    ax.spines['top'].set_visible(False)
+    ax.spines['bottom'].set_visible(False)
+
+    # save
+    path = f'{settings.output_dir}masks.png'
+    plt.savefig(path, dpi=250, bbox_inches='tight', pad_inches=0)
+    print(f'saved to {path}')
+    plt.close()


 def visualize_graph():
     """
@@ -160,10 +225,12 @@ if __name__ == '__main__':
     """
     time_series_lenght_ = False
     missing_values_ = True
+    visualize_masks_ = False
     visualize_graph_ = False
     if time_series_lenght_: time_series_lenght()
     if missing_values_: missing_values()
+    if visualize_masks_: visualize_masks()
     if visualize_graph_: visualize_graph()