diff --git a/source/models/simple.py b/source/models/simple.py
index 07024b68058f02846d0c57aa03854ba89a7a8ce5..1b3ff7ed28e59d28b82301237486e6b57d91f702 100644
--- a/source/models/simple.py
+++ b/source/models/simple.py
@@ -20,7 +20,8 @@ import torch
 
 # sklearn
 from sklearn.preprocessing import StandardScaler
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
 from sklearn.metrics import mean_squared_error, r2_score
 from sklearn.neighbors import NearestNeighbors
 
@@ -40,7 +41,7 @@ class SpatiotemporalMean:
     """
     A model that just returns the mean of all labels
     """
-    def __init__(self, use_val=False):
+    def __init__(self, use_val=False, cv_seed=None):
         """
         Initializing the class
 
@@ -104,7 +105,7 @@ class SpatialMean:
     If there is no measurement at all, return the mean of
     the CAMS reanalyses of that time step.
     """
-    def __init__(self, use_val=False):
+    def __init__(self, use_val=False, cv_seed=None):
         """
         Initializing the class
 
@@ -185,11 +186,100 @@ class SpatialMean:
         self.y_hat = torch.load(self.prediction_path)
 
 
+class Linear:
+    """
+    Training and using random forests.
+    """
+    def __init__(self, use_val=False, cv_seed=None):
+        """
+        Initializing the class
+
+        use_val = True means the validation set is used for fitting
+        """
+        self.use_val = use_val
+        self.cv_seed = cv_seed
+        if use_val:
+            prediction_file = 'linear_predictions_useval.pt'
+        else:
+            prediction_file = 'linear_predictions.pt'
+        if cv_seed is not None:
+            directory = settings.resources_dir + 'cross_validation/' + \
+                             str(self.cv_seed) + '/' + 'models/'
+        else:
+            directory = settings.resources_dir + 'models/'
+        self.prediction_path = directory + prediction_file
+
+    def __str__(self):
+        """
+        The name of the simple model.
+        """
+        return 'Linear'
+
+    def read_predictions(self):
+        """
+        Predictions were saved for future use
+        """
+        if not os.path.exists(self.prediction_path):
+            self.save_predictions()
+
+        self.y_hat = torch.load(self.prediction_path)
+
+    def save_predictions(self, features=None, print_=True):
+        """
+        Train the linear model, then save model and predictions
+
+        features can be a list of features to use
+        print_ indicates whether to print info or not
+        """
+        if print_:
+            print('getting linear predictions...')
+        # read in data
+        ug = UBAGraph()
+        x_df = pd.read_csv(ug.x_path, index_col=0)
+        y_df = pd.read_csv(ug.y_path, index_col=0)
+        if self.cv_seed is not None:
+            p1 = settings.resources_dir
+            p2 = settings.resources_dir + f'cross_validation/{self.cv_seed}/'
+            ug.mask_path = ug.mask_path.replace(p1, p2)
+        mask_df = pd.read_csv(ug.mask_path, index_col=0)
+
+        # use standard features from feature selection if no features given
+        if not features:
+            features = ['cams_o3', 'nightlight', 'day_of_year', 'alt',
+                        'hour_of_day', 'pbl_height', 'temperature',
+                        'day_of_week', 'relative_alt', 'cams_no2',
+                        'type', 'relative_humidity', 'wind_v',
+                        'cams_nox_emissions', 'population_density',
+                        'type_of_area', 'cloud_cover', 'wind_u']
+
+        # prepare data
+        x_df = x_df[features]
+        x_df = get_one_hot(x_df)
+        x = x_df.values
+        if self.use_val:
+            mask = mask_df.train_mask | mask_df.val_mask
+        else:
+            mask = mask_df.train_mask
+        x_train = x_df[mask].values
+        y_train = y_df[mask].values.reshape(-1)
+
+        # fit model
+        rf = LinearRegression()
+        rf.fit(x_train, y_train)
+
+        # save predictions
+        y_hat = rf.predict(x)
+        y_hat_pytorch = torch.tensor(y_hat.astype(np.float32)).view(-1, 1)
+        torch.save(y_hat_pytorch, self.prediction_path)
+        if print_:
+            print(f'written to {self.prediction_path}')
+
+
 class Cams:
     """
     Use the modeled ozone values from CAMS
     """
-    def __init__(self, use_val=False):
+    def __init__(self, use_val=False, cv_seed=None):
         """
         Initializing the class
 
@@ -459,6 +549,99 @@ class RandomForest:
             print(f'written to {self.prediction_path}')
 
 
+class GradientBoostingTrees:
+    """
+    Training and using gradient boosting trees.
+    """
+    def __init__(self, use_val=False, cv_seed=None):
+        """
+        Initializing the class
+
+        use_val = True means the validation set is used for fitting
+        """
+        self.use_val = use_val
+        self.cv_seed = cv_seed
+        if use_val:
+            prediction_file = 'gradientboostingtrees_predictions_useval.pt'
+        else:
+            prediction_file = 'gradientboostingtrees_predictions.pt'
+        if cv_seed is not None:
+            directory = settings.resources_dir + 'cross_validation/' + \
+                             str(self.cv_seed) + '/' + 'models/'
+        else:
+            directory = settings.resources_dir + 'models/'
+        self.prediction_path = directory + prediction_file
+
+    def __str__(self):
+        """
+        The name of the simple model.
+        """
+        return 'GradientBoostingTrees'
+
+    def read_predictions(self):
+        """
+        Predictions were saved for future use
+        """
+        if not os.path.exists(self.prediction_path):
+            self.save_predictions()
+
+        self.y_hat = torch.load(self.prediction_path)
+
+    def save_predictions(self, print_=True, n_estimators=300,
+                         learning_rate=0.1, max_depth=3):
+        """
+        Train the random gradient boosting, then save predictions
+
+        print_ indicates whether to print info or not
+        """
+        if print_:
+            print('getting gradient boosted predictions...')
+        # read in data
+        ug = UBAGraph()
+        x_df = pd.read_csv(ug.x_path, index_col=0)
+        y_df = pd.read_csv(ug.y_path, index_col=0)
+        if self.cv_seed is not None:
+            p1 = settings.resources_dir
+            p2 = settings.resources_dir + f'cross_validation/{self.cv_seed}/'
+            ug.mask_path = ug.mask_path.replace(p1, p2)
+        mask_df = pd.read_csv(ug.mask_path, index_col=0)
+
+        # use standard features from feature selection
+        features = ['cams_o3', 'nightlight', 'day_of_year', 'alt',
+                    'hour_of_day', 'pbl_height', 'temperature',
+                    'day_of_week', 'relative_alt', 'cams_no2',
+                    'type', 'relative_humidity', 'wind_v',
+                    'cams_nox_emissions', 'population_density',
+                    'type_of_area', 'cloud_cover', 'wind_u']
+
+        # prepare data
+        x_df = x_df[features]
+        x_df = get_one_hot(x_df)
+        x = x_df.values
+        if self.use_val:
+            mask = mask_df.train_mask | mask_df.val_mask
+        else:
+            mask = mask_df.train_mask
+        x_train = x_df[mask].values
+        y_train = y_df[mask].values.reshape(-1)
+
+        # fit model
+        rf = GradientBoostingRegressor(
+                                       n_estimators=n_estimators,
+                                       learning_rate=learning_rate,
+                                       max_depth=max_depth,
+                                       random_state=settings.random_seed,
+                                       )
+        rf.fit(x_train, y_train)
+
+        # save predictions
+        y_hat = rf.predict(x)
+        y_hat_pytorch = torch.tensor(y_hat.astype(np.float32)).view(-1, 1)
+        torch.save(y_hat_pytorch, self.prediction_path)
+        if print_:
+            print(f'written to {self.prediction_path}')
+
+
 def tune_nnh():
     """
     tuning the samples parameter of NearestNeighborHybrid
@@ -484,6 +667,37 @@ def tune_nnh():
         print('\n\n\n')
 
 
+def tune_gradient_boosted_trees():
+    """
+    tuning the samples parameter of NearestNeighborHybrid
+    """
+    print('tune gradient boosted tree model...')
+
+    # UBA Graph data
+    ug = UBAGraph()
+    ug.get_dataset()
+    mask_df = pd.read_csv(ug.mask_path, index_col=0)
+    y = ug[0].y.numpy().reshape(-1)[mask_df['val_mask']]
+
+    # for n_estimators in [100, 200, 300, 400, 500]:
+    #     print('\nn_estimators=', n_estimators)
+    # for max_depth=3 in [4, 6, 8, 10]:
+    #     print('\nmax_depth=', max_depth)
+    for learning_rate in [0.2, 0.25, 0.05, 0.1, 0.15]:
+        print('\n learning_rate=', learning_rate)
+        gbt = GradientBoostingTrees()
+        # gbt.save_predictions(n_estimators=n_estimators)
+        # gbt.save_predictions(max_depth=max_depth)
+        gbt.save_predictions(learning_rate=learning_rate)
+        gbt.read_predictions()
+        y_hat = gbt.y_hat.numpy().reshape(-1)[mask_df['val_mask']]
+        rmse = (mean_squared_error(y, y_hat))**.5
+        r2 = r2_score(y, y_hat)
+
+        print(f'  val RMSE: {rmse:.3f}, val R2: {r2:.3f}')
+        print('\n')
+
+
 def select_rf_features():
     """
     Choose features for the simple model.
@@ -587,8 +801,9 @@ def evaluate_models(cv_seed=None):
         models = [NearestNeighborHybrid, RandomForest]
         use_vals = [True]
     else:
-        models = [SpatiotemporalMean, SpatialMean, Cams,
-                  NearestNeighborHybrid, RandomForest]
+        models = [SpatiotemporalMean, SpatialMean, Linear,
+                  Cams, NearestNeighborHybrid, RandomForest,
+                  GradientBoostingTrees]
         use_vals = [False, True]
 
     specs = [(model, use_val) for model in models for use_val in use_vals]
@@ -633,34 +848,45 @@ def tune_and_save_models():
     Also save the predictions where the validation set is used
     for training.
     """
-    model = SpatiotemporalMean()
-    model.save_predictions()
-    model = SpatiotemporalMean(use_val=True)
-    model.save_predictions()
-
-    model = SpatialMean()
-    model.save_predictions()
-    model = SpatialMean(use_val=True)
-    model.save_predictions()
-
-    model = Cams()
-    model.save_predictions()
-    model = Cams(use_val=True)
-    model.save_predictions()
-
-    tune_nnh()
-    model = NearestNeighborHybrid()
-    model.save_predictions()
-    model = NearestNeighborHybrid(use_val=True)
-    model.save_predictions()
-
-    select_rf_features()
-    model = RandomForest()
-    model.save_predictions()
-    model = RandomForest(use_val=True)
-    model.save_predictions()
-
-    evaluate_models()
+    # model = SpatiotemporalMean()
+    # model.save_predictions()
+    # model = SpatiotemporalMean(use_val=True)
+    # model.save_predictions()
+
+    # model = SpatialMean()
+    # model.save_predictions()
+    # model = SpatialMean(use_val=True)
+    # model.save_predictions()
+
+    # model = Linear()
+    # model.save_predictions()
+    # model = Linear(use_val=True)
+    # model.save_predictions()
+
+    # model = Cams()
+    # model.save_predictions()
+    # model = Cams(use_val=True)
+    # model.save_predictions()
+
+    # tune_nnh()
+    # model = NearestNeighborHybrid()
+    # model.save_predictions()
+    # model = NearestNeighborHybrid(use_val=True)
+    # model.save_predictions()
+
+    # select_rf_features()
+    # model = RandomForest()
+    # model.save_predictions()
+    # model = RandomForest(use_val=True)
+    # model.save_predictions()
+
+    tune_gradient_boosted_trees()
+    # model = GradientBoostingTrees()
+    # model.save_predictions()
+    # model = GradientBoostingTrees(use_val=True)
+    # model.save_predictions()
+
+    # evaluate_models()
 
 
 def save_models_for_cross_validation():
@@ -683,5 +909,5 @@ def save_models_for_cross_validation():
 
 
 if __name__ == '__main__':
-    # tune_and_save_models()
-    save_models_for_cross_validation()
+    tune_and_save_models()
+    # save_models_for_cross_validation()
diff --git a/source/postprocessing/evaluation_tools.py b/source/postprocessing/evaluation_tools.py
index a1526d58ad7b140cc97fb227b69e2d0a059eca84..7482970a564de26c993856a18375d00ba2e731fb 100644
--- a/source/postprocessing/evaluation_tools.py
+++ b/source/postprocessing/evaluation_tools.py
@@ -364,9 +364,11 @@ def get_characteristics_df(how='read', cv_seed=None):
         lin_file = 'nearestneighborhybrid_predictions_useval.pt'
         stm_cs_file = 'SpatiotemporalMean_cs_predictions_useval.pyt'
         sm_cs_file = 'SpatialMean_cs_predictions_useval.pyt'
+        cams_cs_file = 'Cams_cs_predictions_useval.pyt'
         nnh_cs_file = 'NearestNeighborHybrid_cs_predictions_useval.pyt'
         rf_cs_file = 'RandomForest_cs_predictions_useval.pyt'
         lin_imp = torch.load(model_path+lin_file).numpy().reshape(-1)
+        cams_cs_imp = torch.load(model_path+cams_cs_file).numpy().reshape(-1)
         nnh_cs_imp = torch.load(model_path+nnh_cs_file).numpy().reshape(-1)
         rf_cs_imp = torch.load(model_path+rf_cs_file).numpy().reshape(-1)
         if cv_seed is None:
@@ -374,8 +376,8 @@ def get_characteristics_df(how='read', cv_seed=None):
             sm_cs_imp = torch.load(model_path+sm_cs_file).numpy().reshape(-1)
 
         # set up df
-        columns = ['y_true', 'y_imputed',
-                   'y_stm_cs', 'y_sm_cs', 'y_nnh_cs', 'y_rf_cs',
+        columns = ['y_true', 'y_imputed', 'y_stm_cs',
+                    'y_sm_cs', 'y_cams_cs', 'y_nnh_cs', 'y_rf_cs',
                    'gap_len', 'correlated',
                    'station_id', 'n_neighbors', 'test_mask']
         df = pd.DataFrame(columns=columns, index=y_true_df.index)
@@ -383,6 +385,7 @@ def get_characteristics_df(how='read', cv_seed=None):
         # columns that we can take from other data frames
         df.y_true = y_true_df.y
         df.y_nnh_cs = nnh_cs_imp
+        df.y_cams_cs = cams_cs_imp
         df.y_rf_cs = rf_cs_imp
         df.station_id = reg_df.station_id
         df.test_mask = mask_df.test_mask
@@ -403,8 +406,8 @@ def get_characteristics_df(how='read', cv_seed=None):
         for gap_idx, gap_row in gap_df_filtered.iterrows():
             if gap_idx % 5000 == 0:
                 print(f'{gap_idx/len(gap_df_filtered)*100:.2f} %')
-                df.to_csv(save_path)
-                print(f'intermediate save to {save_path}')
+                # df.to_csv(save_path)
+                # print(f'intermediate save to {save_path}')
             start_idx = gap_row.start_idx
             end_idx = gap_row.start_idx + gap_row.len - 1
             len_ = gap_row.len
@@ -500,8 +503,8 @@ if __name__ == '__main__':
     test_bootstrap_ = False
     count_exceedances_ = False
     number_of_neighbors_ = False
-    get_characteristics_df_ = False
-    evaluate_cross_validation_ = True
+    get_characteristics_df_ = True
+    evaluate_cross_validation_ = False
 
     if index_of_agreement_:
         # test index of agreement should be.959
@@ -546,8 +549,8 @@ if __name__ == '__main__':
     if get_characteristics_df_:
         # df = get_characteristics_df(how='read')
         df = get_characteristics_df(how='prepare')
-        for cv_seed in settings.cv_seeds:
-            get_characteristics_df(how='prepare', cv_seed=cv_seed)
+        # for cv_seed in settings.cv_seeds:
+        #     get_characteristics_df(how='prepare', cv_seed=cv_seed)
 
     if evaluate_cross_validation_:
         evaluate_cross_validation()
diff --git a/source/visualizations/plots_for_paper.py b/source/visualizations/plots_for_paper.py
index 1ef6c310218e0d33fd2d1afd47114aa4a2c8a7e5..172ea57748f28378a082b9c99170bcf11342b0e6 100644
--- a/source/visualizations/plots_for_paper.py
+++ b/source/visualizations/plots_for_paper.py
@@ -690,8 +690,8 @@ def n_neighbors_vs_r2():
     df = get_characteristics_df()
     n_list = np.unique(df.n_neighbors).tolist()
     r2_n_dict = {}
-    models = ['y_stm_cs', 'y_sm_cs', 'y_nnh_cs', 'y_rf_cs']
-    colors = ['rosybrown', 'lightcoral', 'firebrick', 'blue']
+    models = ['y_stm_cs', 'y_sm_cs', 'y_cams_cs', 'y_nnh_cs', 'y_rf_cs']
+    colors = ['rosybrown', 'lightcoral', 'red', 'firebrick', 'blue']
     for model in models:
         r2_n_list = []
         for n in n_list:
@@ -722,7 +722,7 @@ def n_neighbors_vs_r2():
         ax.set_ylim(-.1, 1.1)
     ax.set_xlabel('# neighbors')
     ax.set_ylabel('R2 score')
-    ax.legend()
+    # ax.legend()
 
     # save
     n_neigh_vs_r2_path = settings.output_dir + 'n_neigh_vs_r2.png'
@@ -741,8 +741,8 @@ def gap_len_vs_r2():
     df = get_characteristics_df()
     len_list = list(range(1, 51))
     r2_len_dict = {}
-    models = ['y_stm_cs', 'y_sm_cs', 'y_nnh_cs', 'y_rf_cs']
-    colors = ['rosybrown', 'lightcoral', 'firebrick', 'blue']
+    models = ['y_stm_cs', 'y_sm_cs', 'y_cams_cs', 'y_nnh_cs', 'y_rf_cs']
+    colors = ['rosybrown', 'lightcoral', 'red', 'firebrick', 'blue']
     for model in models:
         r2_len_list = []
         for len_ in len_list:
@@ -776,7 +776,7 @@ def gap_len_vs_r2():
             linestyle='--')
     plt.xlim(-.9, 30)
     plt.ylim(-.3, 1.1)
-    plt.legend()
+    # plt.legend()
     plt.xlabel('gap length')
     plt.ylabel('R2 score')
 
@@ -899,12 +899,12 @@ if __name__ == '__main__':
     station_ids_on_map_ = False
     o3_value_matrix_ = False
     mask_matrix_ = False
-    time_series_ = True
+    time_series_ = False
     basic_statistics_and_histogram_ = False
     exceedances_ = False
     true_vs_imputed_ = False
-    n_neighbors_vs_r2_ = False
-    gap_len_vs_r2_ = False
+    n_neighbors_vs_r2_ = True
+    gap_len_vs_r2_ = True
     imputed_dataset_matrix_ = False
     modeled_time_series_ = False