diff --git a/prepare.sh b/prepare.sh index 79d250cfb400a771d4ae9acc9ccef3b9b5bf59fd..79b9776c7aa1c98ae01953a9bcb3db1aa9367882 100755 --- a/prepare.sh +++ b/prepare.sh @@ -5,7 +5,7 @@ # venv is created in this directory. # check if we are really in the ozone-mapping directory -S="ozone-mapping" +S="aq-bench" if [[ $(pwd) == *$S ]] then echo "Prepare..." diff --git a/source/dataset_retrieval.py b/source/dataset_retrieval.py index b1dc6e037f9f7d1020acf90a73f4a0157a50fd09..9b0501ed9430457e8b0b04f1967df6d6e98dec0f 100644 --- a/source/dataset_retrieval.py +++ b/source/dataset_retrieval.py @@ -193,7 +193,8 @@ class AQbench(): save_data_to_file(self.data, self.data_dir+'AQbench.csv') logging.warning("""Do not forget to improve the population density of id 4589 and throw out station id 4587 - because of very high ozone values reported there""") + because of very high ozone values reported there. + Also, drop station without metadata.""") logging.info('AQbench complete') @@ -418,6 +419,43 @@ class MetricsRow(): + ' ' + print_metric + ': ' + str(self.row[print_metric])) +def data_capture(): + """ + A simple function to look at the data capture of our metrics. 
+    """
+    import ast, pdb
+    # df = pd.read_csv(resources_dir+'yearly_metrics.csv')
+    # df['hourly_samples'] = [0] * len(df)
+    # df['capture'] = [0] * len(df)
+    df = pd.read_csv(resources_dir+'intermediate_at_cap5500.csv')
+    for idx, row in df.iterrows():
+        if idx > 5500:
+            id_tuple = row['o3_series_id']
+            id_string_list = [str(id_) for id_ in ast.literal_eval(id_tuple)]
+            query = f"""
+            SELECT
+            datetime, value
+            FROM o3_hourly
+            WHERE id IN ({','.join(id_string_list)})
+            AND datetime between '2010-01-01 00:00:00'
+            AND '2014-12-31 23:59:59';
+            """
+            result = query_db(query)
+            result.drop_duplicates(subset='datetime', inplace=True,
+                                   ignore_index=True)
+            count = len(result)
+            df.loc[idx, 'hourly_samples'] = count
+            df.loc[idx, 'capture'] = count / 43824
+            print(count, count/43824)
+            if (idx > 1) and (idx % 500 == 0):
+                df.to_csv(resources_dir+f'intermediate_at_cap{idx}.csv',
+                          index=False)
+
+    df.to_csv(resources_dir+'yearly_metrics_cap.csv',
+              index=False)
+
+    pdb.set_trace()
+
 def full_aqbench():
     """
     start one retrieval.
@@ -457,4 +495,5 @@ if __name__ == '__main__':
                              logging.StreamHandler()])
 
     # start retrieval
-    full_aqbench()
+    # full_aqbench()
+    data_capture()