diff --git a/prepare.sh b/prepare.sh
index 79d250cfb400a771d4ae9acc9ccef3b9b5bf59fd..79b9776c7aa1c98ae01953a9bcb3db1aa9367882 100755
--- a/prepare.sh
+++ b/prepare.sh
@@ -5,7 +5,7 @@
 # venv is created in this directory.
 
 # check if we are really in the ozone-mapping directory
-S="ozone-mapping"
+S="aq-bench"
 if [[ $(pwd) == *$S ]]
 then
     echo "Prepare..."
diff --git a/source/dataset_retrieval.py b/source/dataset_retrieval.py
index b32a62238b93ec1cd06ee5d7e306b38b6ef8b1de..9b0501ed9430457e8b0b04f1967df6d6e98dec0f 100644
--- a/source/dataset_retrieval.py
+++ b/source/dataset_retrieval.py
@@ -419,6 +419,43 @@ class MetricsRow():
                    + ' ' + print_metric + ': ' + str(self.row[print_metric]))
 
 
+def data_capture():
+    """
+    A simple function to look at the data capture of our metrics.
+    """
+    import pdb
+    # df = pd.read_csv(resources_dir+'yearly_metrics.csv')
+    # df['hourly_samples'] = [0] * len(df)
+    # df['capture'] = [0] * len(df)
+    df = pd.read_csv(resources_dir+'intermediate_at_cap5500.csv')
+    for idx, row in df.iterrows():
+        if idx > 5500:
+            id_tuple = row['o3_series_id']
+            id_string_list = [str(id_) for id_ in eval(id_tuple)]
+            query = f"""
+                SELECT
+                datetime, value
+                FROM o3_hourly
+                WHERE id IN ({','.join(id_string_list)})
+                AND datetime between '2010-01-01 00:00:00'
+                AND '2014-12-31 23:59:59';
+                """
+            result = query_db(query)
+            result.drop_duplicates(subset='datetime', inplace=True,
+                                   ignore_index=True)
+            count = len(result)
+            df.loc[idx, 'hourly_samples'] = count
+            df.loc[idx, 'capture'] = count / 43824
+            print(count, count/43824)
+            if (idx > 1) and (idx % 500 == 0):
+                df.to_csv(resources_dir+f'intermediate_at_cap{idx}.csv',
+                          index=False)
+
+    df.to_csv(resources_dir+f'yearly_metrics_cap.csv',
+              index=False)
+
+    pdb.set_trace()
+
 def full_aqbench():
     """
     start one retrieval.
@@ -458,4 +495,5 @@ if __name__ == '__main__':
                                  logging.StreamHandler()])
 
     # start retrieval
-    full_aqbench()
+    # full_aqbench()
+    data_capture()
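
Note on the new data_capture() helper: the hard-coded denominator 43824 is the number of hourly slots from 2010-01-01 00:00 through 2014-12-31 23:00 (1826 days including the 2012 leap day, times 24 hours), so 'capture' is the fraction of those expected hourly O3 samples actually present for a station's series after deduplicating timestamps. The function also checkpoints df to an intermediate CSV every 500 rows and, on restart, skips already-processed rows via the idx > 5500 guard. A minimal sketch of the capture calculation, assuming only pandas (the capture_fraction helper and its input are illustrative and not part of this change):

    import pandas as pd

    # Expected hourly slots in 2010-2014: 1826 days * 24 = 43824
    # (2012 is a leap year).
    expected = pd.date_range('2010-01-01 00:00:00',
                             '2014-12-31 23:00:00', freq='h')
    assert len(expected) == 43824

    def capture_fraction(timestamps):
        """Fraction of expected hourly slots covered by unique timestamps."""
        unique = pd.Series(pd.to_datetime(timestamps)).drop_duplicates()
        return len(unique) / len(expected)

    # Example: a series with every second hour present has capture 0.5.
    print(capture_fraction(expected[::2]))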