From adc7fe1e32f936cf4f33fcd3e46878d8884ba8bc Mon Sep 17 00:00:00 2001
From: Clara Betancourt <c.betancourt@fz-juelich.de>
Date: Mon, 19 Apr 2021 15:55:10 +0200
Subject: [PATCH] data capture evaluations

---
 prepare.sh                  |  2 +-
 source/dataset_retrieval.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/prepare.sh b/prepare.sh
index 79d250c..79b9776 100755
--- a/prepare.sh
+++ b/prepare.sh
@@ -5,7 +5,7 @@
 # venv is created in this directory.
 
 # check if we are really in the ozone-mapping directory
-S="ozone-mapping"
+S="aq-bench"
 if [[ $(pwd) == *$S ]]
 then
   echo "Prepare..."
diff --git a/source/dataset_retrieval.py b/source/dataset_retrieval.py
index b32a622..9b0501e 100644
--- a/source/dataset_retrieval.py
+++ b/source/dataset_retrieval.py
@@ -419,6 +419,62 @@ class MetricsRow():
                      + ' ' + print_metric + ': ' + str(self.row[print_metric]))
 
 
+def data_capture():
+    """
+    Evaluate the data capture of our metrics.
+
+    For every row of the metrics table, the hourly ozone samples of its
+    series between 2010 and 2014 are counted (one sample per distinct
+    datetime). The count is stored in the column 'hourly_samples', the
+    fraction of the period it covers in the column 'capture'.
+
+    The function resumes from 'intermediate_at_cap5500.csv': rows up to
+    index 5500 are skipped. An intermediate table is saved every 500
+    rows, the final table is saved as 'yearly_metrics_cap.csv'.
+    """
+    # only needed here, so import locally
+    from ast import literal_eval
+
+    # rows up to this index are already evaluated in the intermediate
+    # file (a run from scratch would start from 'yearly_metrics.csv'
+    # with the columns 'hourly_samples' and 'capture' set to 0)
+    last_done_idx = 5500
+    # hours from 2010-01-01 to 2014-12-31 (2012 is a leap year)
+    hours_in_period = (5 * 365 + 1) * 24
+
+    df = pd.read_csv(resources_dir+'intermediate_at_cap5500.csv')
+    for idx, row in df.iterrows():
+        if idx <= last_done_idx:
+            continue
+
+        # literal_eval parses the stored tuple without executing
+        # arbitrary code from the csv file (as eval would)
+        series_ids = literal_eval(row['o3_series_id'])
+        id_string_list = [str(id_) for id_ in series_ids]
+        query = f"""
+        SELECT
+          datetime, value
+        FROM o3_hourly
+        WHERE id IN ({','.join(id_string_list)})
+        AND datetime between '2010-01-01 00:00:00'
+        AND '2014-12-31 23:59:59';
+        """
+        result = query_db(query)
+        # count each datetime only once
+        count = len(result.drop_duplicates(subset='datetime'))
+        capture = count / hours_in_period
+        df.loc[idx, 'hourly_samples'] = count
+        df.loc[idx, 'capture'] = capture
+        print(count, capture)
+
+        # save intermediate results every 500 rows
+        if idx % 500 == 0:
+            df.to_csv(resources_dir+f'intermediate_at_cap{idx}.csv',
+                      index=False)
+
+    df.to_csv(resources_dir+'yearly_metrics_cap.csv', index=False)
+
+
 def full_aqbench():
     """
     start one retrieval.
@@ -458,4 +514,5 @@ if __name__ == '__main__':
                                   logging.StreamHandler()])
 
     # start retrieval
-    full_aqbench()
+    # full_aqbench()
+    data_capture()
-- 
GitLab