diff --git a/German_background_stations.json b/German_background_stations.json index 2997eefbaa9a72f4e94b940b6d0ebb7f6a34370d..9e3b89cd06df62442d582758062815ac2ab8bc7c 100755 --- a/German_background_stations.json +++ b/German_background_stations.json @@ -1 +1,334 @@ -["DENW094", "DEBW029", "DENI052", "DENI063", "DEBY109", "DEUB022", "DESN001", "DEUB013", "DETH016", "DEBY002", "DEBY005", "DEBY099", "DEUB038", "DEBE051", "DEBE056", "DEBE062", "DEBE032", "DEBE034", "DEBE010", "DEHE046", "DEST031", "DEBY122", "DERP022", "DEBY079", "DEBW102", "DEBW076", "DEBW045", "DESH016", "DESN004", "DEHE032", "DEBB050", "DEBW042", "DEBW046", "DENW067", "DESL019", "DEST014", "DENW062", "DEHE033", "DENW081", "DESH008", "DEBB055", "DENI011", "DEHB001", "DEHB004", "DEHB002", "DEHB003", "DEHB005", "DEST039", "DEUB003", "DEBW072", "DEST002", "DEBB001", "DEHE039", "DEBW035", "DESN005", "DEBW047", "DENW004", "DESN011", "DESN076", "DEBB064", "DEBB006", "DEHE001", "DESN012", "DEST030", "DESL003", "DEST104", "DENW050", "DENW008", "DETH026", "DESN085", "DESN014", "DESN092", "DENW071", "DEBW004", "DENI028", "DETH013", "DENI059", "DEBB007", "DEBW049", "DENI043", "DETH020", "DEBY017", "DEBY113", "DENW247", "DENW028", "DEBW025", "DEUB039", "DEBB009", "DEHE027", "DEBB042", "DEHE008", "DESN017", "DEBW084", "DEBW037", "DEHE058", "DEHE028", "DEBW112", "DEBY081", "DEBY082", "DEST032", "DETH009", "DEHE010", "DESN019", "DEHE023", "DETH036", "DETH040", "DEMV017", "DEBW028", "DENI042", "DEMV004", "DEMV019", "DEST044", "DEST050", "DEST072", "DEST022", "DEHH049", "DEHH047", "DEHH033", "DEHH050", "DEHH008", "DEHH021", "DENI054", "DEST070", "DEBB053", "DENW029", "DEBW050", "DEUB034", "DENW018", "DEST052", "DEBY020", "DENW063", "DESN050", "DETH061", "DERP014", "DETH024", "DEBW094", "DENI031", "DETH041", "DERP019", "DEBW081", "DEHE013", "DEBW021", "DEHE060", "DEBY031", "DESH021", "DESH033", "DEHE052", "DEBY004", "DESN024", "DEBW052", "DENW042", "DEBY032", "DENW053", "DENW059", "DEBB082", "DEBB031", "DEHE025", 
"DEBW053", "DEHE048", "DENW051", "DEBY034", "DEUB035", "DEUB032", "DESN028", "DESN059", "DEMV024", "DENW079", "DEHE044", "DEHE042", "DEBB043", "DEBB036", "DEBW024", "DERP001", "DEMV012", "DESH005", "DESH023", "DEUB031", "DENI062", "DENW006", "DEBB065", "DEST077", "DEST005", "DERP007", "DEBW006", "DEBW007", "DEHE030", "DENW015", "DEBY013", "DETH025", "DEUB033", "DEST025", "DEHE045", "DESN057", "DENW036", "DEBW044", "DEUB036", "DENW096", "DETH095", "DENW038", "DEBY089", "DEBY039", "DENW095", "DEBY047", "DEBB067", "DEBB040", "DEST078", "DENW065", "DENW066", "DEBY052", "DEUB030", "DETH027", "DEBB048", "DENW047", "DEBY049", "DERP021", "DEHE034", "DESN079", "DESL008", "DETH018", "DEBW103", "DEHE017", "DEBW111", "DENI016", "DENI038", "DENI058", "DENI029", "DEBY118", "DEBW032", "DEBW110", "DERP017", "DESN036", "DEBW026", "DETH042", "DEBB075", "DEBB052", "DEBB021", "DEBB038", "DESN051", "DEUB041", "DEBW020", "DEBW113", "DENW078", "DEHE018", "DEBW065", "DEBY062", "DEBW027", "DEBW041", "DEHE043", "DEMV007", "DEMV021", "DEBW054", "DETH005", "DESL012", "DESL011", "DEST069", "DEST071", "DEUB004", "DESH006", "DEUB029", "DEUB040", "DESN074", "DEBW031", "DENW013", "DENW179", "DEBW056", "DEBW087", "DEST061", "DEMV001", "DEBB024", "DEBW057", "DENW064", "DENW068", "DENW080", "DENI019", "DENI077", "DEHE026", "DEBB066", "DEBB083", "DEST063", "DEBW013", "DETH086", "DESL018", "DETH096", "DEBW059", "DEBY072", "DEBY088", "DEBW060", "DEBW107", "DEBW036", "DEUB026", "DEBW019", "DENW010", "DEST098", "DEHE019", "DEBW039", "DESL017", "DEBW034", "DEUB005", "DEBB051", "DEHE051", "DEBW023", "DEBY092", "DEBW008", "DEBW030", "DENI060", "DEST011", "DENW030", "DENI041", "DERP015", "DEUB001", "DERP016", "DERP028", "DERP013", "DEHE022", "DEUB021", "DEBW010", "DEST066", "DEBB063", "DEBB028", "DEHE024", "DENI020", "DENI051", "DERP025", "DEBY077", "DEMV018", "DEST089", "DEST028", "DETH060", "DEHE050", "DEUB028", "DESN045", "DEUB042"] +[ + "DENW094", + "DEBW029", + "DENI052", + "DENI063", + "DEBY109", + 
"DEUB022", + "DESN001", + "DEUB013", + "DETH016", + "DEBY002", + "DEBY005", + "DEBY099", + "DEUB038", + "DEBE051", + "DEBE056", + "DEBE062", + "DEBE032", + "DEBE034", + "DEBE010", + "DEHE046", + "DEST031", + "DEBY122", + "DERP022", + "DEBY079", + "DEBW102", + "DEBW076", + "DEBW045", + "DESH016", + "DESN004", + "DEHE032", + "DEBB050", + "DEBW042", + "DEBW046", + "DENW067", + "DESL019", + "DEST014", + "DENW062", + "DEHE033", + "DENW081", + "DESH008", + "DEBB055", + "DENI011", + "DEHB001", + "DEHB004", + "DEHB002", + "DEHB003", + "DEHB005", + "DEST039", + "DEUB003", + "DEBW072", + "DEST002", + "DEBB001", + "DEHE039", + "DEBW035", + "DESN005", + "DEBW047", + "DENW004", + "DESN011", + "DESN076", + "DEBB064", + "DEBB006", + "DEHE001", + "DESN012", + "DEST030", + "DESL003", + "DEST104", + "DENW050", + "DENW008", + "DETH026", + "DESN085", + "DESN014", + "DESN092", + "DENW071", + "DEBW004", + "DENI028", + "DETH013", + "DENI059", + "DEBB007", + "DEBW049", + "DENI043", + "DETH020", + "DEBY017", + "DEBY113", + "DENW247", + "DENW028", + "DEBW025", + "DEUB039", + "DEBB009", + "DEHE027", + "DEBB042", + "DEHE008", + "DESN017", + "DEBW084", + "DEBW037", + "DEHE058", + "DEHE028", + "DEBW112", + "DEBY081", + "DEBY082", + "DEST032", + "DETH009", + "DEHE010", + "DESN019", + "DEHE023", + "DETH036", + "DETH040", + "DEMV017", + "DEBW028", + "DENI042", + "DEMV004", + "DEMV019", + "DEST044", + "DEST050", + "DEST072", + "DEST022", + "DEHH049", + "DEHH047", + "DEHH033", + "DEHH050", + "DEHH008", + "DEHH021", + "DENI054", + "DEST070", + "DEBB053", + "DENW029", + "DEBW050", + "DEUB034", + "DENW018", + "DEST052", + "DEBY020", + "DENW063", + "DESN050", + "DETH061", + "DERP014", + "DETH024", + "DEBW094", + "DENI031", + "DETH041", + "DERP019", + "DEBW081", + "DEHE013", + "DEBW021", + "DEHE060", + "DEBY031", + "DESH021", + "DESH033", + "DEHE052", + "DEBY004", + "DESN024", + "DEBW052", + "DENW042", + "DEBY032", + "DENW053", + "DENW059", + "DEBB082", + "DEBB031", + "DEHE025", + "DEBW053", + "DEHE048", 
+ "DENW051", + "DEBY034", + "DEUB035", + "DEUB032", + "DESN028", + "DESN059", + "DEMV024", + "DENW079", + "DEHE044", + "DEHE042", + "DEBB043", + "DEBB036", + "DEBW024", + "DERP001", + "DEMV012", + "DESH005", + "DESH023", + "DEUB031", + "DENI062", + "DENW006", + "DEBB065", + "DEST077", + "DEST005", + "DERP007", + "DEBW006", + "DEBW007", + "DEHE030", + "DENW015", + "DEBY013", + "DETH025", + "DEUB033", + "DEST025", + "DEHE045", + "DESN057", + "DENW036", + "DEBW044", + "DEUB036", + "DENW096", + "DETH095", + "DENW038", + "DEBY089", + "DEBY039", + "DENW095", + "DEBY047", + "DEBB067", + "DEBB040", + "DEST078", + "DENW065", + "DENW066", + "DEBY052", + "DEUB030", + "DETH027", + "DEBB048", + "DENW047", + "DEBY049", + "DERP021", + "DEHE034", + "DESN079", + "DESL008", + "DETH018", + "DEBW103", + "DEHE017", + "DEBW111", + "DENI016", + "DENI038", + "DENI058", + "DENI029", + "DEBY118", + "DEBW032", + "DEBW110", + "DERP017", + "DESN036", + "DEBW026", + "DETH042", + "DEBB075", + "DEBB052", + "DEBB021", + "DEBB038", + "DESN051", + "DEUB041", + "DEBW020", + "DEBW113", + "DENW078", + "DEHE018", + "DEBW065", + "DEBY062", + "DEBW027", + "DEBW041", + "DEHE043", + "DEMV007", + "DEMV021", + "DEBW054", + "DETH005", + "DESL012", + "DESL011", + "DEST069", + "DEST071", + "DEUB004", + "DESH006", + "DEUB029", + "DEUB040", + "DESN074", + "DEBW031", + "DENW013", + "DENW179", + "DEBW056", + "DEBW087", + "DEST061", + "DEMV001", + "DEBB024", + "DEBW057", + "DENW064", + "DENW068", + "DENW080", + "DENI019", + "DENI077", + "DEHE026", + "DEBB066", + "DEBB083", + "DEST063", + "DEBW013", + "DETH086", + "DESL018", + "DETH096", + "DEBW059", + "DEBY072", + "DEBY088", + "DEBW060", + "DEBW107", + "DEBW036", + "DEUB026", + "DEBW019", + "DENW010", + "DEST098", + "DEHE019", + "DEBW039", + "DESL017", + "DEBW034", + "DEUB005", + "DEBB051", + "DEHE051", + "DEBW023", + "DEBY092", + "DEBW008", + "DEBW030", + "DENI060", + "DEST011", + "DENW030", + "DENI041", + "DERP015", + "DEUB001", + "DERP016", + "DERP028", + 
"DERP013", + "DEHE022", + "DEUB021", + "DEBW010", + "DEST066", + "DEBB063", + "DEBB028", + "DEHE024", + "DENI020", + "DENI051", + "DERP025", + "DEBY077", + "DEMV018", + "DEST089", + "DEST028", + "DETH060", + "DEHE050", + "DEUB028", + "DESN045", + "DEUB042" +] diff --git a/src/data_handling/data_generator.py b/src/data_handling/data_generator.py index de2bb39fc0816e92cdd30e13e952e9e2b609b920..34d0955833b1534f3f86e54ec1c98ed3f8ad4cc1 100644 --- a/src/data_handling/data_generator.py +++ b/src/data_handling/data_generator.py @@ -14,7 +14,7 @@ import xarray as xr from src import helpers from src.data_handling.data_preparation import DataPrep -from src.join import EmptyQueryResult +from src.helpers.join import EmptyQueryResult number = Union[float, int] num_or_list = Union[number, List[number]] diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py index 2c83316bafbf45144b4f432244b47483c32c5aac..bb5254572e400b89a219ec674f408f09350f849c 100644 --- a/src/data_handling/data_preparation.py +++ b/src/data_handling/data_preparation.py @@ -14,8 +14,8 @@ import pandas as pd import xarray as xr from src.configuration import check_path_and_create -from src import join, helpers -from src import statistics +from src import helpers +from src.helpers import join, statistics # define a more general date type for type hinting date = Union[dt.date, dt.datetime] diff --git a/src/datastore.py b/src/helpers/datastore.py similarity index 100% rename from src/datastore.py rename to src/helpers/datastore.py diff --git a/src/join.py b/src/helpers/join.py similarity index 87% rename from src/join.py rename to src/helpers/join.py index 90b3bfc482a817b13e6bea5d04203873e46d03a5..1b2abb6c8fe9d0db2dd45636f230cc9a2e232f7c 100644 --- a/src/join.py +++ b/src/helpers/join.py @@ -1,3 +1,4 @@ +"""Functions to access join database.""" __author__ = 'Felix Kleinert, Lukas Leufen' __date__ = '2019-10-16' @@ -16,25 +17,23 @@ str_or_none = Union[str, None] class 
EmptyQueryResult(Exception): - """ - Exception that get raised if a query to JOIN returns empty results. - """ + """Exception that gets raised if a query to JOIN returns empty results.""" + pass def download_join(station_name: Union[str, List[str]], stat_var: dict, station_type: str = None, network_name: str = None, sampling: str = "daily") -> [pd.DataFrame, pd.DataFrame]: """ - read data from JOIN/TOAR + Read data from JOIN/TOAR. :param station_name: Station name e.g. DEBY122 :param stat_var: key as variable like 'O3', values as statistics on keys like 'mean' :param station_type: set the station type like "traffic" or "background", can be none :param network_name: set the measurement network like "UBA" or "AIRBASE", can be none :param sampling: sampling rate of the downloaded data, either set to daily or hourly (default daily) - :returns: - - df - data frame with all variables and statistics - - meta - data frame with all meta information + + :returns: data frame with all variables and statistics and meta data frame with all meta information """ # make sure station_name parameter is a list station_name = helpers.to_list(station_name) @@ -88,11 +87,13 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t def correct_data_format(data): """ - Transform to the standard data format. For some cases (e.g. hourly data), the data is returned as list instead of - a dictionary with keys datetime, values and metadata. This functions addresses this issue and transforms the data - into the dictionary version. + Transform to the standard data format. + + For some cases (e.g. hourly data), the data is returned as list instead of a dictionary with keys datetime, values + and metadata. This function addresses this issue and transforms the data into the dictionary version. 
:param data: data in hourly format + :return: the same data but formatted to fit with aggregated format """ formatted = {"datetime": [], @@ -106,11 +107,13 @@ def correct_data_format(data): def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]: """ - Download join data using requests framework. Data is returned as json like structure. Depending on the response - structure, this can lead to a list or dictionary. + Download join data using requests framework. + + Data is returned as a JSON-like structure. Depending on the response structure, this can lead to a list or dictionary. :param opts: options to create the request url :param headers: additional headers information like authorization, can be empty + :return: requested data (either as list or dictionary) """ url = create_url(**opts) @@ -140,12 +143,15 @@ def load_series_information(station_name: List[str], station_type: str_or_none, def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: str, var: str) -> pd.DataFrame: """ - Save given data in data frame. If given data frame is not empty, the data is appened as new column. + Save given data in data frame. + + If given data frame is not empty, the data is appended as a new column. :param df: data frame to append the new data, can be none :param data: new data to append or format as data frame containing the keys 'datetime' and '<stat>' :param stat: extracted statistic to get values from data (e.g. 'mean', 'dma8eu') :param var: variable the data is from (e.g. 'o3') + :return: newly created or concatenated data frame """ if len(data["datetime"][0]) == 19: @@ -162,10 +168,12 @@ def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: str, var: s def _correct_stat_name(stat: str) -> str: """ - Map given statistic name to new namespace defined by mapping dict. Return given name stat if not element of mapping - namespace. + Map given statistic name to new namespace defined by mapping dict. 
+ + Return given name stat if not element of mapping namespace. :param stat: namespace from JOIN server + :return: stat mapped to local namespace """ mapping = {'average_values': 'mean', 'maximum': 'max', 'minimum': 'min'} @@ -174,9 +182,10 @@ def _correct_stat_name(stat: str) -> str: def _lower_list(args: List[str]) -> Iterator[str]: """ - lower all elements of given list + Lower all elements of given list. :param args: list with string entries to lower + :return: iterator that lowers all list entries """ for string in args: @@ -185,11 +194,12 @@ def _lower_list(args: List[str]) -> Iterator[str]: def create_url(base: str, service: str, **kwargs: Union[str, int, float, None]) -> str: """ - create a request url with given base url, service type and arbitrarily many additional keyword arguments + Create a request url with given base url, service type and arbitrarily many additional keyword arguments. :param base: basic url of the rest service :param service: service type, e.g. series, stats :param kwargs: keyword pairs for optional request specifications, e.g. 
'statistics=maximum' + :return: combined url as string """ if not base.endswith("/"): diff --git a/src/statistics.py b/src/helpers/statistics.py similarity index 95% rename from src/statistics.py rename to src/helpers/statistics.py index 6510097fc3c31645bc0fa053a5ade05c3e4d908d..dd102cf3ad376cf145e3c447ff9ef902815776bb 100644 --- a/src/statistics.py +++ b/src/helpers/statistics.py @@ -1,7 +1,5 @@ from scipy import stats -from src.run_modules.run_environment import RunEnvironment - __author__ = 'Lukas Leufen, Felix Kleinert' __date__ = '2019-10-23' @@ -10,7 +8,6 @@ import xarray as xr import pandas as pd from typing import Union, Tuple - Data = Union[xr.DataArray, pd.DataFrame] @@ -29,6 +26,7 @@ def apply_inverse_transformation(data, mean, std=None, method="standardise"): def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]: """ This function standardises a xarray.dataarray (along dim) or pandas.DataFrame (along axis) with mean=0 and std=1 + :param data: :param string/int dim: | for xarray.DataArray as string: name of dimension which should be standardised @@ -44,6 +42,7 @@ def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]: def standardise_inverse(data: Data, mean: Data, std: Data) -> Data: """ This is the inverse function of `standardise` and therefore vanishes the standardising. + :param data: :param mean: :param std: @@ -55,6 +54,7 @@ def standardise_inverse(data: Data, mean: Data, std: Data) -> Data: def standardise_apply(data: Data, mean: Data, std: Data) -> Data: """ This applies `standardise` on data using given mean and std. 
+ :param data: :param mean: :param std: @@ -66,6 +66,7 @@ def standardise_apply(data: Data, mean: Data, std: Data) -> Data: def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]: """ This function centres a xarray.dataarray (along dim) or pandas.DataFrame (along axis) to mean=0 + :param data: :param string/int dim: | for xarray.DataArray as string: name of dimension which should be standardised @@ -81,6 +82,7 @@ def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]: def centre_inverse(data: Data, mean: Data) -> Data: """ This function is the inverse function of `centre` and therefore adds the given values of mean to the data. + :param data: :param mean: :return: @@ -91,6 +93,7 @@ def centre_inverse(data: Data, mean: Data) -> Data: def centre_apply(data: Data, mean: Data) -> Data: """ This applies `centre` on data using given mean and std. + :param data: :param mean: :param std: @@ -147,7 +150,8 @@ class SkillScores: return skill_score - def _climatological_skill_score(self, data, mu_type=1, observation_name="obs", forecast_name="CNN", external_data=None): + def _climatological_skill_score(self, data, mu_type=1, observation_name="obs", forecast_name="CNN", + external_data=None): kwargs = {"external_data": external_data} if external_data is not None else {} return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs) @@ -201,13 +205,14 @@ class SkillScores: def skill_score_mu_case_3(self, data, observation_name="obs", forecast_name="CNN", external_data=None): AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name) mean, sigma = suffix["mean"], suffix["sigma"] - AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values + AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name]) ** 2).values skill_score = np.array((AI - BI - CI + AIII) / 1 + AIII) 
return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array() def skill_score_mu_case_4(self, data, observation_name="obs", forecast_name="CNN", external_data=None): AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name) - monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, index=data.index) + monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, + index=data.index) data = xr.concat([data, monthly_mean_external], dim="type") mean, sigma = suffix["mean"], suffix["sigma"] monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values) @@ -217,11 +222,12 @@ class SkillScores: # r_mu, p_mu = stats.spearmanr(data.loc[..., [observation_name, observation_name+'X']]) r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"]) - AIV = np.array(r_mu**2) - BIV = ((r_mu - sigma_external / sigma.loc[observation_name])**2).values - CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values + AIV = np.array(r_mu ** 2) + BIV = ((r_mu - sigma_external / sigma.loc[observation_name]) ** 2).values + CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name]) ** 2).values skill_score = np.array((AI - BI - CI - AIV + BIV + CIV) / (1 - AIV + BIV + CIV)) - return pd.DataFrame({"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array() + return pd.DataFrame( + {"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array() @staticmethod def create_monthly_mean_from_daily_data(data, columns=None, index=None): diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py index 02cc9cd0e5780c8d8d35ffe725e502fbde7aafdb..9497db6565d957bc5196d2b8fc81e510197cbdab 100644 --- 
a/src/run_modules/experiment_setup.py +++ b/src/run_modules/experiment_setup.py @@ -6,7 +6,7 @@ import logging import os from typing import Union, Dict, Any -import src.configuration.path_config +from src.configuration import path_config from src import helpers from src.run_modules.run_environment import RunEnvironment @@ -47,12 +47,12 @@ class ExperimentSetup(RunEnvironment): super().__init__() # experiment setup - self._set_param("data_path", src.configuration.path_config.prepare_host(sampling=sampling)) + self._set_param("data_path", path_config.prepare_host(sampling=sampling)) self._set_param("create_new_model", create_new_model, default=True) if self.data_store.get("create_new_model"): trainable = True data_path = self.data_store.get("data_path") - bootstrap_path = src.configuration.path_config.set_bootstrap_path(bootstrap_path, data_path, sampling) + bootstrap_path = path_config.set_bootstrap_path(bootstrap_path, data_path, sampling) self._set_param("bootstrap_path", bootstrap_path) self._set_param("trainable", trainable, default=True) self._set_param("fraction_of_training", fraction_of_train, default=0.8) @@ -64,21 +64,21 @@ class ExperimentSetup(RunEnvironment): # set experiment name exp_date = self._get_parser_args(parser_args).get("experiment_date") - exp_name, exp_path = src.configuration.path_config.set_experiment_name(experiment_date=exp_date, experiment_path=experiment_path, + exp_name, exp_path = path_config.set_experiment_name(experiment_name=exp_date, experiment_path=experiment_path, sampling=sampling) self._set_param("experiment_name", exp_name) self._set_param("experiment_path", exp_path) - src.configuration.path_config.check_path_and_create(self.data_store.get("experiment_path")) + path_config.check_path_and_create(self.data_store.get("experiment_path")) # set plot path default_plot_path = os.path.join(exp_path, "plots") self._set_param("plot_path", plot_path, default=default_plot_path) - 
src.configuration.path_config.check_path_and_create(self.data_store.get("plot_path")) + path_config.check_path_and_create(self.data_store.get("plot_path")) # set results path default_forecast_path = os.path.join(exp_path, "forecasts") self._set_param("forecast_path", forecast_path, default_forecast_path) - src.configuration.path_config.check_path_and_create(self.data_store.get("forecast_path")) + path_config.check_path_and_create(self.data_store.get("forecast_path")) # setup for data self._set_param("stations", stations, default=DEFAULT_STATIONS) diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py index 8a962888ec0b789a14a24b20c97148e7a8315b30..afeb3757d2dd4a108aa639aa64b1d619405132df 100644 --- a/src/run_modules/post_processing.py +++ b/src/run_modules/post_processing.py @@ -1,22 +1,21 @@ __author__ = "Lukas Leufen, Felix Kleinert" __date__ = '2019-12-11' - import inspect import logging import os +from typing import Dict import keras import numpy as np import pandas as pd import xarray as xr -from src import statistics +from src.data_handling.bootstraps import BootStraps from src.data_handling.data_distributor import Distributor from src.data_handling.data_generator import DataGenerator -from src.data_handling.bootstraps import BootStraps -from src.datastore import NameNotFoundInDataStore -from src.helpers import TimeTracking +from src.helpers.datastore import NameNotFoundInDataStore +from src.helpers import TimeTracking, statistics from src.model_modules.linear_model import OrdinaryLeastSquaredModel from src.model_modules.model_class import AbstractModelClass from src.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \ @@ -24,8 +23,6 @@ from src.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStation from src.plotting.postprocessing_plotting import plot_conditional_quantiles from src.run_modules.run_environment import RunEnvironment -from typing import Dict - 
class PostProcessing(RunEnvironment): @@ -135,6 +132,7 @@ class PostProcessing(RunEnvironment): Use already created bootstrap predictions and the original predictions (the not-bootstrapped ones) and calculate skill scores for the bootstraps. The result is saved as a xarray DataArray in a dictionary structure separated for each station (keys of dictionary). + :return: The result dictionary with station-wise skill scores """ @@ -157,7 +155,7 @@ class PostProcessing(RunEnvironment): shape = labels.shape # get original forecasts - orig = bootstraps.get_orig_prediction(forecast_path, f"forecasts_norm_{station}_test.nc").reshape(shape) + orig = bootstraps.get_orig_prediction(forecast_path, f"forecasts_norm_{station}_test.nc").reshape(shape) coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"]) orig = xr.DataArray(orig, coords=coords, dims=["index", "ahead", "type"]) @@ -170,7 +168,8 @@ class PostProcessing(RunEnvironment): boot_scores = [] for ahead in range(1, window_lead_time + 1): data = boot_data.sel(ahead=ahead) - boot_scores.append(skill_scores.general_skill_score(data, forecast_name=boot, reference_name="orig")) + boot_scores.append( + skill_scores.general_skill_score(data, forecast_name=boot, reference_name="orig")) skill.loc[boot] = np.array(boot_scores) # collect all results in single dictionary @@ -244,17 +243,20 @@ class PostProcessing(RunEnvironment): for normalised in [True, False]: # create empty arrays - nn_prediction, persistence_prediction, ols_prediction, observation = self._create_empty_prediction_arrays(data, count=4) + nn_prediction, persistence_prediction, ols_prediction, observation = self._create_empty_prediction_arrays( + data, count=4) # nn forecast - nn_prediction = self._create_nn_forecast(input_data, nn_prediction, mean, std, transformation_method, normalised) + nn_prediction = self._create_nn_forecast(input_data, nn_prediction, mean, std, transformation_method, + normalised) # persistence persistence_prediction = 
self._create_persistence_forecast(data, persistence_prediction, mean, std, transformation_method, normalised) # ols - ols_prediction = self._create_ols_forecast(input_data, ols_prediction, mean, std, transformation_method, normalised) + ols_prediction = self._create_ols_forecast(input_data, ols_prediction, mean, std, transformation_method, + normalised) # observation observation = self._create_observation(data, observation, mean, std, transformation_method, normalised) diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py index ce9b8699959ba5cf064600b123b03abf60ac64a9..3f0ce363ec844e3538d541ac726c8ee3322ae9ad 100644 --- a/src/run_modules/pre_processing.py +++ b/src/run_modules/pre_processing.py @@ -10,8 +10,8 @@ import pandas as pd from src.data_handling.data_generator import DataGenerator from src.helpers import TimeTracking -from src.configuration.path_config import check_path_and_create -from src.join import EmptyQueryResult +from src.configuration import path_config +from src.helpers.join import EmptyQueryResult from src.run_modules.run_environment import RunEnvironment DEFAULT_ARGS_LIST = ["data_path", "network", "stations", "variables", "interpolate_dim", "target_dim", "target_var"] @@ -86,7 +86,7 @@ class PreProcessing(RunEnvironment): meta_round = ["station_lon", "station_lat", "station_alt"] precision = 4 path = os.path.join(self.data_store.get("experiment_path"), "latex_report") - check_path_and_create(path) + path_config.check_path_and_create(path) set_names = ["train", "val", "test"] df = pd.DataFrame(columns=meta_data + set_names) for set_name in set_names: diff --git a/src/run_modules/run_environment.py b/src/run_modules/run_environment.py index 63a3cd3ac1c6de7e950690a1b035aafa039c056d..ab42efaa2e1f6357e202423f458a9b605b93bf51 100644 --- a/src/run_modules/run_environment.py +++ b/src/run_modules/run_environment.py @@ -6,8 +6,8 @@ import os import shutil import time -from src.datastore import DataStoreByScope as 
DataStoreObject -from src.datastore import NameNotFoundInDataStore +from src.helpers.datastore import DataStoreByScope as DataStoreObject +from src.helpers.datastore import NameNotFoundInDataStore from src.helpers import Logger from src.helpers import TimeTracking diff --git a/src/run_modules/training.py b/src/run_modules/training.py index 2d949af8c68f244c0a0da2bad6580c616695da8d..93eb5762ec765b34191537e72abb2277cddeea7a 100644 --- a/src/run_modules/training.py +++ b/src/run_modules/training.py @@ -4,7 +4,7 @@ __date__ = '2019-12-05' import json import logging import os -import pickle +from typing import Union import keras @@ -13,8 +13,6 @@ from src.model_modules.keras_extensions import LearningRateDecay, CallbackHandle from src.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate from src.run_modules.run_environment import RunEnvironment -from typing import Union - class Training(RunEnvironment): @@ -64,6 +62,7 @@ class Training(RunEnvironment): def _set_gen(self, mode: str) -> None: """ Set and distribute the generators for given mode regarding batch size + :param mode: name of set, should be from ["train", "val", "test"] """ gen = self.data_store.get("generator", mode) @@ -137,6 +136,7 @@ class Training(RunEnvironment): def load_best_model(self, name: str) -> None: """ Load model weights for model with name. Skip if no weights are available. + :param name: name of the model to load weights for """ logging.debug(f"load best model: {name}") @@ -151,6 +151,7 @@ class Training(RunEnvironment): Save callbacks (history, learning rate) of training. * history.history -> history.json * lr_sc.lr -> history_lr.json + :param history: history object of training """ logging.debug("saving callbacks") @@ -166,6 +167,7 @@ class Training(RunEnvironment): Creates the history and learning rate plot in dependence of the number of epochs. The plots are saved in the experiment's plot_path. 
History plot is named '<exp_name>_history_loss_val_loss.pdf', the learning rate with '<exp_name>_history_learning_rate.pdf'. + :param history: keras history object with losses to plot (must include 'loss' and 'val_loss') :param lr_sc: learning rate decay object with 'lr' attribute """ diff --git a/test/test_data_handling/test_data_generator.py b/test/test_data_handling/test_data_generator.py index f48978dab25d910806a17ba960e78a9c257adda6..754728ba403fbda25c021c2f576a1bc89d26f83f 100644 --- a/test/test_data_handling/test_data_generator.py +++ b/test/test_data_handling/test_data_generator.py @@ -8,7 +8,7 @@ import xarray as xr from src.data_handling.data_generator import DataGenerator from src.data_handling.data_preparation import DataPrep -from src.join import EmptyQueryResult +from src.helpers.join import EmptyQueryResult class TestDataGenerator: diff --git a/test/test_data_handling/test_data_preparation.py b/test/test_data_handling/test_data_preparation.py index edfaa74f03406036180bad1af760cd6f735e908d..a8ca555c9748f7656fefc007922ee0d7df1992fa 100644 --- a/test/test_data_handling/test_data_preparation.py +++ b/test/test_data_handling/test_data_preparation.py @@ -9,7 +9,7 @@ import pytest import xarray as xr from src.data_handling.data_preparation import DataPrep -from src.join import EmptyQueryResult +from src.helpers.join import EmptyQueryResult class TestDataPrep: diff --git a/test/test_datastore.py b/test/test_datastore.py index 76349b0bf3604f2f71f89b93c9bc6fdf131696ed..9aca1eef35927242df0b5f659eece716f81f6c13 100644 --- a/test/test_datastore.py +++ b/test/test_datastore.py @@ -3,8 +3,8 @@ __date__ = '2019-11-22' import pytest -from src.datastore import AbstractDataStore, DataStoreByVariable, DataStoreByScope, CorrectScope -from src.datastore import NameNotFoundInDataStore, NameNotFoundInScope, EmptyScope +from src.helpers.datastore import AbstractDataStore, DataStoreByVariable, DataStoreByScope, CorrectScope +from src.helpers.datastore import 
NameNotFoundInDataStore, NameNotFoundInScope, EmptyScope class TestAbstractDataStore: diff --git a/test/test_join.py b/test/test_join.py index 90c244c99a89e5a94d0ff544b3f4466b03484cec..5adc013cfbd446c4feaf4a2b344f07d6f170077d 100644 --- a/test/test_join.py +++ b/test/test_join.py @@ -2,8 +2,8 @@ from typing import Iterable import pytest -from src.join import * -from src.join import _save_to_pandas, _correct_stat_name, _lower_list +from src.helpers.join import * +from src.helpers.join import _save_to_pandas, _correct_stat_name, _lower_list from src.configuration.join_settings import join_settings diff --git a/test/test_modules/test_experiment_setup.py b/test/test_modules/test_experiment_setup.py index 585335b94d353e209893f238859149b743a04c94..e06ba6c0ce5b9abb169e20016342b2a0dfb47d0f 100644 --- a/test/test_modules/test_experiment_setup.py +++ b/test/test_modules/test_experiment_setup.py @@ -53,7 +53,7 @@ class TestExperimentSetup: assert data_store.get("fraction_of_training", "general") == 0.8 # set experiment name assert data_store.get("experiment_name", "general") == "TestExperiment_daily" - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment")) + path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment_daily")) assert data_store.get("experiment_path", "general") == path default_statistics_per_var = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', @@ -123,7 +123,7 @@ class TestExperimentSetup: # set experiment name assert data_store.get("experiment_name", "general") == "TODAY_network_daily" path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "testExperimentFolder", - "TODAY_network")) + "TODAY_network_daily")) assert data_store.get("experiment_path", "general") == path # setup for data assert data_store.get("stations", "general") == ['DEBY053', 'DEBW059', 'DEBW027'] diff --git 
a/test/test_modules/test_model_setup.py b/test/test_modules/test_model_setup.py index b91a43272cc97daac51ec50e85ab4b81e9a6cb9b..c4fee1770c3fcb3d4a04a880292173dd7c85144f 100644 --- a/test/test_modules/test_model_setup.py +++ b/test/test_modules/test_model_setup.py @@ -3,7 +3,7 @@ import os import pytest from src.data_handling.data_generator import DataGenerator -from src.datastore import EmptyScope +from src.helpers.datastore import EmptyScope from src.model_modules.keras_extensions import CallbackHandler from src.model_modules.model_class import AbstractModelClass from src.run_modules.model_setup import ModelSetup diff --git a/test/test_modules/test_pre_processing.py b/test/test_modules/test_pre_processing.py index b29ed1e21480a869e4c118332c18b6edd8ac23a5..3aac2abfd51cfc7d75b54baae3eab5a1828f7318 100644 --- a/test/test_modules/test_pre_processing.py +++ b/test/test_modules/test_pre_processing.py @@ -3,7 +3,7 @@ import logging import pytest from src.data_handling.data_generator import DataGenerator -from src.datastore import NameNotFoundInScope +from src.helpers.datastore import NameNotFoundInScope from src.helpers import PyTestRegex from src.run_modules.experiment_setup import ExperimentSetup from src.run_modules.pre_processing import PreProcessing, DEFAULT_ARGS_LIST, DEFAULT_KWARGS_LIST diff --git a/test/test_statistics.py b/test/test_statistics.py index 6e981faf08176537a42e63b060005c7d89c2a2b4..3da7a47871f6d92472de268d165d788c343ce394 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -3,7 +3,7 @@ import pandas as pd import pytest import xarray as xr -from src.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, centre_apply, \ +from src.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, centre_apply, \ apply_inverse_transformation lazy = pytest.lazy_fixture