Skip to content
Snippets Groups Projects
Select Git revision
  • 96ce12e82795186271a71351a3104e1ee4658b8f
  • master default protected
  • enxhi_issue460_remove_TOAR-I_access
  • michael_issue459_preprocess_german_stations
  • sh_pollutants
  • develop protected
  • release_v2.4.0
  • michael_issue450_feat_load-ifs-data
  • lukas_issue457_feat_set-config-paths-as-parameter
  • lukas_issue454_feat_use-toar-statistics-api-v2
  • lukas_issue453_refac_advanced-retry-strategy
  • lukas_issue452_bug_update-proj-version
  • lukas_issue449_refac_load-era5-data-from-toar-db
  • lukas_issue451_feat_robust-apriori-estimate-for-short-timeseries
  • lukas_issue448_feat_load-model-from-path
  • lukas_issue447_feat_store-and-load-local-clim-apriori-data
  • lukas_issue445_feat_data-insight-plot-monthly-distribution
  • lukas_issue442_feat_bias-free-evaluation
  • lukas_issue444_feat_choose-interp-method-cams
  • 414-include-crps-analysis-and-other-ens-verif-methods-or-plots
  • lukas_issue384_feat_aqw-data-handler
  • v2.4.0 protected
  • v2.3.0 protected
  • v2.2.0 protected
  • v2.1.0 protected
  • Kleinert_etal_2022_initial_submission
  • v2.0.0 protected
  • v1.5.0 protected
  • v1.4.0 protected
  • v1.3.0 protected
  • v1.2.1 protected
  • v1.2.0 protected
  • v1.1.0 protected
  • IntelliO3-ts-v1.0_R1-submit
  • v1.0.0 protected
  • v0.12.2 protected
  • v0.12.1 protected
  • v0.12.0 protected
  • v0.11.0 protected
  • v0.10.0 protected
  • IntelliO3-ts-v1.0_initial-submit
41 results

test_run_environment.py

Blame
  • data_preparation.py 3.39 KiB
    # Data preparation module: defines the DataPrep class, which reads station data from a local
    # netCDF file (plus a meta data csv file) and falls back to a download through the project's
    # `join` module when the local files are missing.
    __author__ = 'Felix Kleinert, Lukas Leufen'
    __date__ = '2019-10-16'
    
    
    import xarray as xr
    import pandas as pd
    import logging
    import os
    from src import join
    
    
    class DataPrep:
        """
        Container for the data and meta data of a measurement station.

        On construction the data is read from a local netCDF file and the meta data from a local csv file. If one of
        these files is missing, the data is downloaded with `join.download_join` and stored locally afterwards. All
        processing steps (interpolation, standardisation, history / label creation, ...) are placeholders that raise
        NotImplementedError.
        """

        def __init__(self, path: str, network: str, stations, variables, **kwargs):
            """
            Store the settings and load the data.

            :param path: directory that holds (or will hold) the local data and meta data files
            :param network: name of the network (only stored, not used by the methods of this class)
            :param stations: station name as string or a list of station names
            :param variables: list of variable names, used to build the local file names
            :param kwargs: further settings; `statistics_per_var` is required to load data
            :raises NotImplementedError: if `statistics_per_var` is not given (no alternative loader exists)
            """
            self.path = path
            self.network = network
            self.stations = stations
            self.variables = variables
            self.kwargs = kwargs
            self.statistics_per_var = kwargs.get("statistics_per_var", None)
            # Initialise all result attributes BEFORE loading, so that load_data() is not overwritten afterwards.
            self.mean = None
            self.std = None
            self.df = None
            self.history = None
            self.label = None
            self.data = None
            self.meta = None
            if self.statistics_per_var is None:
                # There is no loader for data without per-variable statistics yet.
                raise NotImplementedError
            self.load_data()

        def load_data(self):
            """
            Load data and meta data from the local files or download them if the files do not exist.

            Freshly downloaded data is written to the local netCDF / csv files so that it can be read from disk on the
            next call. The results are stored in `self.data` and `self.meta`.
            """
            self.check_path_and_create(self.path)
            file_name = self._set_file_name()
            meta_file = self._set_meta_file_name()
            try:
                self.data = xr.open_dataarray(file_name)
                self.meta = pd.read_csv(meta_file, index_col=0)
            except FileNotFoundError as e:
                logging.warning(e)
                df, self.meta = join.download_join(station_name=self.stations, statvar=self.statistics_per_var)
                # A plain string must be used as a whole; indexing it would only return its first character.
                # For a list of stations only the first entry is used as key (single download result).
                station_key = self.stations if isinstance(self.stations, str) else self.stations[0]
                # convert the downloaded data to an xarray with the additional dimension 'Stations'
                per_station = {station_key: xr.DataArray(df, dims=['datetime', 'variables'])}
                xarr = xr.Dataset(per_station).to_array(dim='Stations')
                self.data = xarr
                # save locally as nc file
                xarr.to_netcdf(path=file_name)
                self.meta.to_csv(meta_file)

        def _file_stem(self):
            """Return the common part of the local file names: joined station name(s) and sorted variables."""
            return f"{''.join(self.stations)}_{'_'.join(sorted(self.variables))}"

        def _set_file_name(self):
            """Return the path of the local netCDF data file inside `self.path`."""
            # os.path.join also works if self.path has no trailing separator
            return os.path.join(self.path, f"{self._file_stem()}.nc")

        def _set_meta_file_name(self):
            """Return the path of the local meta data csv file inside `self.path`."""
            return os.path.join(self.path, f"{self._file_stem()}_meta.csv")

        def __repr__(self):
            """Return a string that shows the arguments this object was created with."""
            return f"DataPrep(path='{self.path}', network='{self.network}', stations={self.stations}, " \
                   f"variables={self.variables}, **{self.kwargs})"

        @staticmethod
        def check_path_and_create(path):
            """
            Create the directory `path` (including parents) if it does not exist yet.

            :param path: directory to create
            """
            try:
                os.makedirs(path)
                logging.info("Created path: %s", path)
            except FileExistsError:
                # directory is already there, nothing to do
                pass

        def interpolate(self, dim=None, method='linear', limit=None, use_coordinate=True, **kwargs):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def restandardise(self, data, dim='variables', **kwargs):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def standardise(self, dim):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def make_history_window(self, dim, window):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def shift(self, dim, window):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def make_labels(self, dimension_name_of_target, target_variable, dimension_name_of_shift, window):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        def history_label_nan_remove(self, dim):
            """Placeholder, not implemented yet."""
            raise NotImplementedError

        @staticmethod
        def create_indexarray(index_name, index_values):
            """Placeholder, not implemented yet."""
            raise NotImplementedError
    
    
    if __name__ == "__main__":
        # Manual example run: load (or download) daily statistics of ozone and temperature for one station
        # and print the representation of the resulting object.
        statistics = {'o3': 'dma8eu', 'temp': 'maximum'}
        dp = DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var=statistics)
        print(dp)