Commit ab95345e authored by lukas leufen


correct data format from join if given as list, prepare host creates separate data directory if hourly data is loaded
parent aa92f77b
2 merge requests: !37 include new development, !36 include using of hourly data
@@ -96,24 +96,24 @@ class TimeTracking(object):
         logging.info(f"undefined job finished after {self}")


-def prepare_host(create_new=True):
+def prepare_host(create_new=True, sampling="daily"):
     hostname = socket.gethostname()
     try:
         user = os.getlogin()
     except OSError:
         user = "default"
-    if hostname == 'ZAM144':
-        path = f'/home/{user}/Data/toar_daily/'
-    elif hostname == 'zam347':
-        path = f'/home/{user}/Data/toar_daily/'
-    elif hostname == 'linux-aa9b':
-        path = f'/home/{user}/machinelearningtools/data/toar_daily/'
-    elif (len(hostname) > 2) and (hostname[:2] == 'jr'):
-        path = f'/p/project/cjjsc42/{user}/DATA/toar_daily/'
-    elif (len(hostname) > 2) and (hostname[:2] == 'jw'):
-        path = f'/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_daily/'
-    elif "runner-6HmDp9Qd-project-2411-concurrent" in hostname:
-        path = f'/home/{user}/machinelearningtools/data/toar_daily/'
+    if hostname == "ZAM144":
+        path = f"/home/{user}/Data/toar_{sampling}/"
+    elif hostname == "zam347":
+        path = f"/home/{user}/Data/toar_{sampling}/"
+    elif hostname == "linux-aa9b":
+        path = f"/home/{user}/machinelearningtools/data/toar_{sampling}/"
+    elif (len(hostname) > 2) and (hostname[:2] == "jr"):
+        path = f"/p/project/cjjsc42/{user}/DATA/toar_{sampling}/"
+    elif (len(hostname) > 2) and (hostname[:2] == "jw"):
+        path = f"/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_{sampling}/"
+    elif "runner-6HmDp9Qd-project-2411-concurrent" in hostname:
+        path = f"/home/{user}/machinelearningtools/data/toar_{sampling}/"
     else:
         logging.error(f"unknown host '{hostname}'")
         raise OSError(f"unknown host '{hostname}'")
...
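A minimal sketch of the effect of the new sampling argument, assuming the zam347 branch above and a made-up user name:

user = "testuser"                                    # example user name
sampling = "hourly"
path = f"/home/{user}/Data/toar_{sampling}/"         # -> /home/testuser/Data/toar_hourly/
# with the default sampling="daily" the same branch still yields /home/testuser/Data/toar_daily/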
@@ -44,6 +44,9 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
     # load series information
     vars_dict = load_series_information(station_name, station_type, network_name, join_url_base, headers)

+    # correct stat_var values if data is not aggregated (hourly)
+    [stat_var.update({k: "values"}) for k in stat_var.keys()]

     # download all variables with given statistic
     data = None
     df = None
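A minimal sketch of what the added lines do, assuming a made-up stat_var dictionary: every requested statistic is replaced by "values", so JOIN returns the raw series instead of an aggregate.

stat_var = {"o3": "dma8eu", "temp": "maximum"}       # example request
[stat_var.update({k: "values"}) for k in stat_var.keys()]
# stat_var is now {"o3": "values", "temp": "values"}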
@@ -53,12 +56,18 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         logging.info('load: {}'.format(var))

         # create data link
-        opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], #'statistics': stat_var[var],
+        opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], 'statistics': stat_var[var],
                 'sampling': sampling, 'capture': 0, 'min_data_length': 1460, 'format': 'json'}

         # load data
         data = get_data(opts, headers)

+        # adjust data format if given as list of list
+        # no branch cover because this just happens when downloading hourly data using a secret token, not available
+        # for CI testing.
+        if isinstance(data, list):  # pragma: no branch
+            data = correct_data_format(data)
+
         # correct namespace of statistics
         stat = _correct_stat_name(stat_var[var])
@@ -75,6 +84,23 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         raise EmptyQueryResult("No data found in JOIN.")


+def correct_data_format(data):
+    """
+    Transform to the standard data format. For some cases (e.g. hourly data), the data is returned as a list
+    instead of a dictionary with the keys datetime, values and metadata. This function addresses this issue and
+    transforms the data into the dictionary version.
+    :param data: data in hourly format
+    :return: the same data but formatted to fit with the aggregated format
+    """
+    formatted = {"datetime": [],
+                 "values": [],
+                 "metadata": data[-1]}
+    for d in data[:-1]:
+        for k, v in zip(["datetime", "values"], d):
+            formatted[k].append(v)
+    return formatted
+
+
 def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]:
     """
     Download join data using requests framework. Data is returned as json like structure. Depending on the response
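A minimal sketch of the new correct_data_format helper, assuming a made-up two-row response: the list-of-lists payload is reshaped into the dictionary layout that the aggregated service already delivers.

data = [["2020-01-01 00:00", 42.0],
        ["2020-01-01 01:00", 43.5],
        {"station": "XX0001"}]                       # last element holds the metadata
formatted = correct_data_format(data)
# formatted == {"datetime": ["2020-01-01 00:00", "2020-01-01 01:00"],
#               "values": [42.0, 43.5],
#               "metadata": {"station": "XX0001"}}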
@@ -116,8 +142,8 @@ def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: str, var: s
     :param var: variable the data is from (e.g. 'o3')
     :return: new created or concatenated data frame
     """
-    if len(data[0][0]) == 19:
-        str_format = "%Y-%m-%d %H:%M:%s"
+    if len(data["datetime"][0]) == 19:
+        str_format = "%Y-%m-%d %H:%M:%S"
     else:
         str_format = "%Y-%m-%d %H:%M"
     index = map(lambda s: dt.datetime.strptime(s, str_format), data['datetime'])
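A minimal sketch of why both changes matter, using a made-up timestamp: timestamps that include seconds are 19 characters long, only the uppercase %S directive parses seconds, and the data is now indexed by the "datetime" key rather than positionally.

import datetime as dt
s = "2020-01-01 12:00:00"                            # len(s) == 19, includes seconds
dt.datetime.strptime(s, "%Y-%m-%d %H:%M:%S")         # parses correctly
# "%Y-%m-%d %H:%M:%s" (lowercase s) would raise ValueError in strptime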
@@ -164,6 +190,7 @@ def create_url(base: str, service: str, **kwargs: Union[str, int, float, None])
 if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
     var_all_dic = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
                    'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
                    'pblheight': 'maximum'}
...