Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
toargridding
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
toar-public
toargridding
Commits
d3398af4
Commit
d3398af4
authored
10 months ago
by
Carsten Hinz
Browse files
Options
Downloads
Patches
Plain Diff
Added a custom exception for the case that an analysis does not provide results.
parent
fb9df16e
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!11
Creation of first beta release version
,
!7
Resolve "Cache and requests: Handling of dead status endpoints (Internal Server Error, old requests)"
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
examples/produce_data_withOptional_country.ipynb
+2
-2
2 additions, 2 deletions
examples/produce_data_withOptional_country.ipynb
toargridding/toar_rest_client.py
+18
-8
18 additions, 8 deletions
toargridding/toar_rest_client.py
with
20 additions
and
10 deletions
examples/produce_data_withOptional_country.ipynb
+
2
−
2
View file @
d3398af4
...
...
@@ -10,7 +10,7 @@
"from collections import namedtuple\n",
"from pathlib import Path\n",
"\n",
"from toargridding.toar_rest_client import AnalysisServiceDownload, Connection\n",
"from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
, EmptyDataError
\n",
"from toargridding.grids import RegularGrid\n",
"from toargridding.gridding import get_gridded_toar_data\n",
"from toargridding.metadata import TimeSample\n",
...
...
@@ -87,7 +87,7 @@
" stats=config.stats,\n",
" **config.moreOptions\n",
" )\n",
" except
Key
Error as e:\n",
" except
EmptyData
Error as e:\n",
" print(\"failed for \", person)\n",
" continue\n",
"\n",
...
...
%% Cell type:code id: tags:
```
python
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path

from toargridding.toar_rest_client import AnalysisServiceDownload, Connection, EmptyDataError
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
```
%% Cell type:code id: tags:
```
python
# Creation of the requests: one Config per country code.
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# see page 18 in https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
varName = "country"

# obtain the list of valid country codes from the TOAR database vocabulary
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for country in validCountries:
    valid_data = Config(
        grid,
        # possibly adapt the time range :-)
        TimeSample(start=dt(2000, 1, 1), end=dt(2018, 12, 31), sampling="daily"),
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],  # statistical aggregation
        {varName: country},
    )
    configs[f"test_ta{country}"] = valid_data
```
%% Cell type:code id: tags:
```
python
# CAVE: this cell runs about 45 minutes per requested year, therefore we increase the waiting duration to 1h per request.
# The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested data are ready for download.
# The download can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(
    stats_endpoint=stats_endpoint,
    cache_dir=cache_basepath,
    sample_dir=result_basepath,
    use_downloaded=True,
)
Connection.DEBUG = True

# Maybe adapt the interval for requesting the results and the total duration before the client pauses the requests.
# As the requests take about 45 min, it is more suitable to wait 60 min before timing out the requests than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

createdFiles = []

for person, config in configs.items():
    print(f"\nProcessing {person}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    # the analysis service raises EmptyDataError when the request yields no data
    # (e.g. no stations for this country); skip such countries and continue.
    except EmptyDataError:
        print("failed for ", person)
        continue

    # store results and remember the created file names
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())
```
%% Cell type:code id: tags:
```
python
##TODO: now we only need to combine all the obtained results...
```
...
...
This diff is collapsed.
Click to expand it.
toargridding/toar_rest_client.py
+
18
−
8
View file @
d3398af4
...
...
@@ -20,6 +20,14 @@ STATION_LON = "station_coordinates_lng"
COORDS
=
[
STATION_LAT
,
STATION_LON
]
class EmptyDataError(ValueError):
    """Custom exception for requests where the analysis service only provides metadata.

    This can happen if there are no stations contributing to a request, or if the
    statistical analysis does not yield any data points.
    """

    def __init__(self, message: str):
        # Forward the message unchanged to ValueError; kept explicit to
        # document the expected single message argument.
        super().__init__(message)
@dataclass
(
frozen
=
True
)
class
QueryOptions
:
"""
Creation of a request to the TOAR database.
...
...
@@ -256,12 +264,14 @@ class Connection:
try
:
response
.
raise_for_status
()
except
requests
.
exceptions
.
HTTPError
as
e
:
print
(
f
"
\t
connection error (
{
e
.
response
.
status_code
}
:
{
e
.
response
.
reason
}
).
Trying again later
"
)
print
(
f
"
\t
connection error (
{
e
.
response
.
status_code
}
:
{
e
.
response
.
reason
}
).
"
)
self
.
printExecption
(
e
,
response
)
#a status code 500 seems to indicate an aborted request -> restart the request and continue with a new status endpoint
if
e
.
response
.
status_code
==
500
:
self
.
cache
.
remove
(
query_options
.
cache_key
)
status_endpoint
=
self
.
get_status_endpoint
(
query_options
)
else
:
print
(
"
\t
Trying again later.
"
)
continue
#are our results ready to obtain?
if
response
.
headers
[
"
Content-Type
"
]
==
"
application/zip
"
:
...
...
@@ -305,6 +315,7 @@ class Connection:
#will be overwritten in the next step.
self
.
cache
.
remove
(
query_options
.
cache_key
)
print
(
"
Removing status endpoint from cache and submitting new request.
"
)
pass
except
:
raise
RuntimeError
(
f
"
An error occurred during accessing a cached request
"
)
else
:
...
...
@@ -336,12 +347,9 @@ class Connection:
if
response
.
headers
[
"
Content-Type
"
]
==
"
application/json
"
:
status_endpoint
=
response
.
json
()[
"
status
"
]
#else:
# raise Exception( f"Unexpected type of response: {response.headers['Content-Type']}" )
#TODO: can this raise cause a problem?
response
.
raise_for_status
()
except
requests
.
exceptions
.
HTTPError
as
e
:
print
(
f
"
A
connection
error occurred:
"
)
print
(
f
"
A
n HTTP
error occurred:
"
)
self
.
printExecption
(
e
,
response
)
raise
e
except
requests
.
exceptions
.
ReadTimeout
as
e
:
...
...
@@ -500,6 +508,9 @@ class AnalysisService:
)
->
tuple
[
pd
.
DataFrame
,
pd
.
DataFrame
]:
"""
convert downloaded byte stream into pandas dataframes
throws an EmptyDataError, if the results file does not contain data.
This occurs if there are no stations contributing to a request or if the restrictions of the analysis exclude all data points of a station.
Parameters:
----------
content:
...
...
@@ -510,8 +521,7 @@ class AnalysisService:
zip_stream
=
io
.
BytesIO
(
content
)
with
ZipFile
(
zip_stream
)
as
myzip
:
if
len
(
myzip
.
namelist
())
==
1
:
print
(
"
Downloaded data do not contain a timeseries.
"
)
raise
KeyError
(
"
Data file is empty
"
)
#TODO replace this with a custom exception.
raise
EmptyDataError
(
"
Data file from TOAR analysis service is empty
"
)
timeseries
=
self
.
extract_data
(
myzip
,
metadata
.
statistic
)
timeseries_metadata
=
self
.
extract_data
(
myzip
,
AnalysisService
.
METADATA
)
...
...
@@ -536,7 +546,7 @@ class AnalysisServiceDownload(AnalysisService):
"""
download service with caching of requests to the TOARDB
This service performs the request to the TOAR database and downloads the results of the request to disc before returning if for further processing.
When retrieving data, a check is don
w
, if this request has already been cached on disc.
When retrieving data, a check is don
e
, if this request has already been cached on disc.
Attributes:
----------
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment