Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
Airflow Datacat Integration
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
eFlows4HPC WP2
Airflow Datacat Integration
Commits
5bc427fe
Commit
5bc427fe
authored
3 years ago
by
Christian Boettcher
Browse files
Options
Downloads
Patches
Plain Diff
add basic client for datacat + first tests
parent
3dff2faf
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitignore
+3
-2
3 additions, 2 deletions
.gitignore
datacat_integration/connection.py
+106
-0
106 additions, 0 deletions
datacat_integration/connection.py
tests/test_connection.py
+46
-0
46 additions, 0 deletions
tests/test_connection.py
with
155 additions
and
2 deletions
.gitignore
+
3
−
2
View file @
5bc427fe
...
...
@@ -7,5 +7,6 @@ __pycache__/
.coverage
coverage.xml
#
vs-code specific, individual
files
#
local env
files
settings.json
testing-authentication.env
\ No newline at end of file
This diff is collapsed.
Click to expand it.
datacat_integration/connection.py
0 → 100644
+
106
−
0
View file @
5bc427fe
from
typing
import
Dict
import
uuid
import
json
from
urllib.parse
import
urljoin
import
requests
class DataCatalogEntry:
    """A datatype representing an entry in the datacatalog.

    Attributes:
        name: Name of the entry.
        url: URL stored for the entry.
        metadata: Additional key/value metadata of the entry.
    """

    # Class-level defaults are kept for backward compatibility. __init__
    # rebinds all three attributes on every instance it initializes, so the
    # shared dict below is not what initialized instances end up holding.
    name: str = ""
    url: str = ""
    metadata: Dict[str, str] = {}

    def __init__(self, name: str, url: str, metadata: Dict[str, str]):
        """Store the given name, url and metadata on the instance.

        The metadata dict is stored by reference, not copied.
        """
        self.name = name
        self.url = url
        self.metadata = metadata

    def json(self) -> str:
        """Return the entry serialized as a JSON string.

        The serialized object has the keys "name", "url" and "metadata".
        """
        return json.dumps(
            {
                "name": self.name,
                "url": self.url,
                "metadata": self.metadata,
            }
        )

    @classmethod
    def from_json(cls, data: str) -> "DataCatalogEntry":
        """Build an entry from the given JSON string.

        Declared as a classmethod so that it can be called both on the class
        and on an existing instance.

        Args:
            data: JSON document with the keys "name", "url" and "metadata".

        Returns:
            A new entry populated from the decoded document.

        Raises:
            json.JSONDecodeError: if ``data`` is not valid JSON.
            KeyError: if one of the three required keys is missing.
        """
        dict_data = json.loads(data)
        return cls(dict_data['name'], dict_data['url'], dict_data['metadata'])
class DataCatConnection:
    """An API to the DataCatalog.

    An instance of this class contains connection data for a single
    DataCatalog-server with a single user/pass login.

    NOTE(review): none of the HTTP calls below passes a ``timeout``; consider
    adding one if the server may become unresponsive.
    """

    def __init__(self, catalog_url: str = "", username: str = "", password: str = ""):
        """Store the connection data and immediately request an auth token.

        Raises:
            ConnectionError: (built-in) if the initial authentication fails.
        """
        self.url = catalog_url
        self.user = username
        self._password = password
        self.refresh_token()

    def _endpoint(self, path: str) -> str:
        """Return the absolute URL of ``path`` below the configured base URL.

        ``urljoin`` replaces the last path segment of a base that does not end
        in a slash ("http://host/api" + "token" -> "http://host/token"), so a
        non-empty base is normalized to end with "/" before joining.
        """
        base = self.url
        if base and not base.endswith('/'):
            base += '/'
        return urljoin(base, path)

    def refresh_token(self):
        # POST /token
        """Refresh the stored token by retrieving a new one from the server.

        Returns:
            The new access token, which is also stored on the instance.

        Raises:
            ConnectionError: (built-in) if the server response is not ok.
        """
        data = {
            "username": self.user,
            "password": self._password,
        }
        headers = {'accept': 'application/json'}
        response = requests.post(self._endpoint('token'), data=data, headers=headers)
        if response.ok:
            self._auth_token = response.json()['access_token']
            return self._auth_token
        else:
            raise ConnectionError('Could not authenticate with the DataCatalog.')

    def get_token(self):
        # GET /me with auth and refresh if error
        """Return a valid token, refreshing the stored one if necessary.

        The stored token is checked with an authenticated GET on ``me``; if
        that response is not ok, a new token is requested and returned.
        """
        headers = {
            'accept': 'application/json',
            'Authorization': 'Bearer {}'.format(self._auth_token),
        }
        if requests.get(self._endpoint('me'), headers=headers).ok:
            return self._auth_token
        else:
            return self.refresh_token()

    def get_object(self, datacat_type: str, oid: uuid.UUID):
        # GET /<type>/<oid>
        """Return the decoded JSON of the given object from the server.

        ``oid`` is interpolated with ``str.format``, so a UUID object and its
        string form produce the same request. The response status is not
        checked; whatever JSON body the server sends is returned.
        """
        headers = {'accept': 'application/json'}
        url = self._endpoint("{}/{}".format(datacat_type, oid))
        return requests.get(url, headers=headers).json()

    def create_object(self, datacat_type: str, object: "DataCatalogEntry"):
        # POST /<type>
        """Create a new object in the datacatalog.

        Returns:
            The first element of the JSON response (presumably the oid of the
            created object - confirm against the server API).

        Raises:
            ConnectionError: (built-in) carrying the response text if the
                server response is not ok.
        """
        # NOTE(review): the cached token is sent as-is; call get_token() first
        # if the token may have expired since the last refresh.
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': 'Bearer {}'.format(self._auth_token),
        }
        response = requests.post(self._endpoint(datacat_type), headers=headers, data=object.json())
        if response.ok:
            return response.json()[0]
        else:
            raise ConnectionError(response.text)

    def list_type(self, datacat_type: str):
        """List all elements of the given type.

        The response status is not checked; the decoded JSON body is returned
        as received.
        """
        headers = {'accept': 'application/json'}
        url = self._endpoint(datacat_type)
        return requests.get(url, headers=headers).json()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
tests/test_connection.py
0 → 100644
+
46
−
0
View file @
5bc427fe
from
unittest
import
TestCase
import
os
from
datacat_integration.connection
import
DataCatalogEntry
,
DataCatConnection
class EntryTest(TestCase):
    """Tests for the JSON (de)serialization of DataCatalogEntry."""

    def setUp(self) -> None:
        # The same entry once as a JSON document and once as an object.
        self.json_string = '{"name" : "foo", "url" : "bar", "metadata" : { "key1" : "val1", "key2" : "val2" } }'
        self.entry: DataCatalogEntry = DataCatalogEntry("foo", "bar", {"key1": "val1", "key2": "val2"})

    def test_create_entry_from_json(self):
        """An entry built from JSON carries the same fields as the reference entry."""
        entry_from_json = DataCatalogEntry.from_json(self.json_string)
        self.assertDictEqual(self.entry.metadata, entry_from_json.metadata)
        self.assertEqual(self.entry.name, entry_from_json.name)
        self.assertEqual(self.entry.url, entry_from_json.url)

    def test_create_json_from_entry(self):
        """The serialized entry decodes to the same document as the reference JSON."""
        # Local import: the test module does not import json at file level.
        import json

        json_from_entry = self.entry.json()
        # Compare decoded documents rather than whitespace-stripped strings,
        # so the check does not depend on key order or separator formatting.
        self.assertEqual(json.loads(json_from_entry), json.loads(self.json_string))
class ConnectionTest(TestCase):
    """Placeholder tests for DataCatConnection against a live server.

    The connection data is read from the environment variables DATACAT_URL,
    DATACAT_LOGIN and DATACAT_PASSWORD.
    """

    def setUp(self) -> None:
        """Read the connection data; skip the test if any of it is missing."""
        self.url = os.getenv('DATACAT_URL')
        self.user = os.getenv('DATACAT_LOGIN')
        self.password = os.getenv('DATACAT_PASSWORD')

        # if these are not set, connection can not be properly tested
        # -> report the test as skipped instead of failed
        required = (
            ('DATACAT_URL', self.url),
            ('DATACAT_LOGIN', self.user),
            ('DATACAT_PASSWORD', self.password),
        )
        missing = [name for name, value in required if value is None]
        if missing:
            self.skipTest('missing environment variables: ' + ', '.join(missing))

    def test_create_token(self):
        pass

    def test_update_token(self):
        pass

    def test_get_object(self):
        pass

    def test_create_object(self):
        pass

    # TODO
    def test_list_objects(self):
        pass
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment