diff --git a/dags/b2shareoperator.py b/dags/b2shareoperator.py index 4b31958c4056cc193be9c2f9ca3b136c5ed11a85..948027509818883a93d4ef699fe6b1a3818439af 100644 --- a/dags/b2shareoperator.py +++ b/dags/b2shareoperator.py @@ -42,6 +42,19 @@ def get_record_template(): }, "open_access": True} +def get_schema(url): + r = requests.get(url) + return r.json() + +def get_community(server, community_id): + response = requests.get( url=urljoin(server, f"api/communities/{community_id}"), + headers={'Content-Type':'application/json'}).json() + if 'status' in response: + return None + schema = get_schema(url=response['links']['schema']) + return response['id'], schema['json_schema']['allOf'][0]['required'] + + def create_draft_record(server: str, token: str, record): response = requests.post( url=urljoin(server, 'api/records/'), headers={'Content-Type':'application/json'}, diff --git a/dags/uploadflow.py b/dags/uploadflow.py index c460b3f7e2704dcaeaf50375ad109c9fb8bc1640..823e58ec9282a6aae762f8d6e07564b7f22ceac9 100644 --- a/dags/uploadflow.py +++ b/dags/uploadflow.py @@ -10,7 +10,7 @@ from airflow.providers.ssh.hooks.ssh import SSHHook from airflow.utils.dates import days_ago from b2shareoperator import (add_file, create_draft_record, - get_record_template, submit_draft) + get_community, submit_draft) default_args = { 'owner': 'airflow', @@ -73,6 +73,24 @@ def upload_example(): return mappings + def create_template(hrespo): + return { + "titles" : [{"title": hrespo['title']}], + "creators" : [{"creator_name": hrespo['creator_name']}], + "descriptions" :[ + { + "description": hrespo['description'], + "description_type": "Abstract" + } + ], + "community" : "2d58eb08-af65-4cad-bd25-92f1a17d325b", + "community_specific" :{ + "90942261-4637-4ac0-97b8-12e1edb38739": {"helmholtz centre": ["Forschungszentrum Jülich"]} + }, + "open_access": hrespo['open_access']=="True" + } + + @task() def upload(files: dict, **kwargs): @@ -88,22 +106,18 @@ def upload_example(): hook = HttpHook(http_conn_id='datacat', method='GET') hrespo = hook.run(endpoint=f"storage_target/{mid}").json()['metadata'] print(hrespo) + template = create_template(hrespo=hrespo) + community = get_community(server=server, community_id=template['community']) + if not community: + print("Not existing community") + return + cid, required = community + missing = [r for r in required if r not in template] + if any(missing): + print(f"Community {cid} required field {missing} are missing") + return + - template = { - "titles" : [{"title":hrespo['title']}], - "creators" : [{"creator_name": hrespo['creator_name']}], - "descriptions" :[ - { - "description": hrespo['description'], - "description_type": "Abstract" - } - ], - "community" : "2d58eb08-af65-4cad-bd25-92f1a17d325b", - "community_specific" :{ - "90942261-4637-4ac0-97b8-12e1edb38739": {"helmholtz centre": ["Forschungszentrum Jülich"]} - }, - "open_access": hrespo['open_access']=="True" - } r = create_draft_record(server=server, token=token, record=template) print(r) print(f"Draft record created {r['id']} --> {r['links']['self']}")