From 2bfd066bf5a37f0275ae6f8133ab25c12dca109e Mon Sep 17 00:00:00 2001 From: jrybicki-jsc <j.rybicki@fz-juelich.de> Date: Tue, 7 Sep 2021 10:30:34 +0200 Subject: [PATCH] same workflow now without taskflow --- dags/b2shareoperator.py | 40 +++++++++++++++++++++++++++++----------- dags/firsto.py | 22 ++++++++++++++++------ 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/dags/b2shareoperator.py b/dags/b2shareoperator.py index f9a4cb3..f56f849 100644 --- a/dags/b2shareoperator.py +++ b/dags/b2shareoperator.py @@ -20,33 +20,51 @@ def get_object_md(server, oid): return obj def download_file(url: str, target_dir: str): - fname = tempfile.mktemp(dir=target_dir) urllib.request.urlretrieve(url=url, filename=fname) return fname -server='https://b2share-testing.fz-juelich.de/' class B2ShareOperator(BaseOperator): + template_fields = ('target_dir',) def __init__( self, name: str, - conn_id: str = 'default_b2share', + conn_id: str = 'default_b2share', # 'https://b2share-testing.fz-juelich.de/', + target_dir: str = '/tmp/', **kwargs) -> None: super().__init__(**kwargs) self.name = name self.conn_id = conn_id + self.target_dir = target_dir + print(self.target_dir) + + def execute(self, **kwargs): + connection = Connection.get_connection_from_secrets('default_b2share') + server = connection.get_uri() + print(f"Rereiving data from {server}") + + print('Kwargs') print(kwargs) - def execute(self, context): + params = kwargs['context']['params'] + oid = params['oid'] + obj = get_object_md(server=server, oid=oid) + print(f"Retrieved object {oid}: {obj}") + flist = get_file_list(obj) + + ti = kwargs['context']['ti'] + name_mappings = {} + for fname, url in flist.items(): + tmpname = download_file(url=url, target_dir=self.target_dir) + print(f"Processing: {fname} --> {url} --> {tmpname}") - connection = Connection.get_connection_from_secrets(self.conn_id) - print(f"Rereiving data from {connection.get_uri()}") + name_mappings[fname]=tmpname + ti.xcom_push(key='local', value=tmpname) + ti.xcom_push(key='remote', value=fname) + break # for now only one file - lst = get_objects(server=connection.get_uri()) - flist = {o['id']: [f['key'] for f in o['files']] for o in lst} - print(f"GOT: {flist}") - print(self.params) - return len(flist) + + return len(name_mappings) diff --git a/dags/firsto.py b/dags/firsto.py index eb2a6b1..abe85fb 100644 --- a/dags/firsto.py +++ b/dags/firsto.py @@ -5,7 +5,7 @@ from airflow import DAG from airflow.utils.dates import days_ago from airflow.operators.bash import BashOperator - +from airflow.providers.sftp.operators.sftp import SFTPOperator from b2shareoperator import B2ShareOperator def_args = { @@ -19,10 +19,20 @@ def_args = { } with DAG('firsto', default_args=def_args, description='first dag', schedule_interval=timedelta(days=1), start_date=days_ago(2)) as dag: - t1 = BashOperator(task_id='print_date', bash_command='date') - t2 = BashOperator(task_id='do_noting', bash_command='sleep 5') - - t3 = B2ShareOperator(task_id='task_b2sh', dag=dag, name='B2Share') - t1 >> t2 >> t3 + get_b2obj = B2ShareOperator(task_id='task_b2sh', + dag=dag, + name='B2Share', + target_dir="{{ var.value.source_path}}") + + put_file = SFTPOperator( + task_id="upload_scp", + ssh_conn_id="default_ssh", + local_filepath="{{ti.xcom_pull(task_ids='task_b2sh', key='local')}}", + remote_filepath="{{ti.xcom_pull(task_ids='task_b2sh',key='remote')}}", + operation="put", + create_intermediate_dirs=True, + dag=dag) + + get_b2obj >> put_file -- GitLab