diff --git a/dags/b2shareoperator.py b/dags/b2shareoperator.py index f9a4cb3fd36031a3d6313fb875c8373756c6719f..f56f8492bc21ed1b8f177b41c11787fc401fcb3c 100644 --- a/dags/b2shareoperator.py +++ b/dags/b2shareoperator.py @@ -20,33 +20,51 @@ def get_object_md(server, oid): return obj def download_file(url: str, target_dir: str): - fname = tempfile.mktemp(dir=target_dir) urllib.request.urlretrieve(url=url, filename=fname) return fname -server='https://b2share-testing.fz-juelich.de/' class B2ShareOperator(BaseOperator): + template_fields = ('target_dir',) def __init__( self, name: str, - conn_id: str = 'default_b2share', + conn_id: str = 'default_b2share', # 'https://b2share-testing.fz-juelich.de/', + target_dir: str = '/tmp/', **kwargs) -> None: super().__init__(**kwargs) self.name = name self.conn_id = conn_id + self.target_dir = target_dir + print(self.target_dir) + + def execute(self, **kwargs): + connection = Connection.get_connection_from_secrets('default_b2share') + server = connection.get_uri() + print(f"Rereiving data from {server}") + + print('Kwargs') print(kwargs) - def execute(self, context): + params = kwargs['context']['params'] + oid = params['oid'] + obj = get_object_md(server=server, oid=oid) + print(f"Retrieved object {oid}: {obj}") + flist = get_file_list(obj) + + ti = kwargs['context']['ti'] + name_mappings = {} + for fname, url in flist.items(): + tmpname = download_file(url=url, target_dir=self.target_dir) + print(f"Processing: {fname} --> {url} --> {tmpname}") - connection = Connection.get_connection_from_secrets(self.conn_id) - print(f"Rereiving data from {connection.get_uri()}") + name_mappings[fname]=tmpname + ti.xcom_push(key='local', value=tmpname) + ti.xcom_push(key='remote', value=fname) + break # for now only one file - lst = get_objects(server=connection.get_uri()) - flist = {o['id']: [f['key'] for f in o['files']] for o in lst} - print(f"GOT: {flist}") - print(self.params) - return len(flist) + + return len(name_mappings) diff --git a/dags/firsto.py b/dags/firsto.py index eb2a6b1e3c7ee8d855ab77c83c0bdd21c95d8c26..abe85fb97f61ba67ca10f6b2fe137eb5db352629 100644 --- a/dags/firsto.py +++ b/dags/firsto.py @@ -5,7 +5,7 @@ from airflow import DAG from airflow.utils.dates import days_ago from airflow.operators.bash import BashOperator - +from airflow.providers.sftp.operators.sftp import SFTPOperator from b2shareoperator import B2ShareOperator def_args = { @@ -19,10 +19,20 @@ def_args = { } with DAG('firsto', default_args=def_args, description='first dag', schedule_interval=timedelta(days=1), start_date=days_ago(2)) as dag: - t1 = BashOperator(task_id='print_date', bash_command='date') - t2 = BashOperator(task_id='do_noting', bash_command='sleep 5') - - t3 = B2ShareOperator(task_id='task_b2sh', dag=dag, name='B2Share') - t1 >> t2 >> t3 + get_b2obj = B2ShareOperator(task_id='task_b2sh', + dag=dag, + name='B2Share', + target_dir="{{ var.value.source_path}}") + + put_file = SFTPOperator( + task_id="upload_scp", + ssh_conn_id="default_ssh", + local_filepath="{{ti.xcom_pull(task_ids='task_b2sh', key='local')}}", + remote_filepath="{{ti.xcom_pull(task_ids='task_b2sh',key='remote')}}", + operation="put", + create_intermediate_dirs=True, + dag=dag) + + get_b2obj >> put_file