From da734052d53db8488d7fd1de086ab7c5e857a632 Mon Sep 17 00:00:00 2001
From: jrybicki-jsc <j.rybicki@fz-juelich.de>
Date: Mon, 11 Apr 2022 11:12:17 +0200
Subject: [PATCH] check if image exists before overwritting

---
 dags/image_transfer.py | 18 ++++++++++++++++--
 docs/apirequests.adoc  |  2 +-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/dags/image_transfer.py b/dags/image_transfer.py
index 02e764f..ba2da06 100644
--- a/dags/image_transfer.py
+++ b/dags/image_transfer.py
@@ -11,7 +11,12 @@ default_args = {
     'owner': 'airflow',
 }
 
-
+def file_exist(sftp, name):
+    try:
+        r = sftp.stat(name)  
+        return r.st_size
+    except:
+        return -1
 
 @dag(default_args=default_args, schedule_interval=None, start_date=days_ago(2), tags=['example'])
 def transfer_image():
@@ -28,10 +33,19 @@ def transfer_image():
 
         with ssh_hook.get_conn() as ssh_client:
             sftp_client = ssh_client.open_sftp()
+            remote_name = os.path.join(target, image_id)
+            size = file_exist(sftp=sftp_client, name=remote_name)
+            if size>0:
+                print(f"File {remote_name} exists and has {size} bytes")
+                force = params.get('force', True)
+                if force!= True:
+                    return 0
+                print("Forcing overwrite")
+
             ssh_client.exec_command(command=f"mkdir -p {target}")
             
             with requests.get(url, stream=True, verify=False) as r:
-                with sftp_client.open(os.path.join(target, image_id), 'wb') as f:
+                with sftp_client.open(remote_name, 'wb') as f:
                     f.set_pipelined(pipelined=True)
                     while True:
                         chunk=r.raw.read(1024 * 1000)
diff --git a/docs/apirequests.adoc b/docs/apirequests.adoc
index d3276c7..3b5d9a6 100644
--- a/docs/apirequests.adoc
+++ b/docs/apirequests.adoc
@@ -118,7 +118,7 @@ Optionally, the record created in b2share can be registered with data cat. This
 To transfer images from eFlows4HPC image build service use dag defined in +dags/image_transfer.py+ (transfer_image). It requires two parameters +image_id+ name of the image in the image
 build service (e.g. "wordcount_skylake.sif") and +target+ which defines a path on the system where the image will be transfered to. 
 
-The parameters should be passed along the credentials as described in <<credentials>>. The target directory will be created with ``mkdir -p`` on the target machine. The image is streamed directly to the target location (no local copy on DLS worker).
+The parameters should be passed along the credentials as described in <<credentials>>. The target directory will be created with ``mkdir -p`` on the target machine. The image is streamed directly to the target location (no local copy on DLS worker). By default the file on the target location will be overwritten, this can be disabled by providing +force='false'+ as dag parameter.  
 
 ----
 curl -X POST -u USER:PASS -H "Content-Type: application/json" \
-- 
GitLab