Skip to content
Snippets Groups Projects
Commit da734052 authored by Jedrzej Rybicki's avatar Jedrzej Rybicki
Browse files

check if image exists before overwritting

parent 8c689fd1
No related branches found
No related tags found
No related merge requests found
Pipeline #97525 passed
...@@ -11,7 +11,12 @@ default_args = { ...@@ -11,7 +11,12 @@ default_args = {
'owner': 'airflow', 'owner': 'airflow',
} }
def file_exist(sftp, name):
try:
r = sftp.stat(name)
return r.st_size
except:
return -1
@dag(default_args=default_args, schedule_interval=None, start_date=days_ago(2), tags=['example']) @dag(default_args=default_args, schedule_interval=None, start_date=days_ago(2), tags=['example'])
def transfer_image(): def transfer_image():
...@@ -28,10 +33,19 @@ def transfer_image(): ...@@ -28,10 +33,19 @@ def transfer_image():
with ssh_hook.get_conn() as ssh_client: with ssh_hook.get_conn() as ssh_client:
sftp_client = ssh_client.open_sftp() sftp_client = ssh_client.open_sftp()
remote_name = os.path.join(target, image_id)
size = file_exist(sftp=sftp_client, name=remote_name)
if size>0:
print(f"File {remote_name} exists and has {size} bytes")
force = params.get('force', True)
if force!= True:
return 0
print("Forcing overwrite")
ssh_client.exec_command(command=f"mkdir -p {target}") ssh_client.exec_command(command=f"mkdir -p {target}")
with requests.get(url, stream=True, verify=False) as r: with requests.get(url, stream=True, verify=False) as r:
with sftp_client.open(os.path.join(target, image_id), 'wb') as f: with sftp_client.open(remote_name, 'wb') as f:
f.set_pipelined(pipelined=True) f.set_pipelined(pipelined=True)
while True: while True:
chunk=r.raw.read(1024 * 1000) chunk=r.raw.read(1024 * 1000)
......
...@@ -118,7 +118,7 @@ Optionally, the record created in b2share can be registered with data cat. This ...@@ -118,7 +118,7 @@ Optionally, the record created in b2share can be registered with data cat. This
To transfer images from eFlows4HPC image build service use dag defined in +dags/image_transfer.py+ (transfer_image). It requires two parameters +image_id+ name of the image in the image To transfer images from eFlows4HPC image build service use dag defined in +dags/image_transfer.py+ (transfer_image). It requires two parameters +image_id+ name of the image in the image
build service (e.g. "wordcount_skylake.sif") and +target+ which defines a path on the system where the image will be transfered to. build service (e.g. "wordcount_skylake.sif") and +target+ which defines a path on the system where the image will be transfered to.
The parameters should be passed along the credentials as described in <<credentials>>. The target directory will be created with ``mkdir -p`` on the target machine. The image is streamed directly to the target location (no local copy on DLS worker). The parameters should be passed along the credentials as described in <<credentials>>. The target directory will be created with ``mkdir -p`` on the target machine. The image is streamed directly to the target location (no local copy on DLS worker). By default the file on the target location will be overwritten, this can be disabled by providing +force='false'+ as dag parameter.
---- ----
curl -X POST -u USER:PASS -H "Content-Type: application/json" \ curl -X POST -u USER:PASS -H "Content-Type: application/json" \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment