diff --git a/README.md b/README.md index 59b2f655572adf2a02ba05ba878666d89ef9c793..5c7f1f8d05f486a1d54d525bb9eddc7156608a14 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ eFlows4HPC Data Logistics Service ``` -mkdir ./logs ./plugins -echo -e "AIRFLOW_UID=$(id -u)\nAIRFLOW_GID=0" > .env +mkdir ./logs +echo -e "AIRFLOW_UID=$(id -u)" > .env reqs=`cat requirements.txt | tr '\n' ' '` echo "_PIP_ADDITIONAL_REQUIREMENTS=$reqs" >> .env diff --git a/dags/taskflow.py b/dags/taskflow.py index c0153ed3d415f23bdf2adb6e9128132694919f84..c86066f98a352dd536dc1e92fbb1be129f1b29d6 100644 --- a/dags/taskflow.py +++ b/dags/taskflow.py @@ -2,6 +2,7 @@ from airflow.decorators import dag, task from airflow.models.connection import Connection from airflow.providers.ssh.hooks.ssh import SSHHook +from airflow.models import Variable from airflow.utils.dates import days_ago import os @@ -39,9 +40,12 @@ def taskflow_example(): @task(multiple_outputs=True) def transform(flist: dict): name_mappings = {} + tmp_dir = Variable.get("working_dir", default_var='/tmp/') + print(f"Local working dir is: {tmp_dir}") + for fname, url in flist.items(): print(f"Processing: {fname} --> {url}") - tmpname = download_file(url=url, target_dir='/tmp/') + tmpname = download_file(url=url, target_dir=tmp_dir) name_mappings[fname] = tmpname return name_mappings diff --git a/dockers/connections.json b/dockers/connections.json new file mode 100644 index 0000000000000000000000000000000000000000..232a3bfa48c3b0353d3aca016d0db544637523d9 --- /dev/null +++ b/dockers/connections.json @@ -0,0 +1,22 @@ +{ + "default_b2share": { + "conn_type": "https", + "description": null, + "host": "b2share-testing.fz-juelich.de", + "login": null, + "password": null, + "schema": "", + "port": null, + "extra": null + }, + "default_ssh": { + "conn_type": "ssh", + "description": null, + "host": "openssh-server", + "login": "eflows", + "password": "rand", + "schema": null, + "port": 2222, + "extra": null + } +} \ No newline at end of file diff --git a/dockers/docker-compose.yaml b/dockers/docker-compose.yaml index b3c5e7b42b10362cf005e1434a2490930f7fbc8c..24c78d4b44dc381c2382b39ca5ffea88c6c44a14 100644 --- a/dockers/docker-compose.yaml +++ b/dockers/docker-compose.yaml @@ -56,13 +56,13 @@ x-airflow-common: AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' AIRFLOW__CORE__LOAD_EXAMPLES: 'false' AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth' - _AIRFLOW_WWW_USER_PASSWORD: 'somepass' _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} volumes: - ./dags:/opt/airflow/dags + - ./config/airflow.cfg:/opt/airflow/airflow.cfg - ./logs:/opt/airflow/logs - ./plugins:/opt/airflow/plugins - - ./config/airflow.cfg:/opt/airflow/airflow.cfg + - ./templates/main.html:/home/airflow/.local/lib/python3.7/site-packages/airflow/www/templates/airflow/main.html user: "${AIRFLOW_UID:-50000}:0" depends_on: &airflow-common-depends-on @@ -102,13 +102,9 @@ services: command: webserver ports: - 7001:8080 - volumes: - - ./templates/footer.html:/home/airflow/.local/lib/python3.7/site-packages/airflow/www/templates/appbuilder/footer.html - - ./templates/main.html:/home/airflow/.local/lib/python3.7/site-packages/airflow/www/templates/airflow/main.html - - ./config/airflow.cfg:/opt/airflow/airflow.cfg healthcheck: test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] - interval: 10s + interval: 60s timeout: 10s retries: 5 restart: always @@ -122,7 +118,7 @@ services: command: scheduler healthcheck: test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"'] - interval: 10s + interval: 60s timeout: 10s retries: 5 restart: always @@ -138,7 +134,7 @@ services: test: - "CMD-SHELL" - 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' - interval: 10s + interval: 30s timeout: 10s retries: 5 environment: @@ -147,6 +143,11 @@ services: # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation DUMB_INIT_SETSID: "0" restart: always + volumes: + - ./dags:/opt/airflow/dags + - ./config/airflow.cfg:/opt/airflow/airflow.cfg + - ./logs:/opt/airflow/logs + - ./tmp/:/work/ depends_on: <<: *airflow-common-depends-on airflow-init: @@ -157,9 +158,14 @@ services: command: triggerer healthcheck: test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] - interval: 10s + interval: 60s timeout: 10s retries: 5 + environment: + <<: *airflow-common-env + # Required to handle warm shutdown of the celery workers properly + # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation + DUMB_INIT_SETSID: "0" restart: always depends_on: <<: *airflow-common-depends-on @@ -256,17 +262,19 @@ services: - -c - airflow - flower: + airflow-setup: <<: *airflow-common - command: celery flower - ports: - - 5555:5555 - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:5555/"] - interval: 10s - timeout: 10s - retries: 5 - restart: always + environment: + <<: *airflow-common-env + CONNECTION_CHECK_MAX_COUNT: "0" + entrypoint: /bin/bash + command: + - -c + - | + exec /entrypoint airflow variables import /opt/airflow/variables.json + echo "Variables added" + volumes: + - ./dockers/variables.json:/opt/airflow/variables.json depends_on: <<: *airflow-common-depends-on airflow-init: diff --git a/requirements.txt b/requirements.txt index 240d00f906211bb0f8dad8dc7426a58b99fea37f..aeecd2f8a68c7beca6f3259e5c3642f6349f8984 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ requests urllib3==1.26.6 +plyvel apache-airflow-providers-ssh apache-airflow-providers-http apache-airflow-providers-sftp diff --git a/templates/footer.html b/templates/footer.html deleted file mode 100644 index 442715dc62e970cc309627fa838c814440cc9a17..0000000000000000000000000000000000000000 --- a/templates/footer.html +++ /dev/null @@ -1,37 +0,0 @@ -{% block footer %} -<footer class="footer d-inlign-flex" style="background-image: url(https://eflows4hpc.eu/wp-content/uploads/2021/02/Barra-gris-footer.png) !important; height: auto; color: #575756 !important;"> - <div class="container p-0"> - <div class="p-0 w-100" style="background-image: url(https://eflows4hpc.eu/wp-content/uploads/2021/01/barra-3-color-8.png) !important; width: 100%; height: 15px; background-repeat: no-repeat; background-size: cover"></div> - <div class="row mt-2 px-3"> - <div class="col-lg-6 col-12 d-inlign-flex"> - <p class="m-3 text-center align-self-center"> - <a href="https://www.eFlows4HPC.eu"> - <img src="https://eflows4hpc.eu/wp-content/uploads/2021/02/logo-blanc_1-1.svg" alt="eFlows4HPC Logo" title="eFlows4HPC" style="height: auto; max-height: 70px;" class="m-4 align-self-center"/> - </a> - <a href="https://twitter.com/eFlows4HPC"><i class="fa fa-twitter-square m-4 fa-2x" style="color: white"></i></a> - <a href="https://www.linkedin.com/company/eflows4hpc/"><i class="fa fa-linkedin-square mr-4 fa-2x" style="color: white"></i></a> - <a href="https://gitlab.jsc.fz-juelich.de/eflows4hpc-wp2/data-logistics-service/"><i class="fa fa-github-square mr-4 fa-2x" style="color: white"></i></a> - </p> - </div> - <div class="col-lg-6 col-12 d-inlign-flex"> - <p class="m-2 align-self-center" style="color: white"> - <span class="mr-3 mt-1 float-left"> - <img loading="lazy" src="https://eflows4hpc.eu/wp-content/uploads/2021/01/bandera-8.png" alt="" style="max-width:52px; max-height:34px"> - </span> - <small style="display: flow-root"> - This work has been supported by the eFlows4HPC project, contract #955558. This project has received funding from the European High-Performance Computing Joint Undertaking (JU) under grant agreement No 955558. - <br> - The JU receives support from the European Union’s Horizon 2020 research and innovation programme and Spain, Germany, France, Italy, Poland, Switzerland, Norway. - <strong> - <a style="color: #f39200" href="https://www.fz-juelich.de/portal/EN/Service/LegalNotice/_node.html">Impressum</a> - </strong> - </small> - <div class="row mt-4 pl-5"> - <p style="border-top: 1px solid darkgray;"><small>This service is based on Apache Airflow {{ version_label }}: {% if airflow_version %}<a href="https://pypi.python.org/pypi/apache-airflow/{{ airflow_version }}" target="_blank">v{{ airflow_version }}</a>{% else %} N/A{% endif %}</small></p> - </div> - </p> - </div> - </div> - </div> -</footer> -{% endblock %} diff --git a/templates/main.html b/templates/main.html index 187275c5a77b89463ed2fb39807e074138c7c7f8..69d1670d715efc7ca4bb82145a784bcae2733394 100644 --- a/templates/main.html +++ b/templates/main.html @@ -18,6 +18,7 @@ #} {% extends 'appbuilder/baselayout.html' %} +{% from 'airflow/_messages.html' import message %} {% block page_title -%} {% if title is defined -%} @@ -51,7 +52,7 @@ {% block messages %} {% include 'appbuilder/flash.html' %} {% if scheduler_job is defined and (not scheduler_job or not scheduler_job.is_alive()) %} - <div class="alert alert-warning"> + {% call message(category='warning', dismissable=false) %} <p>The scheduler does not appear to be running. {% if scheduler_job %} Last heartbeat was received @@ -63,15 +64,61 @@ {% endif %} </p> <p>The DAGs list may not update, and new tasks will not be scheduled.</p> - </div> + {% endcall %} + {% endif %} + {% if triggerer_job is defined and (not triggerer_job or not triggerer_job.is_alive()) %} + {% call message(category='warning', dismissable=false) %} + <p>The triggerer does not appear to be running. + {% if triggerer_job %} + Last heartbeat was received + <time class="scheduler-last-heartbeat" + title="{{ triggerer_job.latest_heartbeat.isoformat() }}" + datetime="{{ triggerer_job.latest_heartbeat.isoformat() }}" + data-datetime-convert="false" + >{{ macros.datetime_diff_for_humans(triggerer_job.latest_heartbeat) }}</time>. + {% endif %} + </p> + <p>Triggers will not run, and any deferred operator will remain deferred until it times out and fails.</p> + {% endcall %} {% endif %} {% endblock %} {% block footer %} - {% if not current_user.is_anonymous %} - {% set version_label = 'Version' %} - {% include 'appbuilder/footer.html' %} - {% endif %} +<footer class="footer" style="background-image: url(https://eflows4hpc.eu/wp-content/uploads/2021/02/Barra-gris-footer.png) !important; height: auto; color: #575756 !important;"> + <div class="container"> + <div class="p-0 w-100" style="background-image: url(https://eflows4hpc.eu/wp-content/uploads/2021/01/barra-3-color-8.png) !important; width: 100%; height: 15px; background-repeat: no-repeat; background-size: cover"></div> + <div class="row mt-2 px-3"> + <div class="col-lg-6 col-12 d-inlign-flex"> + <p class="m-3 text-center align-self-center"> + <a href="https://www.eFlows4HPC.eu"> + <img src="https://eflows4hpc.eu/wp-content/uploads/2021/02/logo-blanc_1-1.svg" alt="eFlows4HPC Logo" title="eFlows4HPC" style="height: auto; max-height: 70px;" class="m-4 align-self-center"/> + </a> + <a href="https://twitter.com/eFlows4HPC"><i class="fa fa-twitter-square m-4 fa-2x" style="color: white"></i></a> + <a href="https://www.linkedin.com/company/eflows4hpc/"><i class="fa fa-linkedin-square mr-4 fa-2x" style="color: white"></i></a> + <a href="https://gitlab.jsc.fz-juelich.de/eflows4hpc-wp2/data-logistics-service/"><i class="fa fa-github-square mr-4 fa-2x" style="color: white"></i></a> + </p> + </div> + <div class="col-lg-6 col-12 d-inlign-flex"> + <p class="m-2 align-self-center" style="color: white"> + <span class="mr-3 mt-1 float-left"> + <img loading="lazy" src="https://eflows4hpc.eu/wp-content/uploads/2021/01/bandera-8.png" alt="" style="max-width:52px; max-height:34px"> + </span> + <small style="display: flow-root"> + This work has been supported by the eFlows4HPC project, contract #955558. This project has received funding from the European High-Performance Computing Joint Undertaking (JU) under grant agreement No 955558. + <br> + The JU receives support from the European Union’s Horizon 2020 research and innovation programme and Spain, Germany, France, Italy, Poland, Switzerland, Norway. + <strong> + <a style="color: #f39200" href="https://www.fz-juelich.de/portal/EN/Service/LegalNotice/_node.html">Impressum</a> + </strong> + </small> + <div class="row mt-4 pl-5"> + <p style="border-top: 1px solid darkgray;"><small>This service is based on Apache Airflow {{ version_label }}: {% if airflow_version %}<a href="https://pypi.python.org/pypi/apache-airflow/{{ airflow_version }}" target="_blank">v{{ airflow_version }}</a>{% else %} N/A{% endif %}</small></p> + </div> + </p> + </div> + </div> + </div> +</footer> {% endblock %} {% block tail_js %}