Newer
Older
image: python:3.9-slim

Maria Petrova-El Sayed
committed
variables:
OS_AUTH_TYPE: v3applicationcredential
OS_AUTH_URL: https://hdf-cloud.fz-juelich.de:5000
OS_IDENTITY_API_VERSION: 3
OS_REGION_NAME: "HDFCloud"
OS_INTERFACE: public
PRODUCTION_IP: 134.94.199.220
OLD_PROD_NAME: old-airflow-production
PRODUCTION_NAME: airflow-production
PRODUCTION_URL: https://datalogistics.eflows4hpc.eu
PRODUCTION_DOMAIN: datalogistics.eflows4hpc.eu
AIRFLOW_TESTUSER: "airflow"
AIRFLOW__SECRETS__BACKEND: datacat_integration.secrets.DatacatSecretsBackend
DAG_GIT_URL: https://github.com/eflows4hpc/dls-dags
VOLUME_ID: 6b58c3a6-691b-496a-8afd-153637c2de48
TESTING_IP: 134.94.199.115
OLD_TEST_NAME: old-airflow-testing
TESTING_NAME: airflow-testing
TESTING_URL: https://zam10115.zam.kfa-juelich.de
TESTING_DOMAIN: zam10115.zam.kfa-juelich.de
METADATA_URL: https://zam10045.zam.kfa-juelich.de:7000/oauth2/.well-known/openid-configuration
TESTING_OAUTH_ID: ff3f5a29-d210-4be2-a6d4-93c4fc755bfe
PRODUCTION_OAUTH_ID: affcd446-cab1-4751-8d76-e792f830d415
# before script copied from gitlab docs
.before_script_template: &ssh_setup
before_script:
- 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client gcc libxslt-dev libffi-dev libssl-dev build-essential python3-dev -y )'
- eval $(ssh-agent -s)
- echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
- mkdir -p ~/.ssh
- chmod 700 ~/.ssh

Maria Petrova-El Sayed
committed
- build
- publish
- deploy
build-testing-image:
stage: build
image: docker:latest
services:
- docker:dind
when: manual
tags:
- laptop
variables:
IMAGE_COMMIT_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:testing
script:
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
- docker build --no-cache=true --pull -t $IMAGE_COMMIT_TAG -f dockers/eflows-airflow.docker .
- docker push $IMAGE_COMMIT_TAG

Maria Petrova-El Sayed
committed
build-production-image:
stage: build
image: docker:latest
services:
- docker:dind
variables:
IMAGE_COMMIT_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:$CI_COMMIT_SHORT_SHA
IMAGE_LATEST_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:latest
script:
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
- docker build --no-cache=true --pull -t $IMAGE_COMMIT_TAG -f dockers/eflows-airflow.docker .
- docker push $IMAGE_COMMIT_TAG
- docker tag $IMAGE_COMMIT_TAG $IMAGE_LATEST_TAG
- docker push $IMAGE_LATEST_TAG
full-deploy-testing:
stage: deploy
environment: Testing
rules:
- if: ($CI_COMMIT_BRANCH == "main" && $MANUAL_FULL_DEPLOY == "true")
<<: *ssh_setup
script:
- echo "Starting the full testing deployment of airflows."
- pip install python-openstackclient
- OLD_ID=`openstack server show $TESTING_NAME -f value -c id` && server_exists=true || echo "No testing server found. It might be a first time deployment"
- openstack server set --name $OLD_TEST_NAME $OLD_ID;
# - if [ "$server_exists" = true ] ; then
# ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo docker-compose -f /home/airflow/data-logistics-service/dockers/docker-compose.yaml --project-directory /home/airflow/eflows-airflow down"
# openstack server set --name $OLD_TEST_NAME $OLD_ID;
# fi
- INSTANCE_ID=`openstack server create -f value -c id --prefix IMAGE_ --flavor l2 --image 149a65b5-aeb8-499f-aaa6-ec966bd28dd6 --user-data scripts/cloudinit.yml --security-group ssh --security-group www --security-group https $TESTING_NAME`
- while [ "`openstack server show $INSTANCE_ID -c addresses -f value`" = "{}" ]; do sleep 5; done # wait until an address is available to attach the floating ip
- openstack server add floating ip $INSTANCE_ID $TESTING_IP
- sleep 10 # ensure that next command reaches the new server, prevents host key problems
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo mkdir -p /persistent_data/docker_volumes"
- until ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo service docker restart" # to use the configured docker data path
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "docker volume create --name=persistent_postgres-db-volume"
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "docker volume create --name=persistent_certs"
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
- ssh -oStrictHostKeyChecking=accept-new airflow@$PTESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
- if: ($CI_COMMIT_BRANCH == "main" && $MANUAL_FULL_DEPLOY !~ /true/ )
<<: *ssh_setup
environment: Testing
script:
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
force-light-deploy-testing: # for deploying images generated on other branches to testing - can only be done by logged in and authorized users
stage: deploy
when: manual
<<: *ssh_setup
environment: Testing
script:
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
- ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
full-deploy-production:
stage: deploy
environment: Production
- if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY == "true")

Maria Petrova-El Sayed
committed
<<: *ssh_setup
- echo "Starting the full production deployment of airflows."
- pip install python-openstackclient
- OLD_ID=`openstack server show $PRODUCTION_NAME -f value -c id` && server_exists=true || echo "No production server found. It might be a first time deployment"
- openstack server set --name $OLD_PROD_NAME $OLD_ID
# - if [ "$server_exists" = true ] ; then
# ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo docker-compose -f /home/airflow/data-logistics-service/dockers/docker-compose.yaml --project-directory /home/airflow/eflows-airflow down"
# openstack server set --name $OLD_PROD_NAME $OLD_ID;
# fi
- openstack server remove volume $OLD_ID $VOLUME_ID
- INSTANCE_ID=`openstack server create -f value -c id --prefix IMAGE_ --flavor m4 --image 149a65b5-aeb8-499f-aaa6-ec966bd28dd6 --user-data scripts/cloudinit.yml --security-group ssh --security-group www --security-group https $PRODUCTION_NAME`
- while [ "`openstack server show $INSTANCE_ID -c addresses -f value`" = "{}" ]; do sleep 5; done # wait until an address is available to attach the floating ip
- openstack server add floating ip $INSTANCE_ID $PRODUCTION_IP
- sleep 10 # ensure that next command reaches the new server, prevents host key problems
# do the mount /dev/vdb1 stuff
- openstack server add volume $INSTANCE_ID $VOLUME_ID
- sleep 20 # apparently it may take some time until the volume is available to the OS
- while [ "`ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls `" ]; do sleep 5; done
- until ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
- ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo mkdir -p /persistent_data && sudo mount /dev/vdb1 /persistent_data"
- ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo service docker restart" # to use the configured docker data path
- ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $PRODUCTION_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
- echo "Done"

Maria Petrova-El Sayed
committed

Maria Petrova-El Sayed
committed
# NOTE Light deployment did not perform well when the template/main.html file was changed (in case of the official airflow image being updated)
# TODO Add proper tests
light-deploy-production:
stage: deploy
- if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY !~ /true/)
<<: *ssh_setup
environment: Production
script:
- ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git fetch --all && git checkout -f $CI_COMMIT_TAG && rm -rf dags && git clone https://github.com/eflows4hpc/dls-dags.git dags"
- ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $PRODUCTION_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
test-production-webserver:
cache: {}
stage: test-deployment
- echo "This is a simple check if the deployment was successful and dags get executed"
# ensure that the docker containers are up and running before testing the airflow deployment; timeout in 16 to 17 minutes
- 'while [ $SECONDS -le 1000 ] ; do if output=$(curl --insecure --max-time 10 -I -H "Accept: application/json" $PRODUCTION_URL/home) ; then break; else sleep 30; fi ; done'
- 'curl --insecure -I -H "Accept: application/json" $PRODUCTION_URL/home'
- 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/dags'
- 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/connections'
- 'curl -X POST -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" --data {} $PRODUCTION_URL/api/v1/dags/testdag/dagRuns'

Maria Petrova-El Sayed
committed
cleanup-successful-full-deployment:
# check if there is an old prod or test instance, and delete it if present
stage: cleanup
when: on_success
- if: $MANUAL_FULL_DEPLOY == "true"

Maria Petrova-El Sayed
committed
script:
- echo "This is the cleanup for the full-redeployment of the testing or production servers"
- echo "if this job is reached, all earlier jobs were successful, and any lingering old instances need to be removed"
- pip install python-openstackclient
- openstack server delete $OLD_TEST_NAME && echo "Deleted old testing server." || echo "No old testing server found."
- openstack server delete $OLD_PROD_NAME && echo "Deleted old production server." || echo "No old production server found."
cleanup-failed-full-deployment:
# check if there is an old prod or test instance, assign respective ip to it, re-attach volume, delete new instance, rename old instance
# if there is none, this is a failed light-deployment, which is handled by another job
# this does not guarantee a successful rollback, but unless the old instance was faulty, this should work
stage: cleanup
when: on_failure
- if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY == "true")
<<: *ssh_setup
script:
- echo "This is the cleanup for the full-redeployment of the testing or production servers"
- echo "if this job is reached, some earlier job had to have failed, this will return to the previous instance (if available)"
- echo "A successfull cleanup can not be guaranteed, depending on the failure reason"
- pip install python-openstackclient
# check which old instance is present. (either old test or old production); store instance id in a var
- OLD_TEST_ID=`openstack server show $OLD_TEST_NAME -f value -c id` && rollback_test=true || echo "No old testing server found."
# if applicable: rollback test server
- if [ "$rollback_test" = true ] ; then
REMOVE_ID=`openstack server show $TESTING_NAME -f value -c id` && new_deployment_exists=true|| echo "No new testing server has been created.";
openstack server set --name $TESTING_NAME $OLD_TEST_ID;
openstack server add floating ip $OLD_TEST_ID $TESTING_IP;
if [ "$new_deployment_exists" = true ] ; then
openstack server delete $REMOVE_ID && echo "Deleted faulty testing server.";
fi

Maria Petrova-El Sayed
committed
# gitlab should automatically alert the devs about this failure
publishgit-do:
stage: publish
only:
- tags
tags: [stable]
script:
- apt-get update
- apt-get install -y git
- (git remote -v | grep gith) || git remote add gith "https://${GITHUB_USER}:${GITHUB_TOKEN}@github.com/eflows4hpc/data-logistics-service.git"
- git remote -v