default:
  image: python:3.9-slim

variables:
  OS_AUTH_TYPE: v3applicationcredential
  OS_AUTH_URL: https://hdf-cloud.fz-juelich.de:5000
  OS_IDENTITY_API_VERSION: 3
  OS_REGION_NAME: "HDFCloud"
  OS_INTERFACE: public
  PRODUCTION_IP: 134.94.199.220
  OLD_PROD_NAME: old-airflow-production
  PRODUCTION_NAME: airflow-production
  PRODUCTION_URL: https://datalogistics.eflows4hpc.eu
  PRODUCTION_DOMAIN: datalogistics.eflows4hpc.eu
  AIRFLOW_TESTUSER: "airflow"
  AIRFLOW__SECRETS__BACKEND_KWARGS: $TESTING_AIRFLOW__SECRETS__BACKEND_KWARGS
  AIRFLOW__SECRETS__BACKEND: datacat_integration.secrets.DatacatSecretsBackend
  VOLUME_ID: 6b58c3a6-691b-496a-8afd-153637c2de48
  DOCKER_TLS_CERTDIR: ""

# before script copied from gitlab docs
.before_script_template: &ssh_setup
  before_script:
    - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client gcc libxslt-dev libffi-dev libssl-dev build-essential python3-dev -y )'
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
    - mkdir -p ~/.ssh
    - chmod 700 ~/.ssh

stages:
  - test
  - build
  - publish
  - deploy
  - test-deployment
  - cleanup

test:
  stage: test
  image:
    name: $CI_REGISTRY_IMAGE/eflows-airflow:latest
    entrypoint: [""]
  before_script:
    - echo "DEBUG:"
    - pip --version
    - airflow db init
    - pip install -r requirements.txt
    - pip install nose==1.3.7
    - airflow connections add --conn-uri https://b2share-testing.fz-juelich.de/ default_b2share
  script:
    - ls
    - pwd
    - cp dags/* /opt/airflow/dags/
    - airflow dags list
    - airflow connections list
    - airflow dags test testdag 2021-08-18
    - nosetests

build-custom-image:
  stage: build
  image: docker:latest
  services:
    - docker:dind
  when: manual
  tags:
    - laptop
  variables:
    IMAGE_COMMIT_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:$CI_COMMIT_SHORT_SHA
    IMAGE_LATEST_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:latest
    DOCKER_HOST: tcp://localhost:2375/
  script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker build --no-cache=true --pull -t $IMAGE_COMMIT_TAG -f dockers/eflows-airflow.docker .
    - docker push $IMAGE_COMMIT_TAG
    - docker tag $IMAGE_COMMIT_TAG $IMAGE_LATEST_TAG
    - docker push $IMAGE_LATEST_TAG

full-deploy-production:
  stage: deploy
  environment: Production
  only:
    - web
  <<: *ssh_setup
  script:
    - echo "Starting the full testing deployment of airflows example."
    - pip install python-openstackclient
    - OLD_ID=`openstack server show $PRODUCTION_NAME -f value -c id` && server_exists=true || echo "No production server found. It might be a first time deployment"
    - if [ "$server_exists" = true ] ; then
        ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo docker-compose -f /home/airflow/data-logistics-service/dockers/docker-compose.yaml --project-directory /home/airflow/eflows-airflow down" ;
        openstack server set --name $OLD_PROD_NAME $OLD_ID ;
      fi
    - openstack server remove volume $OLD_ID $VOLUME_ID
    - INSTANCE_ID=`openstack server create -f value -c id --prefix IMAGE_ --flavor m4 --image 149a65b5-aeb8-499f-aaa6-ec966bd28dd6 --user-data scripts/cloudinit.yml --security-group ssh --security-group www --security-group https $PRODUCTION_NAME`
    - while [ "`openstack server show $INSTANCE_ID -c addresses -f value`" = "{}" ]; do sleep 5; done # wait until an address is available to attach the floating ip
    - openstack server add floating ip $INSTANCE_ID $PRODUCTION_IP
    - sleep 10 # ensure that the next command reaches the new server, prevents host key problems
    # attach and mount the persistent volume (/dev/vdb1)
    - openstack server add volume $INSTANCE_ID $VOLUME_ID
    - sleep 20 # apparently it may take some time until the volume is available to the OS
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo mkdir -p /persistent_data && sudo mount /dev/vdb1 /persistent_data"
    - until ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until the cloudinit script is complete
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo service docker restart" # to use the configured docker data path
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY"
    - echo "Done"

# NOTE Light deployment did not perform well when the template/main.html file was changed (in case of the official airflow image being updated)
# TODO Add proper tests
light-deploy-production:
  stage: deploy
  # only run when main is updated, unless the pipeline was triggered via the web UI
  only:
    - main
  except:
    - tags
    - web
  <<: *ssh_setup
  environment: Production
  script:
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS" # TODO add $AIRFLOW_FERNET_KEY

test-production-webserver:
  cache: {}
  stage: test-deployment
  only:
    - web # and main
  except:
    - tags
  script:
    - apt update && apt -y install curl
    - echo "This is a simple check if the deployment was successful and dags get executed"
    # ensure that the docker containers are up and running before testing the airflow deployment; timeout in 16 to 17 minutes
    - SECONDS=0
    - 'while [ $SECONDS -le 1000 ] ; do if output=$(curl --insecure --max-time 10 -I -H "Accept: application/json" $PRODUCTION_URL/home) ; then break; else sleep 30; fi ; done'
    - 'curl --insecure -I -H "Accept: application/json" $PRODUCTION_URL/home'
    - 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/dags'
    - 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/connections'
    - 'curl -X POST -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" --data {} $PRODUCTION_URL/api/v1/dags/testdag/dagRuns'

cleanup-successful-full-deployment:
  # check if there is an old prod or test instance, and delete it if present
  stage: cleanup
  when: on_success
  only:
    - web
  script:
    - echo "This is the cleanup for the full redeployment of the testing or production servers"
    - echo "If this job is reached, all earlier jobs were successful, and any lingering old instances need to be removed"
    - pip install python-openstackclient
    - openstack server delete $OLD_TEST_NAME && echo "Deleted old testing server." || echo "No old testing server found."

cleanup-failed-full-deployment:
  # check if there is an old prod or test instance, assign the respective ip to it, re-attach the volume, delete the new instance, and rename the old instance
  # if there is none, this is a failed light-deployment, which is handled by another job
  # this does not guarantee a successful rollback, but unless the old instance was faulty, it should work
  stage: cleanup
  when: on_failure
  only:
    - web
  <<: *ssh_setup
  script:
    - echo "This is the cleanup for the full redeployment of the testing or production servers"
    - echo "If this job is reached, some earlier job must have failed; this will return to the previous instance (if available)"
    - echo "A successful cleanup cannot be guaranteed, depending on the failure reason"
    - pip install python-openstackclient
    # check which old instance is present (either old test or old production); store the instance id in a variable
    - OLD_TEST_ID=`openstack server show $OLD_TEST_NAME -f value -c id` && rollback_test=true || echo "No old testing server found."
    # if applicable, roll back the test server
    - if [ "$rollback_test" = true ] ; then
        REMOVE_ID=`openstack server show $TESTING_NAME -f value -c id` && new_deployment_exists=true || echo "No new testing server has been created." ;
        openstack server set --name $TESTING_NAME $OLD_TEST_ID ;
        openstack server add floating ip $OLD_TEST_ID $TESTING_IP ;
        if [ "$new_deployment_exists" = true ] ; then openstack server delete $REMOVE_ID && echo "Deleted faulty testing server." ; fi ;
      fi
    # gitlab should automatically alert the devs about this failure

publishgit-do:
  stage: publish
  only:
    - tags
  tags: [stable]
  script:
    - apt-get update
    - apt-get install -y git
    - (git remote rm gith) || echo "Not found"
    - (git remote -v | grep gith) || git remote add gith "https://${GITHUB_USER}:${GITHUB_TOKEN}@github.com/eflows4hpc/data-logistics-service.git"
    - git remote -v
    - git fetch --unshallow origin
    - git push gith +HEAD:refs/heads/main
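
# Assumption (not part of the original pipeline definition): the variables below are referenced
# above but not defined in this file, so they are presumably supplied as CI/CD variables in the
# GitLab project settings:
#   SSH_PRIVATE_KEY                           - used by the ssh_setup anchor to reach the servers
#   TESTING_AIRFLOW__SECRETS__BACKEND_KWARGS  - copied into AIRFLOW__SECRETS__BACKEND_KWARGS
#   AIRFLOW_TESTUSER_PASS                     - password for the smoke-test API calls
#   AIRFLOW_FERNET_KEY                        - forwarded to scripts/deployment.sh
#   OLD_TEST_NAME, TESTING_NAME, TESTING_IP   - used by the cleanup jobs
#   GITHUB_USER, GITHUB_TOKEN                 - used by publishgit-do to push to GitHub
# CI_REGISTRY*, CI_COMMIT_*, and similar are predefined GitLab CI variables.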