variables:
  OS_AUTH_TYPE: v3applicationcredential
  OS_AUTH_URL: https://hdf-cloud.fz-juelich.de:5000
  OS_IDENTITY_API_VERSION: 3
  OS_REGION_NAME: "HDFCloud"
  OS_INTERFACE: public
  PRODUCTION_IP: 134.94.199.220
  OLD_PROD_NAME: old-airflow-production
  PRODUCTION_NAME: airflow-production
  PRODUCTION_URL: https://datalogistics.eflows4hpc.eu
  PRODUCTION_DOMAIN: datalogistics.eflows4hpc.eu
  AIRFLOW_TESTUSER: "airflow"
  AIRFLOW__SECRETS__BACKEND: datacat_integration.secrets.DatacatSecretsBackend
  DAG_GIT_URL: https://github.com/eflows4hpc/dls-dags
  VOLUME_ID: 6b58c3a6-691b-496a-8afd-153637c2de48
  DOCKER_TLS_CERTDIR: ""
  TESTING_IP: 134.94.199.115
  OLD_TEST_NAME: old-airflow-testing
  TESTING_NAME: airflow-testing
  TESTING_URL: https://zam10115.zam.kfa-juelich.de
  TESTING_DOMAIN: zam10115.zam.kfa-juelich.de
  METADATA_URL: https://zam10045.zam.kfa-juelich.de:7000/oauth2/.well-known/openid-configuration
  TESTING_OAUTH_ID: ff3f5a29-d210-4be2-a6d4-93c4fc755bfe
  PRODUCTION_OAUTH_ID: affcd446-cab1-4751-8d76-e792f830d415
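
# shared before_script: installs an SSH client and loads $SSH_PRIVATE_KEY (assumed to be a
# masked CI/CD variable holding the deployment key) into an ssh-agent, so jobs can reach the VMs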
.before_script_template: &ssh_setup
  before_script:
    - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client gcc libxslt-dev libffi-dev libssl-dev build-essential python3-dev -y )'
    - eval $(ssh-agent -s)
    - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
    - mkdir -p ~/.ssh
    - chmod 700 ~/.ssh

stages:
  - build
  - deploy
  - test-deployment
  - cleanup
  - publish
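
# build the eflows-airflow Docker image and push it to the GitLab registry under the :testing tag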
build-testing-image:
  stage: build
  image: docker:latest
  services:
    - docker:dind
  when: manual
  tags:
    - laptop
  variables:
    IMAGE_COMMIT_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:testing
    
  script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker build --no-cache=true --pull -t $IMAGE_COMMIT_TAG -f dockers/eflows-airflow.docker .
    - docker push $IMAGE_COMMIT_TAG
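
# the production image build mirrors the testing one, but tags the image with the commit SHA and :latest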

build-production-image:
  stage: build
  image: docker:latest
  services:
    - docker:dind
  when: manual
  tags:
    - laptop
  variables:
    IMAGE_COMMIT_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:$CI_COMMIT_SHORT_SHA
    IMAGE_LATEST_TAG: $CI_REGISTRY_IMAGE/eflows-airflow:latest
  script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker build --no-cache=true --pull -t $IMAGE_COMMIT_TAG -f dockers/eflows-airflow.docker .
    - docker push $IMAGE_COMMIT_TAG
    - docker tag $IMAGE_COMMIT_TAG $IMAGE_LATEST_TAG
    - docker push $IMAGE_LATEST_TAG
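
# full deployment: rename the running VM, boot a fresh one from the base image via cloud-init,
# move the floating ip over, and run the deployment script on the new instance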

full-deploy-testing:
  stage: deploy
  environment: Testing
  rules:
    - if: ($CI_COMMIT_BRANCH == "main" && $MANUAL_FULL_DEPLOY == "true")
  <<: *ssh_setup
  script:
    - echo "Starting the full testing deployment of airflows."
    - pip install python-openstackclient
    - OLD_ID=`openstack server show $TESTING_NAME -f value -c id` && server_exists=true || echo "No testing server found. It might be a first time deployment"
    - openstack server set --name $OLD_TEST_NAME $OLD_ID;
#    - if [ "$server_exists" = true ] ; then
#      ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo docker-compose -f /home/airflow/data-logistics-service/dockers/docker-compose.yaml --project-directory /home/airflow/eflows-airflow down"
#      openstack server set --name $OLD_TEST_NAME $OLD_ID;
#      fi
    - INSTANCE_ID=`openstack server create -f value -c id --prefix IMAGE_ --flavor l2 --image 149a65b5-aeb8-499f-aaa6-ec966bd28dd6 --user-data scripts/cloudinit.yml --security-group ssh --security-group www --security-group https $TESTING_NAME`
    - while [ "`openstack server show $INSTANCE_ID -c addresses -f value`" = "{}" ]; do sleep 5; done # wait until an address is available to attach the floating ip
    - openstack server add floating ip $INSTANCE_ID $TESTING_IP
    - sleep 10 # ensure that next command reaches the new server, prevents host key problems
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo mkdir -p /persistent_data/docker_volumes"
    - until ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo service docker restart" # to use the configured docker data path
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "docker volume create --name=persistent_postgres-db-volume"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "docker volume create --name=persistent_certs"
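    # the compose file references eflows-airflow:latest; switch it to the :testing tag built above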
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
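    # deployment.sh arguments, in order (semantics assumed from this call site):
    #   <user home> <repo dir> <server domain> <secrets backend class> <backend kwargs>
    #   <fernet key> <DAG git url> <oauth client id> <oauth client secret> <oidc metadata url>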
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
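
# light deployment: update the git checkout on the existing testing VM instead of recreating it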
light-deploy-testing:
  stage: deploy
  rules:
    - if: ($CI_COMMIT_BRANCH == "main" && $MANUAL_FULL_DEPLOY !~ /true/ )
  <<: *ssh_setup
  environment: Testing
  script:
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
force-light-deploy-testing: # for deploying images generated on other branches to testing - can only be done by logged in and authorized users
  stage: deploy
  when: manual

  <<: *ssh_setup
  environment: Testing
  script:
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $TESTING_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
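
# the production jobs mirror the testing ones, with the addition of the persistent data volume
# ($VOLUME_ID), which has to be detached from the old instance and attached to the new one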
full-deploy-production:
  stage: deploy
  environment: Production
  rules:
    - if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY == "true")
  <<: *ssh_setup
  script:
    - echo "Starting the full production deployment of airflows."
    - pip install python-openstackclient
    - OLD_ID=`openstack server show $PRODUCTION_NAME -f value -c id` && server_exists=true || echo "No production server found. It might be a first time deployment"
    - openstack server set --name $OLD_PROD_NAME $OLD_ID
#    - if [ "$server_exists" = true ] ; then
#      ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo docker-compose -f /home/airflow/data-logistics-service/dockers/docker-compose.yaml --project-directory /home/airflow/eflows-airflow down"
#      openstack server set --name $OLD_PROD_NAME $OLD_ID;
#      fi
    - openstack server remove volume $OLD_ID $VOLUME_ID
    - INSTANCE_ID=`openstack server create -f value -c id --prefix IMAGE_ --flavor m4 --image 149a65b5-aeb8-499f-aaa6-ec966bd28dd6 --user-data scripts/cloudinit.yml --security-group ssh --security-group www --security-group https $PRODUCTION_NAME`
    - while [ "`openstack server show $INSTANCE_ID -c addresses -f value`" = "{}" ]; do sleep 5; done # wait until an address is available to attach the floating ip
    - openstack server add floating ip $INSTANCE_ID $PRODUCTION_IP
    - sleep 10 # ensure that next command reaches the new server, prevents host key problems
    # do the mount /dev/vdb1 stuff
    - openstack server add volume $INSTANCE_ID $VOLUME_ID
    - sleep 20 # apparently it may take some time until the volume is available to the OS
    - while [ "`ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls `" ]; do sleep 5; done
    - until ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo mkdir -p /persistent_data && sudo mount /dev/vdb1 /persistent_data"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo service docker restart" # to use the configured docker data path
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $PRODUCTION_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
# NOTE Light deployment did not perform well when the template/main.html file was changed (in case of the official airflow image being updated)
# TODO Add proper tests
light-deploy-production:
  stage: deploy
  rules:
    - if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY !~ /true/)
  <<: *ssh_setup
  environment: Production
  script:
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git fetch --all && git checkout -f $CI_COMMIT_TAG && rm -rf dags && git clone https://github.com/eflows4hpc/dls-dags.git dags"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $PRODUCTION_OAUTH_ID $SSO_CLIENT_SECRET $METADATA_URL"
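
# smoke test: poll the webserver until it answers, then exercise the REST API with the test user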
test-production-webserver:
  cache: {}
  stage: test-deployment
  rules:
    - if: $CI_COMMIT_TAG =~ /stable/
  script:
    - apt update && apt -y install curl
    - echo "This is a simple check if the deployment was successful and dags get executed"
    # ensure that the docker containers are up and running before testing the airflow deployment; timeout in 16 to 17 minutes
    - 'while [ $SECONDS -le 1000 ] ; do if output=$(curl --insecure --max-time 10 -I -H "Accept: application/json" $PRODUCTION_URL/home) ; then break; else sleep 30; fi ; done'
    - 'curl --insecure -I -H "Accept: application/json" $PRODUCTION_URL/home'
    - 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/dags'
    - 'curl -X GET -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" $PRODUCTION_URL/api/v1/connections'
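    # finally, trigger a run of the testdag DAG through the stable REST API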
    - 'curl -X POST -u $AIRFLOW_TESTUSER:$AIRFLOW_TESTUSER_PASS -H "Content-Type: application/json" --data {} $PRODUCTION_URL/api/v1/dags/testdag/dagRuns'
cleanup-successful-full-deployment:
  # check if there is an old prod or test instance, and delete it if present
  stage: cleanup
  when: on_success
  rules:
    - if: $MANUAL_FULL_DEPLOY == "true"
  script:
    - echo "This is the cleanup for the full-redeployment of the testing or production servers"
    - echo "if this job is reached, all earlier jobs were successful, and any lingering old instances need to be removed"
    - pip install python-openstackclient
    - openstack server delete $OLD_TEST_NAME && echo "Deleted old testing server." || echo "No old testing server found."
    - openstack server delete $OLD_PROD_NAME && echo "Deleted old production server." || echo "No old production server found."

cleanup-failed-full-deployment:
  # check if there is an old prod or test instance, assign respective ip to it, re-attach volume, delete new instance, rename old instance
  # if there is none, this is a failed light-deployment, which is handled by another job
  # this does not guarantee a successful rollback, but unless the old instance was faulty, this should work
  stage: cleanup
  when: on_failure
  rules:
    - if: ($CI_COMMIT_TAG =~ /stable/ && $MANUAL_FULL_DEPLOY == "true")
  <<: *ssh_setup
  script:
    - echo "This is the cleanup for the full-redeployment of the testing or production servers"
    - echo "if this job is reached, some earlier job had to have failed, this will return to the previous instance (if available)"
    - echo "A successfull cleanup can not be guaranteed, depending on the failure reason"
    - pip install python-openstackclient
    # check which old instance is present. (either old test or old production); store instance id in a var
    - OLD_TEST_ID=`openstack server show $OLD_TEST_NAME -f value -c id` && rollback_test=true || echo "No old testing server found."
    # if applicable: rollback test server
    - if [ "$rollback_test" = true ] ; then
      REMOVE_ID=`openstack server show $TESTING_NAME -f value -c id` && new_deployment_exists=true || echo "No new testing server has been created.";
      openstack server set --name $TESTING_NAME $OLD_TEST_ID;
      openstack server add floating ip $OLD_TEST_ID $TESTING_IP;
      if [ "$new_deployment_exists" = true ] ; then
      openstack server delete $REMOVE_ID && echo "Deleted faulty testing server.";
      fi;
      fi
    # gitlab should automatically alert the devs about this failure
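
# mirror the repository to the public GitHub remote whenever a tag is built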
publishgit-do:
  stage: publish
  only:
    - tags
  tags: [stable]
  script:
    - apt-get update 
    - apt-get install -y git
    - (git remote rm gith) || echo "Not found"
    - (git remote -v | grep gith) || git remote add gith "https://${GITHUB_USER}:${GITHUB_TOKEN}@github.com/eflows4hpc/data-logistics-service.git"
    - git remote -v
    - git fetch --unshallow origin
    - git push gith +HEAD:refs/heads/main