diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 20bc1a364ecdb7cac7e7d89609275db521406834..e63bf602ce16bc6688c0c618a3a519370594a707 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -108,7 +108,7 @@ full-deploy-testing:
     - until ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
     - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo service docker restart" # to use the configured docker data path
     - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
-    - ssh -oStrictHostKeyChecking=accept-new airflow@$PTESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL"
+    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $SSO_CLIENT_SECRET"
     - echo "Done"
 
 light-deploy-testing:
@@ -121,7 +121,7 @@ light-deploy-testing:
   script:
     - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all"
    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP 'sed -i "s_eflows-airflow:latest_eflows-airflow:testing_g" /home/airflow/data-logistics-service/dockers/docker-compose.yaml'
-    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL"
+    - ssh -oStrictHostKeyChecking=accept-new airflow@$TESTING_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $TESTING_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $SSO_CLIENT_SECRET"
@@ -151,7 +151,7 @@ full-deploy-production:
     - until ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP ls /finished_cloudinit >/dev/null 2>&1; do sleep 30; done # wait until cloudinit script is complete
     - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo mkdir -p /persistent_data && sudo mount /dev/vdb1 /persistent_data"
     - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo service docker restart" # to use the configured docker data path
-    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL"
+    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $SSO_CLIENT_SECRET"
     - echo "Done"
 
 # NOTE Light deployment did not perform well when the template/main.html file was changed (in case of the official airflow image being updated)
@@ -164,7 +164,7 @@ light-deploy-production:
   environment: Production
   script:
     - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "cd /home/airflow/data-logistics-service && git stash && git stash clear && git checkout main && git checkout -f $CI_COMMIT_TAG && git pull --all && rm -rf dags && git clone https://github.com/eflows4hpc/dls-dags.git dags"
-    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL"
+    - ssh -oStrictHostKeyChecking=accept-new airflow@$PRODUCTION_IP "sudo /home/airflow/data-logistics-service/scripts/deployment.sh /home/airflow /home/airflow/data-logistics-service $PRODUCTION_DOMAIN $AIRFLOW__SECRETS__BACKEND $AIRFLOW__SECRETS__BACKEND_KWARGS $AIRFLOW_FERNET_KEY $DAG_GIT_URL $SSO_CLIENT_SECRET"
 
 test-production-webserver:
   cache: {}
diff --git a/client_secrets.json b/client_secrets.json
new file mode 100644
index 0000000000000000000000000000000000000000..67556cb30cfe4b5d05c8ba33af668b8c0b30562c
--- /dev/null
+++ b/client_secrets.json
@@ -0,0 +1,18 @@
+{
+    "web":{
+        "issuer":"https://zam10045.zam.kfa-juelich.de:7000/oauth2",
+
+        "client_id":"ff3f5a29-d210-4be2-a6d4-93c4fc755bfe",
+        "client_secret":"SSO_CLIENT_SECRET",
+        "auth_uri":"https://zam10045.zam.kfa-juelich.de:7000/oauth2-as/oauth2-authz",
+        "redirect_urls":[
+            "https://zam10115.zam.kfa-juelich.de",
+            "https://zam10115.zam.kfa-juelich.de/oidc_callback",
+            "https://zam10115.zam.kfa-juelich.de/home",
+            "https://zam10115.zam.kfa-juelich.de/login"
+        ],
+
+        "token_uri":"https://zam10045.zam.kfa-juelich.de:7000/oauth2/token",
+        "userinfo_uri": "https://zam10045.zam.kfa-juelich.de:7000/oauth2/userinfo"
+    }
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index cb40ab776ed5a9aa04d12e6d3851f0e5dfbf682d..56ab98f8d773d6dcf27ca965676e41b8894ada38 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,6 @@ apache-airflow-providers-http
 apache-airflow-providers-sftp
 --index-url https://gitlab.jsc.fz-juelich.de/api/v4/projects/4405/packages/pypi/simple
 airflow-datacat-integration>=0.1.4
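+# flask-oidc provides the OpenID Connect login flow for the webserver; aiohttp is used for asynchronous token revocation on logout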
+flask-oidc
+aiohttp
diff --git a/scripts/deployment.sh b/scripts/deployment.sh
index 01136afba8b1200fcc43a41965c9a7767d9edec3..c85ae4b9347be5c1baaa1d4c02d061249de35bdc 100755
--- a/scripts/deployment.sh
+++ b/scripts/deployment.sh
@@ -2,7 +2,7 @@
 # @author Maria Petrova & Christian Böttcher
 ## USAGE:
 #
-# deployment.sh <user_home_directory> <git_directory> [SERVER_DOMAIN] [AIRFLOW__SECRETS__BACKEND] [AIRFLOW__SECRETS__BACKEND_KWARGS] [AIRFLOW__CORE__FERNET_KEY] [DAG_GIT_URL]
+# deployment.sh <user_home_directory> <git_directory> [SERVER_DOMAIN] [AIRFLOW__SECRETS__BACKEND] [AIRFLOW__SECRETS__BACKEND_KWARGS] [AIRFLOW__CORE__FERNET_KEY] [DAG_GIT_URL] [SSO_CLIENT_SECRET]
 
 OLD_DIR=`pwd`
 GIT_REPO=$HOME/data-logistics-service
@@ -15,7 +15,8 @@ if [ -z ${3+x} ]; then export SERVER_DOMAIN=dls.fz-juelich.de; else export SERVER_DOMAIN=$3; fi
 if [ -z ${4+x} ]; then unset AIRFLOW__SECRETS__BACKEND; else export AIRFLOW__SECRETS__BACKEND=$4; fi
 if [ -z ${5+x} ]; then unset AIRFLOW__SECRETS__BACKEND_KWARGS; else export AIRFLOW__SECRETS__BACKEND_KWARGS=$5; fi
 if [ -z ${6+x} ]; then unset AIRFLOW__CORE__FERNET_KEY; else export AIRFLOW__CORE__FERNET_KEY=$6; fi
-if [ -z ${6+x} ]; then unset DAG_GIT_URL; else export DAG_GIT_URL=$7; fi
+if [ -z ${7+x} ]; then unset DAG_GIT_URL; else export DAG_GIT_URL=$7; fi
+if [ -z ${8+x} ]; then unset SSO_CLIENT_SECRET; else export SSO_CLIENT_SECRET=$8; fi
@@ -44,6 +45,8 @@ rm -rf $AIRFLOW_DIR/dags && mkdir $AIRFLOW_DIR/dags && git clone $DAG_GIT_URL $AIRFLOW_DIR/dags
 cp -r plugins/* $AIRFLOW_DIR/plugins
 cp config/* $AIRFLOW_DIR/config/
 cp -r templates/* $AIRFLOW_DIR/templates
+cp webserver_config.py $AIRFLOW_DIR/webserver_config.py
+cp client_secrets.json $AIRFLOW_DIR/client_secret.json
 # Setup environment variables and install requirements
 echo -e "AIRFLOW_UID=$(id -u)" > $GIT_REPO/dockers/.env
 export AIRFLOW_UID=$(id -u)
@@ -51,6 +54,8 @@ export AIRFLOW_UID=$(id -u)
 pip install -r $GIT_REPO/requirements.txt
 
 sed -i "s_datalogistics.eflows4hpc.eu_${SERVER_DOMAIN}_g" $GIT_REPO/dockers/docker-compose.yaml
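+# replace the SSO_CLIENT_SECRET placeholder in the copied client secrets file with the real secret passed from CI (argument 8)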
+sed -i "s_SSO_CLIENT_SECRET_${SSO_CLIENT_SECRET}_g" $AIRFLOW_DIR/client_secret.json
 
 # it is at this point assumed that ip and volume are correctly assigned, and that dns is working properly
 echo "-----------Bringing up the docker containers-----------"
diff --git a/webserver_config.py b/webserver_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e06053be7d4864ea3aa93d0f9109e84ff78bf25
--- /dev/null
+++ b/webserver_config.py
@@ -0,0 +1,189 @@
+import os, logging, json, posixpath
+
+from airflow import configuration as conf
+from airflow.www.security import AirflowSecurityManager
+from flask import abort, make_response, redirect
+from flask_appbuilder.security.manager import AUTH_OID
+from flask_appbuilder.security.views import AuthOIDView
+from flask_appbuilder.views import ModelView, SimpleFormView, expose
+from flask_login import login_user
+from flask_oidc import OpenIDConnect
+
+logger = logging.getLogger(__name__)
+
+# Set the OIDC fields that should be used
+NICKNAME_OIDC_FIELD = 'nickname'
+FULL_NAME_OIDC_FIELD = 'name'
+GROUPS_OIDC_FIELD = 'groups'
+EMAIL_FIELD = 'email'
+SUB_FIELD = 'sub'  # User ID
+
+
+# Convert groups from comma separated string to list
+ALLOWED_GROUPS = os.environ.get('ALLOWED_GROUPS')
+if ALLOWED_GROUPS:
+    ALLOWED_GROUPS = [g.strip() for g in ALLOWED_GROUPS.split(',')]
+else: ALLOWED_GROUPS = []
+
+if ALLOWED_GROUPS:
+    logger.debug('AirFlow access requires membership to one of the following groups: %s'
+                 % ', '.join(ALLOWED_GROUPS))
+
+
+# Extending AuthOIDView
+class AuthOIDCView(AuthOIDView):
+
+    @expose('/login/', methods=['GET', 'POST'])
+    def login(self, flag=True):
+
+        sm = self.appbuilder.sm
+        oidc = sm.oid
+
+        @self.appbuilder.sm.oid.require_login
+        def handle_login():
+            user = sm.auth_user_oid(oidc.user_getfield(EMAIL_FIELD))
+
+            # Group membership required
+            if ALLOWED_GROUPS:
+
+                # Fetch group membership information from the OIDC provider
+                groups = oidc.user_getinfo([GROUPS_OIDC_FIELD]).get(GROUPS_OIDC_FIELD, [])
+                intersection = set(ALLOWED_GROUPS) & set(groups)
+                logger.debug('AirFlow user member of groups in ACL list: %s' % ', '.join(intersection))
+
+                # Unable to find common groups, prevent login
+                if not intersection:
+                    return abort(403)
+
+            # Create user (if it doesn't already exist)
+            if user is None:
+                info = oidc.user_getinfo([
+                    NICKNAME_OIDC_FIELD,
+                    FULL_NAME_OIDC_FIELD,
+                    GROUPS_OIDC_FIELD,
+                    SUB_FIELD,
+                    EMAIL_FIELD,
+                    "profile"
+                ])
+                full_name = info.get(FULL_NAME_OIDC_FIELD)
+                if " " in full_name:
+                    full_name = full_name.split(" ", 1)
+                    first_name = full_name[0]
+                    last_name = full_name[1]
+                else:
+                    first_name = full_name
+                    last_name = ""
+                user = sm.add_user(
+                    username=info.get(NICKNAME_OIDC_FIELD),
+                    first_name=first_name,
+                    last_name=last_name,
+                    email=info.get(EMAIL_FIELD),
+                    role=sm.find_role(sm.auth_user_registration_role)
+                )
+
+            login_user(user, remember=False)
+            return redirect(self.appbuilder.get_url_for_index)
+
+        return handle_login()
+
+    @expose('/logout/', methods=['GET', 'POST'])
+    def logout(self):
+        oidc = self.appbuilder.sm.oid
+        if not oidc.credentials_store:
+            return redirect('/login/')
+        self.revoke_token()
+        oidc.logout()
+        super(AuthOIDCView, self).logout()
+        response = make_response("You have been signed out")
+        return response
+
+    def revoke_token(self):
+        """ Revokes the provided access token. Sends a POST request to the token revocation endpoint
+        """
+        import aiohttp
+        import asyncio
+        import json
+        oidc = self.appbuilder.sm.oid
+        sub = oidc.user_getfield(SUB_FIELD)
+        config = oidc.credentials_store
+        config = config.get(str(sub))
+        config = json.loads(config)
+        payload = {
+            "token": config['access_token'],
+            "token_type_hint": "refresh_token"
+        }
+        auth = aiohttp.BasicAuth(config['client_id'], config['client_secret'])
+        # Sends an asynchronous POST request to revoke the token
+
+        async def revoke():
+            async with aiohttp.ClientSession() as session:
+                async with session.post(self.appbuilder.app.config.get('OIDC_LOGOUT_URI'), data=payload, auth=auth) as response:
+                    logging.info(f"Revoke response {response.status}")
+
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        loop.run_until_complete(revoke())
+
+
+
+class OIDCSecurityManager(AirflowSecurityManager):
+    """
+    Custom security manager class that allows using the OpenID Connect authentication method.
+    """
+    def __init__(self, appbuilder):
+        super(OIDCSecurityManager, self).__init__(appbuilder)
+        if self.auth_type == AUTH_OID:
+            self.oid = OpenIDConnect(self.appbuilder.get_app)
+            self.authoidview = AuthOIDCView
+
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+
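+# --- Airflow webserver / Flask-AppBuilder configuration ---
+# Everything below is read once by the webserver at startup; the OIDC_* keys
+# are consumed by flask-oidc.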
+SECURITY_MANAGER_CLASS = OIDCSecurityManager
+# The SQLAlchemy connection string.
+SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN')
+
+# Flask-WTF flag for CSRF
+CSRF_ENABLED = True
+
+AUTH_TYPE = AUTH_OID
+OIDC_CLIENT_SECRETS = 'client_secret.json'  # Configuration file for OIDC
+OIDC_COOKIE_SECURE = False
+OIDC_ID_TOKEN_COOKIE_SECURE = False
+OIDC_REQUIRE_VERIFIED_EMAIL = False
+OIDC_USER_INFO_ENABLED = True
+CUSTOM_SECURITY_MANAGER = OIDCSecurityManager
+
+# Ensure that the secrets file exists
+if not os.path.exists(OIDC_CLIENT_SECRETS):
+    raise ValueError('Unable to load OIDC client configuration. %s does not exist.' % OIDC_CLIENT_SECRETS)
+
+# Parse client_secret.json for scopes and logout URL
+with open(OIDC_CLIENT_SECRETS) as f:
+    OIDC_APPCONFIG = json.loads(f.read())
+
+# Ensure that the logout/revoke URL is specified in the client secrets file
+UNITY_OIDC_URL = OIDC_APPCONFIG.get('web', {}).get('issuer')
+if not UNITY_OIDC_URL:
+    raise ValueError('Invalid OIDC client configuration, Unity OIDC issuer not specified.')
+
+OIDC_SCOPES = OIDC_APPCONFIG.get('OIDC_SCOPES', ['openid', 'email', 'profile'])  # Scopes that should be requested.
+OIDC_LOGOUT_URI = posixpath.join(UNITY_OIDC_URL, 'oauth/revoke')  # OIDC logout URL
+
+
+# Do not allow user self registration
+AUTH_USER_REGISTRATION = False
+
+# Default role to provide to new users
+AUTH_USER_REGISTRATION_ROLE = os.environ.get('AUTH_USER_REGISTRATION_ROLE', 'Public')
+
+AUTH_ROLE_ADMIN = 'Admin'
+AUTH_ROLE_PUBLIC = "Public"
+
+
+OPENID_PROVIDERS = [
+    {'name': 'Unity', 'url': posixpath.join(UNITY_OIDC_URL, 'oauth/authorize')}
+]
\ No newline at end of file