
not beautiful yet but works, variable used for common working directory, dag...

Merged Jedrzej Rybicki requested to merge confignewbranch into main
8 files  +300 −120
@@ -21,21 +21,22 @@ default_timezone = utc
 # ``SequentialExecutor``, ``LocalExecutor``, ``CeleryExecutor``, ``DaskExecutor``,
 # ``KubernetesExecutor``, ``CeleryKubernetesExecutor`` or the
 # full import path to the class when using a custom executor.
-executor = SequentialExecutor
+executor = CeleryExecutor
 # The SqlAlchemy connection string to the metadata database.
 # SqlAlchemy supports many different database engines.
 # More information here:
 # http://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html#database-uri
-# sql_alchemy_conn = sqlite:////opt/airflow/airflow.db
+sql_alchemy_conn = sqlite:////opt/airflow/airflow.db
 # The encoding for the databases
 sql_engine_encoding = utf-8
 # Collation for ``dag_id``, ``task_id``, ``key`` columns in case they have different encoding.
-# This is particularly useful in case of mysql with utf8mb4 encoding because
-# primary keys for XCom table has too big size and ``sql_engine_collation_for_ids`` should
-# be set to ``utf8mb3_general_ci``.
+# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb``
+# the default is ``utf8mb3_bin`` so that the index sizes of our index keys will not exceed
+# the maximum size of allowed index when collation is set to ``utf8mb4`` variant
+# (see https://github.com/apache/airflow/pull/17603#issuecomment-901121618).
 # sql_engine_collation_for_ids =
 # If SqlAlchemy should pool database connections.
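Since ``CeleryExecutor`` normally needs a non-SQLite metadata database and a Celery broker (configured outside this hunk), it can help to check which values the deployment actually resolves, because environment variables such as ``AIRFLOW__CORE__EXECUTOR`` override this file. A minimal sketch, assuming it runs inside the deployed Airflow environment:

    # Print the effective [core] settings as Airflow resolves them; values set via
    # AIRFLOW__CORE__* environment variables take precedence over airflow.cfg.
    from airflow.configuration import conf

    for section, key in [("core", "executor"), ("core", "sql_alchemy_conn")]:
        print(f"{section}.{key} = {conf.get(section, key)}")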
@@ -85,9 +86,12 @@ parallelism = 32
 # The maximum number of task instances allowed to run concurrently in each DAG. To calculate
 # the number of tasks that is running concurrently for a DAG, add up the number of running
-# tasks for all DAG runs of the DAG. This is configurable at the DAG level with ``concurrency``,
-# which is defaulted as ``dag_concurrency``.
-dag_concurrency = 16
+# tasks for all DAG runs of the DAG. This is configurable at the DAG level with ``max_active_tasks``,
+# which is defaulted as ``max_active_tasks_per_dag``.
+#
+# An example scenario when this would be useful is when you want to stop a new dag with an early
+# start date from stealing all the executor slots in a cluster.
+max_active_tasks_per_dag = 16
 # Are DAGs paused by default at creation
 dags_are_paused_at_creation = True
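For context, the renamed ``max_active_tasks_per_dag`` limit can still be tightened for an individual DAG, as the new comment notes. A minimal sketch with illustrative names, assuming Airflow 2.2+:

    from datetime import datetime
    from airflow import DAG
    from airflow.operators.bash import BashOperator

    # Hypothetical DAG that caps its own concurrent task instances below the
    # global max_active_tasks_per_dag = 16 set in this file.
    with DAG(
        dag_id="example_capped_dag",
        start_date=datetime(2021, 1, 1),
        schedule_interval=None,
        max_active_tasks=4,
    ) as dag:
        BashOperator(task_id="noop", bash_command="echo done")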
@@ -100,7 +104,7 @@ max_active_runs_per_dag = 16
 # Whether to load the DAG examples that ship with Airflow. It's good to
 # get started, but you probably want to set this to ``False`` in a production
 # environment
-load_examples = True
+load_examples = False
 # Whether to load the default connections that ship with Airflow. It's good to
 # get started, but you probably want to set this to ``False`` in a production
@@ -169,6 +173,9 @@ dag_discovery_safe_mode = True
 # The number of retries each task is going to have by default. Can be overridden at dag or task level.
 default_task_retries = 0
+
+# The weighting method used for the effective total priority weight of the task
+default_task_weight_rule = downstream
 # Updating serialized DAG can not be faster than a minimum interval to reduce database write rate.
 min_serialized_dag_update_interval = 30
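For context, the new ``default_task_weight_rule`` (like ``default_task_retries``) only sets a default; both can be overridden per task. A minimal sketch with illustrative names, assuming Airflow 2.2+:

    from datetime import datetime
    from airflow import DAG
    from airflow.operators.bash import BashOperator

    with DAG(dag_id="example_weights", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
        # Override the file-level defaults for this one task: more retries and an
        # absolute priority weight instead of the "downstream" weighting rule.
        BashOperator(
            task_id="important_step",
            bash_command="echo run",
            retries=3,
            weight_rule="absolute",
            priority_weight=10,
        )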
@@ -176,13 +183,6 @@ min_serialized_dag_update_interval = 30
 # read rate. This config controls when your DAGs are updated in the Webserver
 min_serialized_dag_fetch_interval = 10
-# Whether to persist DAG files code in DB.
-# If set to True, Webserver reads file contents from DB instead of
-# trying to access files in a DAG folder.
-# (Default is ``True``)
-# Example: store_dag_code = True
-# store_dag_code =
 # Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
 # in the Database.
 # All the template_fields for each of Task Instance are stored in the Database.
@@ -220,6 +220,11 @@ hide_sensitive_var_conn_fields = True
 # extra JSON.
 sensitive_var_conn_names =
+
+# Task Slot counts for ``default_pool``. This setting would not have any effect in an existing
+# deployment where the ``default_pool`` is already created. For existing deployments, users can
+# change the number of slots using Webserver, API or the CLI
+default_pool_task_slot_count = 128
 [logging]
 # The folder where airflow should store its log files
 # This path must be absolute
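Regarding the ``default_pool_task_slot_count`` option added above: tasks run in ``default_pool`` unless they name another pool, and a heavier task can claim several of those 128 slots. A minimal sketch with illustrative names, assuming Airflow 2.2+:

    from datetime import datetime
    from airflow import DAG
    from airflow.operators.bash import BashOperator

    with DAG(dag_id="example_pools", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
        # Runs in default_pool (128 slots per this config) but occupies 4 of them,
        # so at most 32 such tasks could run at once across the deployment.
        BashOperator(
            task_id="heavy_step",
            bash_command="echo heavy",
            pool="default_pool",
            pool_slots=4,
        )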
@@ -258,7 +263,7 @@ logging_level = INFO
 # Logging level for Flask-appbuilder UI.
 #
 # Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``.
-fab_logging_level = WARN
+fab_logging_level = WARNING
 # Logging class
 # Specify the class that will specify the logging configuration
@@ -297,8 +302,15 @@ task_log_reader = task
 # A comma\-separated list of third-party logger names that will be configured to print messages to
 # consoles\.
-# Example: extra_loggers = connexion,sqlalchemy
-extra_loggers =
+# Example: extra_logger_names = connexion,sqlalchemy
+extra_logger_names =
+
+# When you start an airflow worker, airflow starts a tiny web server
+# subprocess to serve the workers local log files to the airflow main
+# web server, who then builds pages and sends them to users. This defines
+# the port on which the logs are served. It needs to be unused, and open
+# visible from the main web server to connect into the workers.
+worker_log_server_port = 8793
 [metrics]
@@ -406,8 +418,9 @@ access_control_allow_headers =
 # Specifies the method or methods allowed when accessing the resource.
 access_control_allow_methods =
-# Indicates whether the response can be shared with requesting code from the given origin.
-access_control_allow_origin =
+# Indicates whether the response can be shared with requesting code from the given origins.
+# Separate URLs with space.
+access_control_allow_origins =
 [lineage]
 # what lineage backend to use
@@ -491,7 +504,7 @@ reload_on_plugin_change = False
 # Secret key used to run your flask app. It should be as random as possible. However, when running
 # more than 1 instances of webserver, make sure all of them use the same ``secret_key`` otherwise
 # one of them will error with "CSRF session token is missing".
-secret_key = 8kUFwlRKUhs6i8NBAvUmWg==
+secret_key = Jvww64wGcBs22UNHJjToNw==
 # Number of workers to run the Gunicorn web server
 workers = 4
@@ -529,7 +542,7 @@ dag_orientation = LR
 # The amount of time (in secs) webserver will wait for initial handshake
 # while fetching logs from other worker machine
-log_fetch_timeout_sec = 5
+log_fetch_timeout_sec = 15
 # Time interval (in secs) to wait before next log fetching.
 log_fetch_delay_sec = 2
@@ -603,7 +616,11 @@ update_fab_perms = True
 session_lifetime_minutes = 43200
 # Sets a custom page title for the DAGs overview page and site title for all pages
-instance_name =eFlows4HPC
+# instance_name =
+
+# How frequently, in seconds, the DAG data will auto-refresh in graph or tree view
+# when auto-refresh is turned on
+auto_refresh_interval = 3
 [email]
@@ -654,11 +671,14 @@ smtp_retry_limit = 5
 # additional configuration options based on the Python platform. See:
 # https://docs.sentry.io/error-reporting/configuration/?platform=python.
 # Unsupported options: ``integrations``, ``in_app_include``, ``in_app_exclude``,
-# ``ignore_errors``, ``before_breadcrumb``, ``before_send``, ``transport``.
+# ``ignore_errors``, ``before_breadcrumb``, ``transport``.
 # Enable error reporting to Sentry
 sentry_on = false
 sentry_dsn =
+
+# Dotted path to a before_send function that the sentry SDK should be configured to use.
+# before_send =
 [celery_kubernetes_executor]
 # This section only applies if you are using the ``CeleryKubernetesExecutor`` in
@@ -701,13 +721,6 @@ worker_concurrency = 16
 # Example: worker_prefetch_multiplier = 1
 # worker_prefetch_multiplier =
-# When you start an airflow worker, airflow starts a tiny web server
-# subprocess to serve the workers local log files to the airflow main
-# web server, who then builds pages and sends them to users. This defines
-# the port on which the logs are served. It needs to be unused, and open
-# visible from the main web server to connect into the workers.
-worker_log_server_port = 8793
 # Umask that will be used when starting workers with the ``airflow celery worker``
 # in daemon mode. This control the file-creation mode mask which determines the initial
 # value of file permission bits for newly created files.
@@ -812,10 +825,6 @@ tls_key =
 # listen (in seconds).
 job_heartbeat_sec = 5
-# How often (in seconds) to check and tidy up 'running' TaskInstancess
-# that no longer have a matching DagRun
-clean_tis_without_dagrun_interval = 15.0
 # The scheduler constantly tries to trigger new tasks (look at the
 # scheduler section in the docs for more information). This defines
 # how often the scheduler should run (in seconds).
@@ -825,8 +834,10 @@ scheduler_heartbeat_sec = 5
 # -1 indicates unlimited number
 num_runs = -1
-# The number of seconds to wait between consecutive DAG file processing
-processor_poll_interval = 1
+# Controls how long the scheduler will sleep between loops, but if there was nothing to do
+# in the loop. i.e. if it scheduled something then it will start the next loop
+# iteration straight away.
+scheduler_idle_sleep_time = 1
 # Number of seconds after which a DAG file is parsed. The DAG file is parsed every
 # ``min_file_process_interval`` number of seconds. Updates to DAGs are reflected after
@@ -865,11 +876,8 @@ scheduler_zombie_task_threshold = 300
 catchup_by_default = True
 # This changes the batch size of queries in the scheduling main loop.
-# If this is too high, SQL query performance may be impacted by one
-# or more of the following:
-# - reversion to full table scan
-# - complexity of query predicate
-# - excessive locking
+# If this is too high, SQL query performance may be impacted by
+# complexity of query predicate, and/or excessive locking.
 # Additionally, you may hit the maximum allowable query length for your db.
 # Set this to 0 for no limit (not advised)
 max_tis_per_query = 512
@@ -917,6 +925,13 @@ allow_trigger_in_future = False
 # DAG dependency detector class to use
 dependency_detector = airflow.serialization.serialized_objects.DependencyDetector
+
+# How often to check for expired trigger requests that have not run yet.
+trigger_timeout_check_interval = 15
+
+[triggerer]
+# How many triggers a single Triggerer will run at once, by default.
+default_capacity = 1000
 [kerberos]
 ccache = /tmp/airflow_krb5_ccache
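The new ``[triggerer]`` section backs Airflow 2.2's deferrable operators, which hand their waiting over to a separate ``airflow triggerer`` process instead of occupying a worker slot. A minimal sketch, assuming the deferrable ``TimeDeltaSensorAsync`` that ships with Airflow 2.2 is available (names are illustrative):

    from datetime import datetime, timedelta
    from airflow import DAG
    from airflow.sensors.time_delta import TimeDeltaSensorAsync

    with DAG(dag_id="example_deferrable", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
        # While waiting, this sensor defers to the triggerer (capacity set by
        # default_capacity above) rather than holding an executor slot.
        TimeDeltaSensorAsync(task_id="wait_an_hour", delta=timedelta(hours=1))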
@@ -926,6 +941,12 @@ reinit_frequency = 3600
 kinit_path = kinit
 keytab = airflow.keytab
+
+# Allow to disable ticket forwardability.
+forwardable = True
+
+# Allow to remove source IP from token, useful when using token behind NATted Docker host.
+include_ip = True
 [github_enterprise]
 api_rev = v3
@@ -941,7 +962,8 @@ end_of_log_mark = end_of_log
 # Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
 # Code will construct log_id using the log_id template from the argument above.
-# NOTE: The code will prefix the https:// automatically, don't include that here.
+# NOTE: scheme will default to https if one is not provided
+# Example: frontend = http://localhost:5601/app/kibana#/discover?_a=(columns:!(message),query:(language:kuery,query:'log_id: "{log_id}"'),sort:!(log.offset,asc))
 frontend =
 # Write the task logs to the stdout of the worker, rather than the default files
@@ -964,7 +986,7 @@ use_ssl = False
 verify_certs = True
 [kubernetes]
-# Path to the YAML pod file. If set, all other kubernetes-related fields are ignored.
+# Path to the YAML pod file that forms the basis for KubernetesExecutor workers.
 pod_template_file =
 # The repository of the Kubernetes Image for the Worker to Run
@@ -1049,6 +1071,9 @@ worker_pods_pending_timeout = 300
 # How often in seconds to check if Pending workers have exceeded their timeouts
 worker_pods_pending_timeout_check_interval = 120
+
+# How often in seconds to check for task instances stuck in "queued" status without a pod
+worker_pods_queued_check_interval = 60
 # How many pending pods to check for timeout violations in each check interval.
 # You may want this higher if you have a very large cluster and/or use ``multi_namespace_mode``.
 worker_pods_pending_timeout_batch_size = 100
@@ -1068,5 +1093,3 @@ shards = 5
 # comma separated sensor classes support in smart_sensor.
 sensors_enabled = NamedHivePartitionSensor
-rbac = True