orphaned_tasks_check_interval = 300.0
child_process_log_directory = /opt/airflow/logs/scheduler

# Local task jobs periodically heartbeat to the DB. If the job has
# not sent a heartbeat in this many seconds, the scheduler will mark the
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300

# How often (in seconds) should the scheduler check for zombie tasks.
zombie_detection_interval = 10.0

# Turn off scheduler catchup by setting this to ``False``.
# Default behavior is unchanged and Command Line Backfills still work,
# but the scheduler will not do scheduler catchup if this is ``False``.
# It can also be set on a per-DAG basis in the DAG definition (catchup).
catchup_by_default = True
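# For reference, the per-DAG override is set in the DAG definition itself, e.g.
# (an illustrative sketch; the dag_id is hypothetical):
#   DAG(dag_id="my_dag", schedule_interval="@daily", catchup=False)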

# Setting this to True will make the first task instance of a task
# ignore the depends_on_past setting. A task instance is considered
# the first task instance of a task when there is no task instance
# in the DB with an earlier execution_date, i.e. no manual marking of
# success will be needed for a newly added task to be scheduled.
ignore_first_depends_on_past_by_default = True

# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by
# complexity of query predicate, and/or excessive locking.
# Additionally, you may hit the maximum allowable query length for your db.
# Set this to 0 for no limit (not advised)
max_tis_per_query = 512

# Should the scheduler issue ``SELECT ... FOR UPDATE`` in relevant queries.
# If this is set to False then you should not run more than a single
# scheduler at once
use_row_level_locking = True
# Max number of DAGs to create DagRuns for per scheduler loop.
max_dagruns_to_create_per_loop = 10

# How many DagRuns should a scheduler examine (and lock) when scheduling
# and queuing tasks.
max_dagruns_per_loop_to_schedule = 20
# Should the Task supervisor process perform a "mini scheduler" to attempt to schedule more tasks of the
# same DAG. Leaving this on will mean tasks in the same DAG execute quicker, but might starve out other
# DAGs in some circumstances.
schedule_after_task_execution = True

# The scheduler can run multiple processes in parallel to parse dags.
# This defines how many processes will run.
parsing_processes = 2

# One of ``modified_time``, ``random_seeded_by_host`` and ``alphabetical``.
# The scheduler will list and sort the dag files to decide the parsing order.
#
# * ``modified_time``: Sort by modified time of the files. This is useful on large scale to parse the
#   recently modified DAGs first.
# * ``random_seeded_by_host``: Sort randomly across multiple Schedulers but with same order on the
#   same host. This is useful when running with Scheduler in HA mode where each scheduler can
#   parse different DAG files.
# * ``alphabetical``: Sort by filename
file_parsing_sort_mode = modified_time

# Whether the dag processor is running as a standalone process or as a subprocess of a scheduler
# job.
standalone_dag_processor = False

# Only applicable if `[scheduler]standalone_dag_processor` is true and callbacks are stored
# in database. Contains maximum number of callbacks that are fetched during a single loop.
max_callbacks_per_loop = 20

# Only applicable if `[scheduler]standalone_dag_processor` is true.
# Time in seconds after which DAGs that were not updated by the Dag Processor are deactivated.
dag_stale_not_seen_duration = 600

# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True

# Allow externally triggered DagRuns for Execution Dates in the future
# Only has an effect if schedule_interval is set to None in the DAG.
allow_trigger_in_future = False

# How often to check for expired trigger requests that have not run yet.
trigger_timeout_check_interval = 15

[triggerer]
# How many triggers a single Triggerer will run at once, by default.
default_capacity = 1000

[kerberos]
ccache = /tmp/airflow_krb5_ccache

# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab

# Allows disabling ticket forwardability.
forwardable = True

# Allows removing the source IP from the token; useful when the token is used behind a NATted Docker host.
include_ip = True

[elasticsearch]
# Elasticsearch host
host =

# Format of the log_id, which is used to query for a given task's logs
log_id_template = {dag_id}-{task_id}-{run_id}-{map_index}-{try_number}
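# With the template above, a rendered log_id could look like (illustrative values):
#   my_dag-my_task-scheduled__2022-01-01T00:00:00+00:00--1-1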

# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log

# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: scheme will default to https if one is not provided
# Example: frontend = http://localhost:5601/app/kibana#/discover?_a=(columns:!(message),query:(language:kuery,query:'log_id: "{log_id}"'),sort:!(log.offset,asc))
frontend =

# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False

# Instead of the default log formatter, write the log lines as JSON
json_format = False

# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message
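# With ``json_format = True``, an emitted line could look like (illustrative values):
# {"asctime": "2023-01-01 00:00:00,000", "filename": "taskinstance.py", "lineno": 1037, "levelname": "INFO", "message": "Starting attempt 1 of 1"}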

# The field where host name is stored (normally either `host` or `host.name`)
host_field = host

# The field where offset is stored (normally either `offset` or `log.offset`)
offset_field = offset

[elasticsearch_configs]
use_ssl = False
verify_certs = True

[kubernetes_executor]
# Path to the YAML pod file that forms the basis for KubernetesExecutor workers.
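# Example (an illustrative path): pod_template_file = /opt/airflow/pod_templates/pod_template.yaml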
pod_template_file =
# The repository of the Kubernetes Image for the Worker to Run
worker_container_repository =
# The tag of the Kubernetes Image for the Worker to Run
worker_container_tag =

# The Kubernetes namespace where airflow workers should be created. Defaults to ``default``
namespace = default

# If True, all worker pods will be deleted upon termination
delete_worker_pods = True

# If False (and delete_worker_pods is True),
# failed worker pods will not be deleted so users can investigate them.
# This only prevents removal of worker pods where the worker itself failed,
# not when the task it ran failed.
delete_worker_pods_on_failure = False

# Number of Kubernetes Worker Pod creation calls per scheduler loop.
# Note that the current default of "1" will only launch a single pod
# per heartbeat. It is HIGHLY recommended that users increase this
# number to match the tolerance of their Kubernetes cluster for
# better performance.
worker_pods_creation_batch_size = 1
# Allows users to launch pods in multiple namespaces.
# This requires creating a cluster-role for the scheduler.
multi_namespace_mode = False

# Use the service account kubernetes gives to pods to connect to kubernetes cluster.
# It's intended for clients that expect to be running inside a pod running on kubernetes.
# It will raise an exception if called from a process not running in a kubernetes environment.
in_cluster = True

# When running with in_cluster=False, change the default cluster_context or config_file
# options passed to the Kubernetes client. Leave these blank to use the same default behaviour as ``kubectl``.
# cluster_context =

# Path to the kubernetes configfile to be used when ``in_cluster`` is set to False
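# Example (an illustrative path): config_file = /home/airflow/.kube/config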
# config_file =

# Keyword parameters to pass when calling the kubernetes client ``core_v1_api`` methods
# from the Kubernetes Executor, provided as a single-line JSON dictionary string.
# The supported parameters are similar for all ``core_v1_api`` methods, hence a single config
# variable for all APIs. See:
# https://raw.githubusercontent.com/kubernetes-client/python/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/api/core_v1_api.py
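# Example: kube_client_request_args = {"_request_timeout": [60, 60]}
# (``_request_timeout`` is a keyword accepted by the kubernetes python client; the value shown is illustrative)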
kube_client_request_args =
# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client
# ``core_v1_api`` method when using the Kubernetes Executor.
# This should be an object and can contain any of the options listed in the ``v1DeleteOptions``
# class defined here:
# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19
# Example: delete_option_kwargs = {"grace_period_seconds": 10}
delete_option_kwargs =
# Enables the TCP keepalive mechanism. This prevents Kubernetes API requests from hanging
# indefinitely when an idle connection is timed out by services like cloud load balancers or firewalls.
enable_tcp_keepalive = True
# When the `enable_tcp_keepalive` option is enabled, TCP probes a connection that has
# been idle for `tcp_keep_idle` seconds.
tcp_keep_idle = 120
# When the `enable_tcp_keepalive` option is enabled, if the Kubernetes API does not respond
# to a keepalive probe, TCP retransmits the probe after `tcp_keep_intvl` seconds.
tcp_keep_intvl = 30
# When the `enable_tcp_keepalive` option is enabled, if the Kubernetes API does not respond
# to a keepalive probe, TCP retransmits the probe `tcp_keep_cnt` times before
# the connection is considered broken.
tcp_keep_cnt = 6
# Set this to false to skip verifying the SSL certificate of the Kubernetes python client.
verify_ssl = True
# How long in seconds a worker can be in Pending before it is considered a failure
worker_pods_pending_timeout = 300
# How often in seconds to check if Pending workers have exceeded their timeouts
worker_pods_pending_timeout_check_interval = 120
# How often in seconds to check for task instances stuck in "queued" status without a pod
worker_pods_queued_check_interval = 60

# How many pending pods to check for timeout violations in each check interval.
# You may want this higher if you have a very large cluster and/or use ``multi_namespace_mode``.
worker_pods_pending_timeout_batch_size = 100

[sensors]
# Sensor default timeout, 7 days by default (7 * 24 * 60 * 60).
default_timeout = 604800