| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| --- |
| apiVersion: v1 |
| kind: ConfigMap |
| metadata: |
| name: {{.Name}} |
| namespace: {{.Namespace}} |
| labels: |
| {{range $k,$v := .Labels }} |
| {{$k}}: {{$v}} |
| {{end}} |
| annotations: |
| {{range $k,$v := .Cluster.Spec.Annotations }} |
| {{$k}}: {{$v}} |
| {{end}} |
| data: |
| airflow.cfg: | |
| [core] |
| airflow_home = /usr/local/airflow |
| dags_folder = /usr/local/airflow/dags |
| base_log_folder = /usr/local/airflow/logs |
| logging_level = INFO |
| executor = KubernetesExecutor |
| parallelism = 32 |
| load_examples = False |
| plugins_folder = /usr/local/airflow/plugins |
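| # {{.SQLConn}} is rendered by the operator. As a rough illustration only |
| # (hypothetical host and credentials), a SQLAlchemy connection URI looks like: |
| #   mysql://airflow:airflow@airflow-sql:3306/airflow |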
| sql_alchemy_conn = {{.SQLConn}} |
| |
| [scheduler] |
| dag_dir_list_interval = 300 |
| child_process_log_directory = /usr/local/airflow/logs/scheduler |
| # Task instances listen for an external kill signal (when you clear tasks |
| # from the CLI or the UI); this defines the frequency (in seconds) at which |
| # they should listen. |
| job_heartbeat_sec = 5 |
| max_threads = 2 |
| |
| # The scheduler constantly tries to trigger new tasks (look at the |
| # scheduler section in the docs for more information). This defines |
| # how often the scheduler should run (in seconds). |
| scheduler_heartbeat_sec = 5 |
| |
| # After how much time (in seconds) the scheduler should terminate. |
| # -1 indicates it should run continuously (see also num_runs) |
| run_duration = -1 |
| |
| # After how much time (in seconds) new DAGs should be picked up from the filesystem |
| min_file_process_interval = 0 |
| |
| statsd_on = False |
| statsd_host = localhost |
| statsd_port = 8125 |
| statsd_prefix = airflow |
| |
| print_stats_interval = 30 |
| scheduler_zombie_task_threshold = 300 |
| max_tis_per_query = 0 |
| authenticate = False |
| |
| # Turn off scheduler catchup by setting this to False. |
| # Default behavior is unchanged and command-line backfills still work, |
| # but the scheduler will not do catchup if this is False. |
| # It can also be set on a per-DAG basis in the DAG definition (catchup) |
| catchup_by_default = True |
| |
| [webserver] |
| # The base URL of your website, since airflow cannot guess what domain or |
| # cname you are using. This is used in the automated emails that airflow |
| # sends to point links to the right web server |
| base_url = http://localhost:8080 |
| |
| # The IP address specified when starting the web server |
| web_server_host = 0.0.0.0 |
| |
| # The port on which to run the web server |
| web_server_port = 8080 |
| |
| # Paths to the SSL certificate and key for the web server. When both are |
| # provided SSL will be enabled. This does not change the web server port. |
| web_server_ssl_cert = |
| web_server_ssl_key = |
| |
| # Number of seconds the webserver waits before killing a gunicorn master that doesn't respond |
| web_server_master_timeout = 120 |
| |
| # Number of seconds the gunicorn webserver waits before timing out on a worker |
| web_server_worker_timeout = 120 |
| |
| # Number of workers to refresh at a time. When set to 0, worker refresh is |
| # disabled. When nonzero, airflow periodically refreshes webserver workers by |
| # bringing up new ones and killing old ones. |
| worker_refresh_batch_size = 1 |
| |
| # Number of seconds to wait before refreshing a batch of workers. |
| worker_refresh_interval = 30 |
| |
| # Secret key used to run your flask app |
| secret_key = temporary_key |
| |
| # Number of workers to run the Gunicorn web server |
| workers = 4 |
| |
| # The worker class gunicorn should use. Choices include |
| # sync (default), eventlet, gevent |
| worker_class = sync |
| |
| # Log files for the gunicorn webserver. '-' means log to stderr. |
| access_logfile = - |
| error_logfile = - |
| |
| # Expose the configuration file in the web server |
| expose_config = False |
| |
| # Set to true to turn on authentication: |
| # https://airflow.incubator.apache.org/security.html#web-authentication |
| authenticate = False |
| |
| # Filter the list of dags by owner name (requires authentication to be enabled) |
| filter_by_owner = False |
| |
| # Filtering mode. Choices include user (default) and ldapgroup. |
| # Ldap group filtering requires using the ldap backend |
| # |
| # Note that the ldap server needs the "memberOf" overlay to be set up |
| # in order to use the ldapgroup mode. |
| owner_mode = user |
| |
| # Default DAG view. Valid values are: |
| # tree, graph, duration, gantt, landing_times |
| dag_default_view = tree |
| |
| # Default DAG orientation. Valid values are: |
| # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) |
| dag_orientation = LR |
| |
| # Puts the webserver in demonstration mode; blurs the names of Operators for |
| # privacy. |
| demo_mode = False |
| |
| # The amount of time (in secs) the webserver will wait for the initial handshake |
| # while fetching logs from another worker machine |
| log_fetch_timeout_sec = 5 |
| |
| # By default, the webserver shows paused DAGs. Flip this to hide paused |
| # DAGs by default |
| hide_paused_dags_by_default = False |
| |
| # Consistent page size across all listing views in the UI |
| page_size = 100 |
| |
| # Use FAB-based webserver with RBAC feature |
| rbac = True |
| |
| [smtp] |
| # If you want airflow to send emails on retries or failures, and you want to |
| # use the airflow.utils.email.send_email_smtp function, you have to configure |
| # an smtp server here |
| smtp_host = localhost |
| smtp_starttls = True |
| smtp_ssl = False |
| # Uncomment and set the user/pass settings if you want to use SMTP AUTH |
| # smtp_user = airflow |
| # smtp_password = airflow |
| smtp_port = 25 |
| smtp_mail_from = airflow@example.com |
| |
| [kubernetes] |
| airflow_configmap = {{.Name}} |
| worker_container_repository = {{.Cluster.Spec.Worker.Image}} |
| worker_container_tag = {{.Cluster.Spec.Worker.Version}} |
| worker_container_image_pull_policy = IfNotPresent |
| delete_worker_pods = True |
| worker_service_account_name = |
| git_repo = {{.Cluster.Spec.DAGs.Git.Repo}} |
| git_branch = {{.Cluster.Spec.DAGs.Git.Branch}} |
| git_subpath = {{.Cluster.Spec.DAGs.DagSubdir}} |
| git_dags_folder_mount_point = /usr/local/airflow/dags/ |
| git_sync_dest = gitdags |
| git_user = |
| git_password = |
| in_cluster = True |
| namespace = {{.Namespace}} |
| gcp_service_account_keys = |
| |
| # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync |
| git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 |
| git_sync_container_tag = v2.0.5 |
| git_sync_init_container_name = git-sync-clone |
| |
| [kubernetes_node_selectors] |
| # The key-value pairs to be given to worker pods. |
| # The worker pods will be scheduled onto nodes that match the specified key-value pairs. |
| # Should be supplied in the format: key = value |
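| # Example (hypothetical node label; uncomment only if such a label exists on |
| # your nodes): |
| # pool = airflow-workers |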
| |
| [hive] |
| # Default mapreduce queue for HiveOperator tasks |
| default_hive_mapred_queue = |
| |
| [celery] |
| # This section only applies if you are using the CeleryExecutor in |
| # the [core] section above |
| |
| # The app name that will be used by celery |
| celery_app_name = airflow.executors.celery_executor |
| |
| # The concurrency that will be used when starting workers with the |
| # "airflow worker" command. This defines the number of task instances that |
| # a worker will take, so size up your workers based on the resources on |
| # your worker box and the nature of your tasks |
| worker_concurrency = 16 |
| |
| # When you start an airflow worker, airflow starts a tiny web server |
| # subprocess to serve the worker's local log files to the main airflow |
| # web server, which then builds pages and sends them to users. This defines |
| # the port on which the logs are served. It needs to be unused, and |
| # visible from the main web server so it can connect to the workers. |
| worker_log_server_port = 8793 |
| |
| # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally |
| # a sqlalchemy database. Refer to the Celery documentation for more |
| # information. |
| # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings |
| broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow |
| |
| # The Celery result_backend. When a job finishes, it needs to update the |
| # metadata of the job. Therefore it will post a message on a message bus, |
| # or insert it into a database (depending on the backend) |
| # This status is used by the scheduler to update the state of the task |
| # The use of a database is highly recommended |
| # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings |
| result_backend = db+mysql://airflow:airflow@localhost:3306/airflow |
| |
| # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start |
| # it: `airflow flower`. This defines the IP that Celery Flower runs on |
| flower_host = 0.0.0.0 |
| |
| # The root URL for Flower |
| # Ex: flower_url_prefix = /flower |
| flower_url_prefix = |
| |
| # This defines the port that Celery Flower runs on |
| flower_port = 5555 |
| |
| # Securing Flower with Basic Authentication |
| # Accepts user:password pairs separated by a comma |
| # Example: flower_basic_auth = user1:password1,user2:password2 |
| flower_basic_auth = |
| |
| # Default queue that tasks get assigned to and that workers listen on. |
| default_queue = default |
| |
| # How many processes CeleryExecutor uses to sync task state. |
| # 0 means to use max(1, number of cores - 1) processes. |
| sync_parallelism = 0 |
| |
| # Import path for celery configuration options |
| celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG |
| |
| [celery_broker_transport_options] |
| # The visibility timeout defines the number of seconds to wait for the worker |
| # to acknowledge the task before the message is redelivered to another worker. |
| # Make sure to increase the visibility timeout to match the time of the longest |
| # ETA you're planning to use. This is especially important when using Redis or SQS |
| visibility_timeout = 21600 |
| |
| # In case of using SSL |
| ssl_active = False |
| ssl_key = |
| ssl_cert = |
| ssl_cacert = |
| |
| [dask] |
| # This section only applies if you are using the DaskExecutor in |
| # the [core] section above |
| |
| # The IP address and port of the Dask cluster's scheduler. |
| cluster_address = 127.0.0.1:8786 |
| # TLS/SSL settings to access a secured Dask scheduler. |
| tls_ca = |
| tls_cert = |
| tls_key = |
| |
| [ldap] |
| # set this to ldaps://<your.ldap.server>:<port> |
| uri = |
| user_filter = objectClass=* |
| user_name_attr = uid |
| group_member_attr = memberOf |
| superuser_filter = |
| data_profiler_filter = |
| bind_user = cn=Manager,dc=example,dc=com |
| bind_password = insecure |
| basedn = dc=example,dc=com |
| cacert = /etc/ca/ldap_ca.crt |
| search_scope = LEVEL |
| |
| [mesos] |
| # Mesos master address which MesosExecutor will connect to. |
| master = localhost:5050 |
| |
| # The framework name that the Airflow scheduler will register itself as on mesos |
| framework_name = Airflow |
| |
| # Number of cpu cores required for running one task instance using |
| # 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>' |
| # command on a mesos slave |
| task_cpu = 1 |
| |
| # Memory in MB required for running one task instance using |
| # 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>' |
| # command on a mesos slave |
| task_memory = 256 |
| |
| # Enable framework checkpointing for mesos |
| # See http://mesos.apache.org/documentation/latest/slave-recovery/ |
| checkpoint = False |
| |
| # Failover timeout in milliseconds. |
| # When checkpointing is enabled and this option is set, Mesos waits |
| # until the configured timeout for the MesosExecutor framework to |
| # re-register after a failover. Mesos shuts down running tasks if the |
| # MesosExecutor framework fails to re-register within this timeframe. |
| # failover_timeout = 604800 |
| |
| # Enable framework authentication for mesos |
| # See http://mesos.apache.org/documentation/latest/configuration/ |
| authenticate = False |
| |
| # Mesos credentials, if authentication is enabled |
| # default_principal = admin |
| # default_secret = admin |
| |
| # Optional Docker image to run on the slave before running the command. |
| # This image should be accessible from the mesos slave, i.e. the mesos slave |
| # should be able to pull this docker image before executing the command. |
| # docker_image_slave = puckel/docker-airflow |
| |
| [kerberos] |
| ccache = /tmp/airflow_krb5_ccache |
| # gets augmented with fqdn |
| principal = airflow |
| reinit_frequency = 3600 |
| kinit_path = kinit |
| keytab = airflow.keytab |
| |
| [cli] |
| api_client = airflow.api.client.json_client |
| endpoint_url = http://localhost:8080 |
| |
| [api] |
| auth_backend = airflow.api.auth.backend.default |
| |
| [github_enterprise] |
| api_rev = v3 |
| |
| [admin] |
| # Hide sensitive variable fields in the UI when set to True |
| hide_sensitive_variable_fields = True |
| |
| [elasticsearch] |
| elasticsearch_host = |