Skip to content

Commit

Permalink
airflow: 1st commit
Browse files Browse the repository at this point in the history
  • Loading branch information
leopaul36 committed Dec 21, 2021
1 parent 81665f4 commit 352edc3
Show file tree
Hide file tree
Showing 17 changed files with 2,153 additions and 0 deletions.
55 changes: 55 additions & 0 deletions galaxy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
### REQUIRED

# Collection namespace: the company, brand, organization or product under which
# all content is published. Lowercase alphanumerics and underscores only; it
# must not begin with an underscore or a digit, nor contain consecutive
# underscores.
namespace: "tosit"

# Collection name; subject to the same character rules as 'namespace'.
name: "tdp_extra"

# Collection version, expressed as a semantic version.
version: "0.0.1"

# Markdown (.md) readme file, given relative to the collection root.
readme: "README.md"

# Content authors, each either a bare name or in the form
# 'Full Name <email> (url) @nicks:irc/im.site#channel'.
authors: []


### OPTIONAL but strongly recommended

# One-line summary of the collection.
description: "Tosit Data Platform (TDP) extra collection"

# A single license or a list of licenses covering the collection content.
# Ansible Galaxy currently only accepts L(SPDX,https://spdx.org/licenses/)
# identifiers. Mutually exclusive with 'license_file'.
license: []

# License file path, relative to the collection root. Mutually exclusive with
# 'license'.
license_file: ""

# Tags used to index/search the collection. Each tag follows the same character
# rules as 'namespace' and 'name'.
tags: []

# Other collections that must be installed for this one to work. Keys are
# collection labels ('namespace.name'); values are version range
# L(specifiers,https://python-semanticversion.readthedocs.io/en/latest/#requirement-specification).
# Several range specifiers may be combined, separated by ','.
dependencies: {}

# URL of the source SCM repository.
repository: ""

# URL of the online documentation, if any.
documentation: ""

# URL of the collection/project homepage.
homepage: ""

# URL of the collection issue tracker.
issues: ""
6 changes: 6 additions & 0 deletions playbooks/airflow.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Applies the Airflow role of this collection to every host in the `airflow`
# inventory group.
- name: "Deploy Airflow"
  hosts: airflow
  tasks:
    - name: Apply the tosit.tdp_extra.airflow role
      import_role:
        name: tosit.tdp_extra.airflow
41 changes: 41 additions & 0 deletions roles/airflow/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Ansible Airflow TDP Extra

This role deploys the Apache Airflow release. It installs:

- Airflow Webserver
- Airflow Scheduler
- Airflow Worker (Celery + Flower)

Currently the role only supports the deployment of SSL-enabled Airflow.

## Prerequisites

- `python3` and `python3-pip` installed on all nodes
- Hadoop TDP release .tar.gz (`hadoop_dist_file` role variable) file available in `files`
- Groups `airflow_webserver` and `airflow_executor` defined in the Ansible hosts file, both reachable through the `airflow` group targeted by the example playbook
- Certificate files `{{ fqdn }}.key` and `{{ fqdn }}.pem` for every node available in `files`
- Admin access to a KDC with the `realm`, `kadmin_principal` and `kadmin_password` role vars provided

## Example

The following hosts file and playbook are given as examples.

### Host file

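```
[airflow_webserver]
tdp-master-1

[airflow_executor]
tdp-worker-1

# The example playbook targets `hosts: airflow`
[airflow:children]
airflow_webserver
airflow_executor
```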

### Required variables

- `realm`: Kerberos realm of the cluster
- `kadmin_principal`: admin principal used to connect to the kadmin service
- `kadmin_password`: password of the admin principal

### Example playbooks

- [airflow.yaml](../../playbooks/airflow.yaml)
34 changes: 34 additions & 0 deletions roles/airflow/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
# Default variables of the Airflow role.
#
# NOTE: the credentials below (database and admin passwords) are placeholder
# values. Override them from the inventory -- preferably through Ansible
# Vault -- before any real deployment.

# Airflow version (quoted so it is always handled as a string)
airflow_version: "2.2.2"

# Airflow installation directory.
# `airflow_install_dir` is also used as AIRFLOW_HOME by the role's tasks and
# service unit templates.
airflow_root_dir: /opt/tdp
airflow_install_dir: "{{ airflow_root_dir }}/airflow"

# Airflow users and group
airflow_user: airflow
airflow_group: airflow

# Airflow pid directories
airflow_pid_dir: /run/airflow

# Airflow database
airflow_db_user: airflow
airflow_db_pass: airflow123
airflow_db_host: tdp-db-1.lxd
airflow_db_port: 5432
airflow_db_name: airflow_db
# SQLAlchemy connection string built from the settings above.
airflow_sql_alchemy_conn: "postgresql+psycopg2://{{ airflow_db_user }}:{{ airflow_db_pass }}@{{ airflow_db_host }}:{{ airflow_db_port }}/{{ airflow_db_name }}"
# Deprecated alias: the variable was originally published under this misspelled
# name. It is kept so that templates or inventories still referring to it keep
# working; new code should use `airflow_sql_alchemy_conn`.
aitflow_sql_alchemy_conn: "{{ airflow_sql_alchemy_conn }}"

# Airflow admin user
airflow_admin_user: admin
airflow_admin_pass: admin123

# Airflow log dir
airflow_log_dir: /var/log/airflow

# Airflow config
airflow_executor: CeleryExecutor
# Celery result backend: same PostgreSQL database as above, through the
# `db+postgresql` scheme.
airflow_result_backend: "db+postgresql://{{ airflow_db_user }}:{{ airflow_db_pass }}@{{ airflow_db_host }}:{{ airflow_db_port }}/{{ airflow_db_name }}"
# Celery broker (Redis) URL.
airflow_broker_url: "redis://tdp-airflow-w-1.lxd:6379/0"
72 changes: 72 additions & 0 deletions roles/airflow/tasks/airflow.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
---
# Common preparation steps: system account, directory layout, tmpfiles.d entry
# and the version-independent symlink.

# --- System account ---------------------------------------------------------
- name: Add Airflow group
  group:
    name: "{{ airflow_group }}"

- name: Add Airflow users
  user:
    group: "{{ airflow_group }}"
    name: "{{ airflow_user }}"

# --- Installation tree ------------------------------------------------------
- name: Ensures {{ airflow_root_dir }} exists
  file:
    state: directory
    path: "{{ airflow_root_dir }}"

# Versioned directory; the symlink created further down points at it.
- name: Ensures {{ airflow_root_dir }}/airflow-{{ airflow_version }} exists
  file:
    state: directory
    path: "{{ airflow_root_dir }}/airflow-{{ airflow_version }}"

# --- Runtime directories owned by the Airflow account -----------------------
- name: Ensures log directory
  file:
    state: directory
    path: "{{ airflow_log_dir }}"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"

- name: Create directory for pid
  file:
    state: directory
    path: "{{ airflow_pid_dir }}"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"

- name: Template airflow tmpfiles.d
  template:
    dest: /etc/tmpfiles.d/airflow-webserver.conf
    src: tmpfiles-airflow-webserver.conf.j2

# --- Version-independent path -----------------------------------------------
# airflow_install_dir becomes a link to the versioned directory above.
- name: Create symbolic link to Airflow installation
  file:
    state: link
    dest: "{{ airflow_install_dir }}"
    src: "{{ airflow_root_dir }}/airflow-{{ airflow_version }}"

# Python packages are installed with pip3 against a constraints file rendered
# to /tmp, which is passed to each pip invocation below.
- name: Template Constraint file
  template:
    dest: /tmp/constraints-3.6.txt
    src: constraints-3.6.txt.j2

# Airflow itself, pinned to airflow_version.
- name: Pip install airflow
  pip:
    executable: pip3
    name: "apache-airflow=={{ airflow_version }}"
    extra_args: "--constraint /tmp/constraints-3.6.txt"

# Celery, unpinned here: only the constraints file is passed to pip.
- name: Pip install celery
  pip:
    executable: pip3
    name: celery
    extra_args: "--constraint /tmp/constraints-3.6.txt"

# Redis client library, installed the same way.
- name: Pip install redis
  pip:
    executable: pip3
    name: redis
    extra_args: "--constraint /tmp/constraints-3.6.txt"

# Renders the main Airflow configuration into AIRFLOW_HOME.
# The role defaults embed the database password in the SQLAlchemy and result
# backend connection strings, which are presumably rendered into this file
# (TODO confirm against airflow.cfg.j2). The file is therefore restricted to
# the Airflow account and group instead of inheriting the default mode.
- name: Template airflow.cfg file
  template:
    src: airflow.cfg.j2
    dest: "{{ airflow_install_dir }}/airflow.cfg"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: "0640"
16 changes: 16 additions & 0 deletions roles/airflow/tasks/executor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Executor-only steps: install Flower and drop the systemd units for the
# worker and Flower services.

# Uses the constraints file under /tmp, like the other pip installs.
- name: Pip install flower
  pip:
    executable: pip3
    name: flower
    extra_args: "--constraint /tmp/constraints-3.6.txt"

- name: Template Airflow worker service file
  template:
    dest: /usr/lib/systemd/system/airflow-worker.service
    src: airflow-worker.service.j2

- name: Template Airflow flower service file
  template:
    dest: /usr/lib/systemd/system/airflow-flower.service
    src: airflow-flower.service.j2
11 changes: 11 additions & 0 deletions roles/airflow/tasks/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Role entry point. The common installation runs on every targeted host; the
# remaining task files are selected by inventory group membership.

- name: Common Airflow installation
  import_tasks: airflow.yaml

# Webserver and scheduler are both gated on the airflow_webserver group.
- name: Webserver setup
  import_tasks: webserver.yaml
  when: "'airflow_webserver' in group_names"

- name: Scheduler setup
  import_tasks: scheduler.yaml
  when: "'airflow_webserver' in group_names"

- name: Executor (worker and Flower) setup
  import_tasks: executor.yaml
  when: "'airflow_executor' in group_names"
5 changes: 5 additions & 0 deletions roles/airflow/tasks/scheduler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Scheduler-only step: drop the systemd unit for the Airflow scheduler.
- name: Template Airflow scheduler service file
  template:
    dest: /usr/lib/systemd/system/airflow-scheduler.service
    src: airflow-scheduler.service.j2
27 changes: 27 additions & 0 deletions roles/airflow/tasks/webserver.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
# Webserver-only steps: initialise the metadata database, create the admin
# account, then install the webserver configuration and systemd unit.
#
# Both CLI calls use `command` with an explicit `argv` list instead of `shell`,
# so variable values are passed as single arguments and never interpreted by a
# shell (a password containing spaces, quotes, `$` or `;` used to break or
# alter the command).

- name: Airflow Database Init
  command:
    argv:
      - airflow
      - db
      - init
  environment:
    AIRFLOW_HOME: "{{ airflow_install_dir }}"

- name: Create Airflow admin user
  command:
    argv:
      - airflow
      - users
      - create
      - "--username"
      - "{{ airflow_admin_user }}"
      - "--password"
      - "{{ airflow_admin_pass }}"
      - "--role"
      - Admin
      - "--firstname"
      - admin
      - "--lastname"
      - admin
      - "--email"
      - admin@tdp
  environment:
    AIRFLOW_HOME: "{{ airflow_install_dir }}"
  register: airflow_users_create_result
  # NOTE(review): Airflow 2.2 is expected to print "<user> already exist in the
  # db" and exit 0 when the account is already present -- confirm on upgrade.
  # If the message differs the task merely reports "changed" as before.
  changed_when: "'already exist' not in airflow_users_create_result.stdout"
  # The command line carries the admin password: keep it out of Ansible output
  # and logs. Temporarily disable when debugging a failure of this task.
  no_log: true

# Templated after the database initialisation above.
- name: Template webserver_config.py file
  template:
    src: webserver_config.py.j2
    dest: "{{ airflow_install_dir }}/webserver_config.py"

- name: Template Airflow webserver service file
  template:
    src: airflow-webserver.service.j2
    dest: /usr/lib/systemd/system/airflow-webserver.service
33 changes: 33 additions & 0 deletions roles/airflow/templates/airflow-flower.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# systemd unit template for Flower, started through `airflow celery flower`.
[Unit]
Description=Airflow celery flower
# Ordering only: start after network.target has been reached.
After=network.target

[Service]
# AIRFLOW_HOME points the airflow CLI at the role's installation directory.
Environment="AIRFLOW_HOME={{ airflow_install_dir }}"
# Run under the Airflow account and group configured by the role.
User={{ airflow_user }}
Group={{ airflow_group }}
# The process started by ExecStart is the main service process.
Type=simple
# The PID file is written inside the role's pid directory.
ExecStart=/usr/local/bin/airflow celery flower --pid {{ airflow_pid_dir }}/flower.pid
# Restart only after an unclean exit, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5s

[Install]
# Started with multi-user.target once the unit is enabled.
WantedBy=multi-user.target
33 changes: 33 additions & 0 deletions roles/airflow/templates/airflow-scheduler.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# systemd unit template for the Airflow scheduler (`airflow scheduler`).
[Unit]
Description=Airflow scheduler daemon
# Ordering only: start after network.target has been reached.
After=network.target

[Service]
# AIRFLOW_HOME points the airflow CLI at the role's installation directory.
Environment="AIRFLOW_HOME={{ airflow_install_dir }}"
# Run under the Airflow account and group configured by the role.
User={{ airflow_user }}
Group={{ airflow_group }}
# The process started by ExecStart is the main service process.
Type=simple
# The PID file is written inside the role's pid directory.
ExecStart=/usr/local/bin/airflow scheduler --pid {{ airflow_pid_dir }}/scheduler.pid
# Unlike the flower and webserver units (Restart=on-failure), the scheduler is
# restarted whatever its exit status, 5 seconds after it stops.
Restart=always
RestartSec=5s

[Install]
# Started with multi-user.target once the unit is enabled.
WantedBy=multi-user.target
34 changes: 34 additions & 0 deletions roles/airflow/templates/airflow-webserver.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# systemd unit template for the Airflow webserver (`airflow webserver`).
[Unit]
Description=Airflow webserver daemon
# Ordering only: start after network.target has been reached.
After=network.target

[Service]
# AIRFLOW_HOME points the airflow CLI at the role's installation directory.
Environment="AIRFLOW_HOME={{ airflow_install_dir }}"
# Run under the Airflow account and group configured by the role.
User={{ airflow_user }}
Group={{ airflow_group }}
# The process started by ExecStart is the main service process.
Type=simple
# The PID file is written inside the role's pid directory.
ExecStart=/usr/local/bin/airflow webserver --pid {{ airflow_pid_dir }}/webserver.pid
# Restart only after an unclean exit, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5s
# Give the service its own private /tmp and /var/tmp.
PrivateTmp=true

[Install]
# Started with multi-user.target once the unit is enabled.
WantedBy=multi-user.target
Loading

0 comments on commit 352edc3

Please sign in to comment.