Skip to content

Commit

Permalink
feat: node decommissioning
Browse files Browse the repository at this point in the history
  • Loading branch information
SteBaum committed Aug 16, 2024
1 parent 99b0d0d commit bfe540b
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 0 deletions.
15 changes: 15 additions & 0 deletions playbooks/decommission/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Node decommissioning

- First, decommission the HBase regionserver of the node, for example:

```sh
ansible-playbook tdp/playbooks/decommission/hbase-decommission.yml -l worker-01
```

- If you want to update and refresh the queues of the YARN capacity scheduler, change the variables in the YARN variables file. Then execute the playbook `hadoop-decommission`, which updates and refreshes the queues of the YARN capacity scheduler, decommissions the YARN nodemanager and decommissions the HDFS datanode. Pass the FQDN of the node to decommission in the variable `excluded_node_fqdn`, for example:

```sh
ansible-playbook tdp/playbooks/decommission/hadoop-decommission.yml -e "excluded_node_fqdn=worker-01.novalocal"
```

*NB*: the decommissioning of the HDFS datanode can take several hours depending on the size of the file system.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Play run against the `hdfs_nn` hosts: it executes the `decommission`
# task file of the HDFS namenode role.
- name: Hadoop HDFS datanode Decommission
  hosts: hdfs_nn
  tasks:
    # NOTE(review): presumably loads the variables of the `hdfs_namenode`
    # node before the role tasks run -- confirm against the plugin.
    - tosit.tdp.resolve:  # noqa unnamed-task
        node_name: hdfs_namenode

    # Import only the `decommission` entry point of the role instead of
    # its default `main` task file.
    - name: Decommission HDFS datanode
      ansible.builtin.import_role:
        tasks_from: decommission
        name: tosit.tdp.hdfs.namenode

    # Clear the facts kept for the targeted hosts once the role is done.
    - ansible.builtin.meta: clear_facts  # noqa unnamed-task
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Play run against the `yarn_rm` hosts: it executes the `decommission`
# task file of the YARN resourcemanager role.
- name: Hadoop Yarn resourcemanager decommission
  hosts: yarn_rm
  tasks:
    # NOTE(review): presumably loads the variables of the
    # `yarn_resourcemanager` node before the role tasks run -- confirm
    # against the plugin.
    - tosit.tdp.resolve:  # noqa unnamed-task
        node_name: yarn_resourcemanager

    # Import only the `decommission` entry point of the role instead of
    # its default `main` task file.
    - name: Decommision YARN NM
      ansible.builtin.import_role:
        tasks_from: decommission
        name: tosit.tdp.yarn.resourcemanager

    # Clear the facts kept for the targeted hosts once the role is done.
    - ansible.builtin.meta: clear_facts  # noqa unnamed-task
11 changes: 11 additions & 0 deletions playbooks/decommission/hadoop-decommission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Decommission the Hadoop components (YARN nodemanager, then HDFS datanode)
# of one node. Pass the FQDN of the node to decommission as an extra
# variable, for example:
# ansible-playbook tdp/playbooks/decommission/hadoop-decommission.yml -e "excluded_node_fqdn=worker-01.novalocal"
#
# The playbooks below are imported in this order:
# 1. Update and refresh the queues of the YARN capacity scheduler
- ansible.builtin.import_playbook: ../utils/yarn_capacity_scheduler.yml
# 2. Decommission the YARN nodemanager (runs on the resourcemanager hosts)
- ansible.builtin.import_playbook: hadoop-components-decommission/yarn_resourcemanager_decomm_nodemanager.yml
# 3. Decommission the HDFS datanode (runs on the namenode hosts)
- ansible.builtin.import_playbook: hadoop-components-decommission/hdfs_namenode_decomm_datanode.yml
8 changes: 8 additions & 0 deletions playbooks/decommission/hbase-decommission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Decommission the HBase regionserver of one node by stopping it.
# Limit the run to the host whose regionserver should be decommissioned,
# for example:
# ansible-playbook tdp/playbooks/decommission/hbase-decommission.yml -l worker-01
- ansible.builtin.import_playbook: ../hbase_regionserver_stop.yml
35 changes: 35 additions & 0 deletions roles/hdfs/namenode/tasks/decommission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Decommission an HDFS datanode from the namenode.
#
# Required variable:
#   excluded_node_fqdn: FQDN of the datanode to decommission, usually
#     passed on the command line with `-e`.
#
# Steps: render the exclude file, add the node to it, authenticate with
# the namenode keytab, ask the namenode to re-read its host lists and
# finally print the decommissioning report.

- name: Render dfs.exclude file
  ansible.builtin.template:
    src: dfs.exclude.j2
    dest: "{{ hdfs_site['dfs.hosts.exclude'] }}"
    owner: root
    group: root
    mode: "0644"

# Edit the very file rendered above: the path comes from the same
# `dfs.hosts.exclude` setting instead of a hard-coded location, so both
# tasks stay in sync when the configuration directory is customised.
- name: Update exlude nodes file
  ansible.builtin.lineinfile:
    path: "{{ hdfs_site['dfs.hosts.exclude'] }}"
    line: "{{ item }}"
    state: present
  loop: "{{ [excluded_node_fqdn] }}"

# Obtaining a Kerberos ticket does not modify the managed configuration,
# so the task never reports a change.
- name: kinit hdfs NN
  ansible.builtin.command: kinit -kt /etc/security/keytabs/nn.service.keytab nn/{{ ansible_fqdn }}@{{ realm }}
  become_user: hdfs
  changed_when: false

# Makes the namenode re-read its include/exclude files; this is the step
# that actually starts the decommissioning, hence always "changed".
- name: RefreshNodes
  ansible.builtin.command: /usr/bin/hdfs dfsadmin -refreshNodes
  become_user: hdfs
  changed_when: true

# Read-only report of the datanodes currently being decommissioned.
- name: Check node status
  ansible.builtin.command: hdfs dfsadmin -report -decommissioning
  register: hdfs_output
  become_user: hdfs
  changed_when: false

- name: Print output of node status
  ansible.builtin.debug:
    var: hdfs_output.stdout
35 changes: 35 additions & 0 deletions roles/yarn/resourcemanager/tasks/decommission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

---
# Decommission a YARN nodemanager from the resourcemanager.
#
# Required variable:
#   excluded_node_fqdn: FQDN of the nodemanager host to decommission,
#     usually passed on the command line with `-e`.
#
# Steps: render the exclude file, add the node to it, authenticate with
# the resourcemanager keytab, ask the resourcemanager to re-read its host
# lists and finally print the list of all nodes.

- name: Render yarn.exclude file
  ansible.builtin.template:
    src: yarn.exclude.j2
    dest: "{{ yarn_site['yarn.resourcemanager.nodes.exclude-path'] }}"
    owner: root
    group: root
    mode: "0644"

# Edit the very file rendered above: the path comes from the same
# `yarn.resourcemanager.nodes.exclude-path` setting instead of a
# hard-coded location, so both tasks stay in sync when the configuration
# directory is customised.
- name: Update exlude nodes file
  ansible.builtin.lineinfile:
    path: "{{ yarn_site['yarn.resourcemanager.nodes.exclude-path'] }}"
    line: "{{ item }}"
    state: present
  loop: "{{ [excluded_node_fqdn] }}"

# Obtaining a Kerberos ticket does not modify the managed configuration,
# so the task never reports a change.
- name: kinit yarn RM
  ansible.builtin.command: kinit -kt /etc/security/keytabs/rm.service.keytab rm/{{ ansible_fqdn }}@{{ realm }}
  become_user: yarn
  changed_when: false

# Makes the resourcemanager re-read its include/exclude files; this is
# the step that actually starts the decommissioning, hence always
# "changed".
- name: RefreshNodes
  ansible.builtin.command: /usr/bin/yarn rmadmin -refreshNodes
  become_user: yarn
  changed_when: true

# Read-only listing of every node known to the resourcemanager.
- name: Check node status
  ansible.builtin.command: yarn node -list -all
  register: yarn_output
  become_user: yarn
  changed_when: false

- name: Print output of node status
  ansible.builtin.debug:
    var: yarn_output.stdout

0 comments on commit bfe540b

Please sign in to comment.