From 0fb4045a2c5bd9dfca2e0845ad55e3bfb51c5452 Mon Sep 17 00:00:00 2001
From: Mohamed Mehdi BEN AISSA
Date: Fri, 28 Jun 2024 10:40:06 +0200
Subject: [PATCH] feat(spark2|3): add ha support for spark-hs

Add optional `spark2_hs_ha_address` / `spark3_hs_ha_address` cluster
variables. When one is set, the Spark History Server SPNEGO principal and
keytab are derived from the alias hostname, Knox points at the alias, and a
dedicated `kerberos-spnego-ha` task file creates (or checks) the matching
HTTP principal and keytab on the `spnego_ha` hosts.

---
Review notes (ignored by `git am`):

- This patch arrived with its line structure collapsed. Line breaks and
  indentation were rebuilt from the hunk headers and the usual 2-space
  Ansible layout. NOTE(review): context-line whitespace is inferred; run
  `git apply --check` (with `--ignore-whitespace` if needed) before applying.
- kerberos-spnego-ha.yml: the `when` guards test for a non-empty
  `spark_hs_ha_address` instead of `is defined`. The spark/spark3 defaults
  below always define that variable (empty string when no alias is set), so
  `is defined` could never be false.
- kerberos-spnego-ha.yml: `mode` is "0640" in both blocks (was "640" in the
  check block).
- Jinja-templated scalars added by this patch are quoted; rendered values
  are unchanged.

 playbooks/spark3_kerberos_install.yml         | 12 ++++++-
 playbooks/spark_kerberos_install.yml          | 11 +++++++
 .../tasks/kerberos-spnego-ha.yml              | 31 +++++++++++++++++++
 tdp_vars_defaults/knox/knox.yml               |  6 ++--
 tdp_vars_defaults/spark/spark.yml             |  5 +--
 tdp_vars_defaults/spark3/spark3.yml           |  5 +--
 tdp_vars_defaults/tdp-cluster/tdp-cluster.yml |  2 ++
 topology.ini                                  |  4 +++
 8 files changed, 69 insertions(+), 7 deletions(-)
 create mode 100644 roles/spark/historyserver/tasks/kerberos-spnego-ha.yml

diff --git a/playbooks/spark3_kerberos_install.yml b/playbooks/spark3_kerberos_install.yml
index b229fd9d..ce679ab6 100644
--- a/playbooks/spark3_kerberos_install.yml
+++ b/playbooks/spark3_kerberos_install.yml
@@ -13,7 +13,17 @@
         name: tosit.tdp.spark.historyserver
         tasks_from: kerberos
     - ansible.builtin.meta: clear_facts # noqa unnamed-task
-
+- name: Kerberos Spark3 HistoryServer install
+  hosts: spnego_ha
+  strategy: linear
+  tasks:
+    - tosit.tdp.resolve: # noqa unnamed-task
+        node_name: spark3_kerberos
+    - name: Install Spark3 History Spnego HA Kerberos
+      ansible.builtin.import_role:
+        name: tosit.tdp.spark.historyserver
+        tasks_from: kerberos-spnego-ha
+    - ansible.builtin.meta: clear_facts # noqa unnamed-task
 - name: Spark3 Kerberos Client install
   hosts: spark3_client
   strategy: linear
diff --git a/playbooks/spark_kerberos_install.yml b/playbooks/spark_kerberos_install.yml
index b0de301f..29e87589 100644
--- a/playbooks/spark_kerberos_install.yml
+++ b/playbooks/spark_kerberos_install.yml
@@ -13,6 +13,17 @@
         name: tosit.tdp.spark.historyserver
         tasks_from: kerberos
     - ansible.builtin.meta: clear_facts # noqa unnamed-task
+- name: Kerberos Spark HistoryServer install
+  hosts: spnego_ha
+  strategy: linear
+  tasks:
+    - tosit.tdp.resolve: # noqa unnamed-task
+        node_name: spark_kerberos
+    - name: Install Spark History Spnego HA Kerberos
+      ansible.builtin.import_role:
+        name: tosit.tdp.spark.historyserver
+        tasks_from: kerberos-spnego-ha
+    - ansible.builtin.meta: clear_facts # noqa unnamed-task
 - name: Spark Kerberos Client install
   hosts: spark_client
   strategy: linear
diff --git a/roles/spark/historyserver/tasks/kerberos-spnego-ha.yml b/roles/spark/historyserver/tasks/kerberos-spnego-ha.yml
new file mode 100644
index 00000000..d5ebb286
--- /dev/null
+++ b/roles/spark/historyserver/tasks/kerberos-spnego-ha.yml
@@ -0,0 +1,31 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+- name: Spark HS keytabs creation
+  when: (krb_create_principals_keytabs) and (spark_hs_ha_address | default("") | length > 0)
+  block:
+    - name: Ensure HTTP HA spnego user's principal and keytab exist
+      ansible.builtin.import_role:
+        name: tosit.tdp.utils.kerberos
+        tasks_from: create_headless_principal_keytab
+      vars:
+        principal: 'HTTP/{{ spark_hs_ha_address | urlsplit("hostname") }}'
+        keytab: '{{ spark_hs_ha_address | urlsplit("hostname") }}.service.keytab'
+        user: root
+        group: "{{ hadoop_group }}"
+        mode: "0640"
+
+- name: Spark HS keytabs check
+  when: (not krb_create_principals_keytabs) and (spark_hs_ha_address | default("") | length > 0)
+  block:
+    - name: Ensure HA HTTP spnego's keytab is working
+      ansible.builtin.import_role:
+        name: tosit.tdp.utils.kerberos
+        tasks_from: check_secure_keytab
+      vars:
+        principal: 'HTTP/{{ spark_hs_ha_address | urlsplit("hostname") }}'
+        keytab: '{{ spark_hs_ha_address | urlsplit("hostname") }}.service.keytab'
+        user: root
+        group: "{{ hadoop_group }}"
+        mode: "0640"
diff --git a/tdp_vars_defaults/knox/knox.yml b/tdp_vars_defaults/knox/knox.yml
index 1d2d2c9b..7d105e2e 100644
--- a/tdp_vars_defaults/knox/knox.yml
+++ b/tdp_vars_defaults/knox/knox.yml
@@ -157,11 +157,13 @@ tdpldap_services:
     location: /ws
     port: "{{ yarn_rm_https_port }}"
   SPARKHISTORYUI:
-    hosts: "{{ groups['spark_hs'] | default([]) | map('tosit.tdp.access_fqdn', hostvars) | list }}"
+    hosts: "{% if spark2_hs_ha_address is defined %}{{ spark2_hs_ha_address | urlsplit('hostname') | split(' ') | list }}{% else %}{{ groups['spark_hs'] | default([]) | map('tosit.tdp.access_fqdn', hostvars) | list }}{% endif %}"
     port: "{{ spark_hs_https_port }}"
+    scheme: "{% if spark2_hs_ha_address is defined %}{{ spark2_hs_ha_address | urlsplit('scheme') }}://{% endif %}"
   SPARK3HISTORYUI:
-    hosts: "{{ groups['spark3_hs'] | default([]) | map('tosit.tdp.access_fqdn', hostvars) | list }}"
+    hosts: "{% if spark3_hs_ha_address is defined %}{{ spark3_hs_ha_address | urlsplit('hostname') | split(' ') | list }}{% else %}{{ groups['spark3_hs'] | default([]) | map('tosit.tdp.access_fqdn', hostvars) | list }}{% endif %}"
     port: "{{ spark3_hs_https_port}}"
+    scheme: "{% if spark3_hs_ha_address is defined %}{{ spark3_hs_ha_address | urlsplit('scheme') }}://{% endif %}"
   WEBHBASE:
     hosts: "{{ groups['hbase_rest'] | default([]) | map('tosit.tdp.access_fqdn', hostvars) | list }}"
     port: "{{ hbase_rest_client_port }}"
diff --git a/tdp_vars_defaults/spark/spark.yml b/tdp_vars_defaults/spark/spark.yml
index 84b926ac..4a5862e1 100644
--- a/tdp_vars_defaults/spark/spark.yml
+++ b/tdp_vars_defaults/spark/spark.yml
@@ -70,8 +70,9 @@ spark_truststore_location: /etc/ssl/certs/truststore.jks
 spark_truststore_password: Truststore123!
 
 # Spark History Server kerberos
-spark_ui_spnego_principal: "HTTP/{{ ansible_fqdn }}@{{ realm }}"
-spark_ui_spnego_keytab: /etc/security/keytabs/spnego.service.keytab
+spark_hs_ha_address: "{% if spark2_hs_ha_address is defined %}{{ spark2_hs_ha_address }}{% endif %}"
+spark_ui_spnego_principal: 'HTTP/{% if spark_hs_ha_address != "" %}{{ spark_hs_ha_address | urlsplit("hostname") }}{% else %}{{ ansible_fqdn }}{% endif %}@{{ realm }}'
+spark_ui_spnego_keytab: '/etc/security/keytabs/{% if spark_hs_ha_address != "" %}{{ spark_hs_ha_address | urlsplit("hostname") }}.service.keytab{% else %}spnego.service.keytab{% endif %}'
 
 # spark-defaults.conf - common
 spark_defaults_common:
diff --git a/tdp_vars_defaults/spark3/spark3.yml b/tdp_vars_defaults/spark3/spark3.yml
index 88a3b704..9f35e5b0 100644
--- a/tdp_vars_defaults/spark3/spark3.yml
+++ b/tdp_vars_defaults/spark3/spark3.yml
@@ -71,8 +71,9 @@ hadoop_credentials_properties:
     value: '{{ spark_keystore_password }}'
 
 # Spark History Server kerberos
-spark_ui_spnego_principal: "HTTP/{{ ansible_fqdn }}@{{ realm }}"
-spark_ui_spnego_keytab: /etc/security/keytabs/spnego.service.keytab
+spark_hs_ha_address: "{% if spark3_hs_ha_address is defined %}{{ spark3_hs_ha_address }}{% endif %}"
+spark_ui_spnego_principal: 'HTTP/{% if spark_hs_ha_address != "" %}{{ spark_hs_ha_address | urlsplit("hostname") }}{% else %}{{ ansible_fqdn }}{% endif %}@{{ realm }}'
+spark_ui_spnego_keytab: '/etc/security/keytabs/{% if spark_hs_ha_address != "" %}{{ spark_hs_ha_address | urlsplit("hostname") }}.service.keytab{% else %}spnego.service.keytab{% endif %}'
 
 # spark-defaults.conf - common
 spark_defaults_common:
diff --git a/tdp_vars_defaults/tdp-cluster/tdp-cluster.yml b/tdp_vars_defaults/tdp-cluster/tdp-cluster.yml
index 8aaf4c7d..cacfb351 100644
--- a/tdp_vars_defaults/tdp-cluster/tdp-cluster.yml
+++ b/tdp_vars_defaults/tdp-cluster/tdp-cluster.yml
@@ -252,3 +252,5 @@ ldap:
 #############################
 
 # ranger_ha_address: "http[s]://dns_alias:port"
+# spark2_hs_ha_address: "http[s]://dns_alias:port"
+# spark3_hs_ha_address: "http[s]://dns_alias:port"
diff --git a/topology.ini b/topology.ini
index 709ee77d..33d17ef4 100644
--- a/topology.ini
+++ b/topology.ini
@@ -97,12 +97,14 @@ master3
 edge
 
 [spark_hs:children]
+master2
 master3
 
 [spark_client:children]
 edge
 
 [spark3_hs:children]
+master2
 master3
 
 [spark3_client:children]
@@ -113,6 +115,8 @@ edge
 
 [spnego_ha:children]
 ranger_admin
+spark_hs
+spark3_hs
 
 # Section Postgresql_client from tdp_prerequisites
 [postgresql_client:children]