diff --git a/spark-rapids/mig.sh b/spark-rapids/mig.sh index 85300348d..6d0874ff3 100644 --- a/spark-rapids/mig.sh +++ b/spark-rapids/mig.sh @@ -26,34 +26,207 @@ set -euxo pipefail +function os_id() { + grep '^ID=' /etc/os-release | cut -d= -f2 | xargs +} + +function os_version() { + grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs +} + +function is_debian() { + [[ "$(os_id)" == 'debian' ]] +} + +function is_debian10() { + is_debian && [[ "$(os_version)" == '10'* ]] +} + +function is_debian11() { + is_debian && [[ "$(os_version)" == '11'* ]] +} + +function is_debian12() { + is_debian && [[ "$(os_version)" == '12'* ]] +} + +function is_ubuntu() { + [[ "$(os_id)" == 'ubuntu' ]] +} + +function is_ubuntu18() { + is_ubuntu && [[ "$(os_version)" == '18.04'* ]] +} + +function is_ubuntu20() { + is_ubuntu && [[ "$(os_version)" == '20.04'* ]] +} + +function is_ubuntu22() { + is_ubuntu && [[ "$(os_version)" == '22.04'* ]] +} + +function is_rocky() { + [[ "$(os_id)" == 'rocky' ]] +} + +function is_rocky8() { + is_rocky && [[ "$(os_version)" == '8'* ]] +} + +function is_rocky9() { + is_rocky && [[ "$(os_version)" == '9'* ]] +} + +function os_vercat() { + if is_ubuntu ; then + os_version | sed -e 's/[^0-9]//g' + elif is_rocky ; then + os_version | sed -e 's/[^0-9].*$//g' + else + os_version + fi +} + function get_metadata_attribute() { local -r attribute_name=$1 - local -r default_value=$2 + local -r default_value="${2:-}" /usr/share/google/get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" } +CA_TMPDIR="$(mktemp -u -d -p /run/tmp -t ca_dir-XXXX)" +PSN="$(get_metadata_attribute private_secret_name)" +readonly PSN +function configure_dkms_certs() { + if [[ -z "${PSN}" ]]; then + echo "No signing secret provided. skipping"; + return 0 + fi + + mkdir -p "${CA_TMPDIR}" + + # If the private key exists, verify it + if [[ -f "${CA_TMPDIR}/db.rsa" ]]; then + echo "Private key material exists" + + local expected_modulus_md5sum + expected_modulus_md5sum=$(get_metadata_attribute cert_modulus_md5sum) + if [[ -n "${expected_modulus_md5sum}" ]]; then + modulus_md5sum="${expected_modulus_md5sum}" + else + modulus_md5sum="bd40cf5905c7bba4225d330136fdbfd3" + fi + + # Verify that cert md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl rsa -noout -modulus -in \"${CA_TMPDIR}/db.rsa\" | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched rsa key modulus" + fi + ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/dkms/mok.key + + # Verify that key md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl x509 -noout -modulus -in /var/lib/dkms/mok.pub | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched x509 cert modulus" + fi + + return + fi + + + # Retrieve cloud secrets keys + local sig_priv_secret_name + sig_priv_secret_name="${PSN}" + local sig_pub_secret_name + sig_pub_secret_name="$(get_metadata_attribute public_secret_name)" + local sig_secret_project + sig_secret_project="$(get_metadata_attribute secret_project)" + local sig_secret_version + sig_secret_version="$(get_metadata_attribute secret_version)" + + # If metadata values are not set, do not write mok keys + if [[ -z "${sig_priv_secret_name}" ]]; then return 0 ; fi + + # Write private material to volatile storage + gcloud secrets versions access "${sig_secret_version}" \ + --project="${sig_secret_project}" \ + --secret="${sig_priv_secret_name}" \ + | dd status=none of="${CA_TMPDIR}/db.rsa" + + # Write public material to volatile storage + gcloud secrets versions access "${sig_secret_version}" \ + --project="${sig_secret_project}" \ + --secret="${sig_pub_secret_name}" \ + | base64 --decode \ + | dd status=none of="${CA_TMPDIR}/db.der" + + # symlink private key and copy public cert from volatile storage for DKMS + if is_ubuntu ; then + mkdir -p /var/lib/shim-signed/mok + ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/shim-signed/mok/MOK.priv + cp -f "${CA_TMPDIR}/db.der" /var/lib/shim-signed/mok/MOK.der + else + mkdir -p /var/lib/dkms/ + ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/dkms/mok.key + cp -f "${CA_TMPDIR}/db.der" /var/lib/dkms/mok.pub + fi +} + +function clear_dkms_key { + if [[ -z "${PSN}" ]]; then + echo "No signing secret provided. skipping" >2 + return 0 + fi + echo "WARN -- PURGING SIGNING MATERIAL -- WARN" >2 + echo "future dkms runs will not use correct signing key" >2 + rm -rf "${CA_TMPDIR}" /var/lib/dkms/mok.key /var/lib/shim-signed/mok/MOK.priv +} + +function add_contrib_components() { + if ! is_debian ; then + return + fi + if is_debian12 ; then + # Include in sources file components on which nvidia-open-kernel-dkms depends + local -r debian_sources="/etc/apt/sources.list.d/debian.sources" + local components="main contrib" + + sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" + elif is_debian ; then + sed -i -e 's/ main$/ main contrib/' /etc/apt/sources.list + fi +} + +# Short name for nvidia urls +if is_rocky ; then + shortname="$(os_id | sed -e 's/rocky/rhel/')$(os_vercat)" +else + shortname="$(os_id)$(os_vercat)" +fi +readonly shortname + +# Detect dataproc image version from its various names +if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then + DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" +fi + # Fetch Linux Family distro and Dataproc Image version readonly OS_NAME=$(lsb_release -is | tr '[:upper:]' '[:lower:]') -readonly ROLE="$(/usr/share/google/get_metadata_value attributes/dataproc-role)" -DATAPROC_IMAGE_VERSION=$(/usr/share/google/get_metadata_value image|grep -Eo 'dataproc-[0-9]-[0-9]'|grep -Eo '[0-9]-[0-9]'|sed -e 's/-/./g') -echo "${DATAPROC_IMAGE_VERSION}" >> /usr/local/share/startup-mig-log # CUDA version and Driver version config -CUDA_VERSION=$(get_metadata_attribute 'cuda-version' '12.2.2') #12.2.2 -NVIDIA_DRIVER_VERSION=$(get_metadata_attribute 'driver-version' '535.104.05') #535.104.05 +CUDA_VERSION=$(get_metadata_attribute 'cuda-version' '12.4.1') #12.2.2 +NVIDIA_DRIVER_VERSION=$(get_metadata_attribute 'driver-version' '550.54.15') #535.104.05 CUDA_VERSION_MAJOR="${CUDA_VERSION%.*}" #12.2 +# EXCEPTIONS # Change CUDA version for Ubuntu 18 (Cuda 12.1.1 - Driver v530.30.02 is the latest version supported by Ubuntu 18) if [[ "${OS_NAME}" == "ubuntu" ]]; then - UBUNTU_VERSION=$(lsb_release -r | awk '{print $2}') # 20.04 - UBUNTU_VERSION=${UBUNTU_VERSION%.*} - if [[ "${UBUNTU_VERSION}" == "18" ]]; then + if is_ubuntu18 ; then CUDA_VERSION=$(get_metadata_attribute 'cuda-version' '12.1.1') #12.1.1 NVIDIA_DRIVER_VERSION=$(get_metadata_attribute 'driver-version' '530.30.02') #530.30.02 CUDA_VERSION_MAJOR="${CUDA_VERSION%.*}" #12.1 fi fi +# Verify Secure boot SECURE_BOOT="disabled" SECURE_BOOT=$(mokutil --sb-state|awk '{print $2}') @@ -92,74 +265,168 @@ EOF systemctl enable --now install-headers.service } +readonly NVIDIA_BASE_DL_URL='https://developer.download.nvidia.com/compute' +readonly NVIDIA_REPO_URL="${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64" + +# Hold all NVIDIA-related packages from upgrading unintenionally or services like unattended-upgrades +# Users should run apt-mark unhold before they wish to upgrade these packages +function hold_nvidia_packages() { + apt-mark hold nvidia-* + apt-mark hold libnvidia-* + if dpkg -l | grep -q "xserver-xorg-video-nvidia"; then + apt-mark hold xserver-xorg-video-nvidia* + fi +} + # Install NVIDIA GPU driver provided by NVIDIA function install_nvidia_gpu_driver() { ## common steps for all linux family distros readonly NVIDIA_DRIVER_VERSION_PREFIX=${NVIDIA_DRIVER_VERSION%%.*} - ## installation steps based OS_NAME - if [[ ${OS_NAME} == "debian" ]]; then + ## For Debian & Ubuntu + readonly LOCAL_INSTALLER_DEB="cuda-repo-${shortname}-${CUDA_VERSION_MAJOR//./-}-local_${CUDA_VERSION}-${NVIDIA_DRIVER_VERSION}-1_amd64.deb" + readonly LOCAL_DEB_URL="${NVIDIA_BASE_DL_URL}/cuda/${CUDA_VERSION}/local_installers/${LOCAL_INSTALLER_DEB}" + readonly DIST_KEYRING_DIR="/var/cuda-repo-${shortname}-${CUDA_VERSION_MAJOR//./-}-local" + + ## installation steps based OS + if is_debian ; then - DEBIAN_VERSION=$(lsb_release -r|awk '{print $2}') # 10 or 11 export DEBIAN_FRONTEND=noninteractive execute_with_retries "apt-get install -y -q 'linux-headers-$(uname -r)'" - readonly LOCAL_INSTALLER_DEB="cuda-repo-debian${DEBIAN_VERSION}-${CUDA_VERSION_MAJOR//./-}-local_${CUDA_VERSION}-${NVIDIA_DRIVER_VERSION}-1_amd64.deb" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${LOCAL_INSTALLER_DEB}" -o /tmp/local-installer.deb + "${LOCAL_DEB_URL}" -o /tmp/local-installer.deb dpkg -i /tmp/local-installer.deb - cp /var/cuda-repo-debian${DEBIAN_VERSION}-${CUDA_VERSION_MAJOR//./-}-local/cuda-*-keyring.gpg /usr/share/keyrings/ - add-apt-repository contrib + rm /tmp/local-installer.deb + cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ + + add_contrib_components + execute_with_retries "apt-get update" - if [[ ${DEBIAN_VERSION} == 10 ]]; then - apt remove -y libglvnd0 + ## EXCEPTION + if is_debian10 ; then + apt-get remove -y libglvnd0 + apt-get install -y ca-certificates-java fi - execute_with_retries "apt-get install -y -q --no-install-recommends cuda-drivers-${NVIDIA_DRIVER_VERSION_PREFIX}" - execute_with_retries "apt-get install -y -q --no-install-recommends cuda-toolkit-${CUDA_VERSION_MAJOR//./-}" + configure_dkms_certs + execute_with_retries "apt-get install -y -q nvidia-kernel-open-dkms" + clear_dkms_key + execute_with_retries \ + "apt-get install -y -q --no-install-recommends cuda-drivers-${NVIDIA_DRIVER_VERSION_PREFIX}" + execute_with_retries \ + "apt-get install -y -q --no-install-recommends cuda-toolkit-${CUDA_VERSION_MAJOR//./-}" + + modprobe nvidia # enable a systemd service that updates kernel headers after reboot setup_systemd_update_headers - - elif [[ ${OS_NAME} == "ubuntu" ]]; then + # prevent auto upgrading nvidia packages + hold_nvidia_packages - UBUNTU_VERSION=$(lsb_release -r|awk '{print $2}') # 20.04 or 22.04 - UBUNTU_VERSION=${UBUNTU_VERSION%.*} # 20 or 22 + elif is_ubuntu ; then execute_with_retries "apt-get install -y -q 'linux-headers-$(uname -r)'" - readonly UBUNTU_REPO_CUDA_PIN="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}04/x86_64/cuda-ubuntu${UBUNTU_VERSION}04.pin" - curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${UBUNTU_REPO_CUDA_PIN}" -o /etc/apt/preferences.d/cuda-repository-pin-600 - - readonly LOCAL_INSTALLER_DEB="cuda-repo-ubuntu${UBUNTU_VERSION}04-${CUDA_VERSION_MAJOR//./-}-local_${CUDA_VERSION}-${NVIDIA_DRIVER_VERSION}-1_amd64.deb" - curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${LOCAL_INSTALLER_DEB}" -o /tmp/local-installer.deb + # Ubuntu 18.04 is not supported by new style NV debs; install from .run files + github + if is_ubuntu18 ; then + + # fetch .run file + curl -o driver.run \ + "https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_DRIVER_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}.run" + # Install all but kernel driver + bash driver.run --no-kernel-modules --silent --install-libglvnd + rm driver.run + + WORKDIR=/opt/install-nvidia-driver + mkdir -p "${WORKDIR}" + pushd $_ + # Fetch open souce kernel module with corresponding tag + git clone https://github.com/NVIDIA/open-gpu-kernel-modules.git \ + --branch "${NVIDIA_DRIVER_VERSION}" --single-branch + cd ${WORKDIR}/open-gpu-kernel-modules + # + # build kernel modules + # + make -j$(nproc) modules \ + > /var/log/open-gpu-kernel-modules-build.log \ + 2> /var/log/open-gpu-kernel-modules-build_error.log + configure_dkms_certs + # sign + for module in $(find kernel-open -name '*.ko'); do + /lib/modules/$(uname -r)/build/scripts/sign-file sha256 \ + "${CA_TMPDIR}/db.rsa" \ + "${CA_TMPDIR}/db.der" \ + "${module}" + done + clear_dkms_key + # install + make modules_install \ + >> /var/log/open-gpu-kernel-modules-build.log \ + 2>> /var/log/open-gpu-kernel-modules-build_error.log + depmod -a + modprobe nvidia + popd + + # + # Install CUDA + # + cuda_runfile="cuda_${CUDA_VERSION}_${NVIDIA_DRIVER_VERSION}_linux.run" + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${cuda_runfile}" \ + -o cuda.run + bash cuda.run --silent --toolkit --no-opengl-libs + rm cuda.run + else + # Install from repo provided by NV + readonly UBUNTU_REPO_CUDA_PIN="${NVIDIA_REPO_URL}/cuda-${shortname}.pin" + + curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ + "${UBUNTU_REPO_CUDA_PIN}" -o /etc/apt/preferences.d/cuda-repository-pin-600 + + curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ + "${LOCAL_DEB_URL}" -o /tmp/local-installer.deb + + dpkg -i /tmp/local-installer.deb + rm /tmp/local-installer.deb + cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ + execute_with_retries "apt-get update" + + execute_with_retries "apt-get install -y -q --no-install-recommends dkms" + configure_dkms_certs + for pkg in "nvidia-driver-${NVIDIA_DRIVER_VERSION_PREFIX}-open" \ + "cuda-drivers-${NVIDIA_DRIVER_VERSION_PREFIX}" \ + "cuda-toolkit-${CUDA_VERSION_MAJOR//./-}" ; do + execute_with_retries "apt-get install -y -q --no-install-recommends ${pkg}" + done + clear_dkms_key + + modprobe nvidia + fi - dpkg -i /tmp/local-installer.deb - cp /var/cuda-repo-ubuntu${UBUNTU_VERSION}04-${CUDA_VERSION_MAJOR//./-}-local/cuda-*-keyring.gpg /usr/share/keyrings/ - execute_with_retries "apt-get update" - - execute_with_retries "apt-get install -y -q --no-install-recommends cuda-drivers-${NVIDIA_DRIVER_VERSION_PREFIX}" - execute_with_retries "apt-get install -y -q --no-install-recommends cuda-toolkit-${CUDA_VERSION_MAJOR//./-}" # enable a systemd service that updates kernel headers after reboot setup_systemd_update_headers + # prevent auto upgrading nvidia packages + hold_nvidia_packages - elif [[ ${OS_NAME} == "rocky" ]]; then + elif is_rocky ; then - ROCKY_VERSION=$(lsb_release -r | awk '{print $2}') # 8.8 or 9.1 - ROCKY_VERSION=${ROCKY_VERSION%.*} # 8 or 9 + # Ensure the Correct Kernel Development Packages are Installed + execute_with_retries "dnf -y -q update --exclude=systemd*,kernel*" + execute_with_retries "dnf -y -q install pciutils kernel-devel gcc" - readonly NVIDIA_ROCKY_REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/rhel${ROCKY_VERSION}/x86_64/cuda-rhel${ROCKY_VERSION}.repo" + readonly NVIDIA_ROCKY_REPO_URL="${NVIDIA_REPO_URL}/cuda-${shortname}.repo" execute_with_retries "dnf config-manager --add-repo ${NVIDIA_ROCKY_REPO_URL}" execute_with_retries "dnf clean all" - execute_with_retries "dnf -y -q module install nvidia-driver:${NVIDIA_DRIVER_VERSION_PREFIX}" - execute_with_retries "dnf -y -q install cuda-toolkit-${CUDA_VERSION_MAJOR//./-}" + configure_dkms_certs + execute_with_retries "dnf -y -q module install nvidia-driver:latest-dkms" + clear_dkms_key + execute_with_retries "dnf -y -q install cuda-toolkit" modprobe nvidia else @@ -184,123 +451,53 @@ function configure_mig_cgi() { fi } -function upgrade_kernel() { - # Determine which kernel is installed - if [[ "${OS_NAME}" == "debian" ]]; then - CURRENT_KERNEL_VERSION=`cat /proc/version | perl -ne 'print( / Debian (\S+) / )'` - elif [[ "${OS_NAME}" == "ubuntu" ]]; then - CURRENT_KERNEL_VERSION=`cat /proc/version | perl -ne 'print( /^Linux version (\S+) / )'` - elif [[ ${OS_NAME} == rocky ]]; then - KERN_VER=$(yum info --installed kernel | awk '/^Version/ {print $3}') - KERN_REL=$(yum info --installed kernel | awk '/^Release/ {print $3}') - CURRENT_KERNEL_VERSION="${KERN_VER}-${KERN_REL}" - else - echo "unsupported OS: ${OS_NAME}!" - exit -1 - fi - - # Get latest version available in repos - if [[ "${OS_NAME}" == "debian" ]]; then - apt-get -qq update - TARGET_VERSION=$(apt-cache show --no-all-versions linux-image-amd64 | awk '/^Version/ {print $2}') - elif [[ "${OS_NAME}" == "ubuntu" ]]; then - apt-get -qq update - LATEST_VERSION=$(apt-cache show --no-all-versions linux-image-gcp | awk '/^Version/ {print $2}') - TARGET_VERSION=`echo ${LATEST_VERSION} | perl -ne 'printf(q{%s-%s-gcp},/(\d+\.\d+\.\d+)\.(\d+)/)'` - elif [[ "${OS_NAME}" == "rocky" ]]; then - if yum info --available kernel ; then - KERN_VER=$(yum info --available kernel | awk '/^Version/ {print $3}') - KERN_REL=$(yum info --available kernel | awk '/^Release/ {print $3}') - TARGET_VERSION="${KERN_VER}-${KERN_REL}" - else - TARGET_VERSION="${CURRENT_KERNEL_VERSION}" - fi - fi - - # Skip this script if we are already on the target version - if [[ "${CURRENT_KERNEL_VERSION}" == "${TARGET_VERSION}" ]]; then - echo "target kernel version [${TARGET_VERSION}] is installed" - - # Reboot may have interrupted dpkg. Bring package system to a good state - if [[ "${OS_NAME}" == "debian" || "${OS_NAME}" == "ubuntu" ]]; then - dpkg --configure -a - fi - - return 0 - fi - - # Install the latest kernel - if [[ ${OS_NAME} == debian ]]; then - apt-get install -y linux-image-amd64 - elif [[ "${OS_NAME}" == "ubuntu" ]]; then - apt-get install -y linux-image-gcp - elif [[ "${OS_NAME}" == "rocky" ]]; then - dnf -y -q install kernel - fi - - # Make it possible to reboot before init actions are complete - #1033 - DP_ROOT=/usr/local/share/google/dataproc - STARTUP_SCRIPT="${DP_ROOT}/startup-script.sh" - POST_HDFS_STARTUP_SCRIPT="${DP_ROOT}/post-hdfs-startup-script.sh" - - for startup_script in ${STARTUP_SCRIPT} ${POST_HDFS_STARTUP_SCRIPT} ; do - sed -i -e 's:/usr/bin/env bash:/usr/bin/env bash\nexit 0:' ${startup_script} - done - - cp /var/log/dataproc-initialization-script-0.log /var/log/dataproc-initialization-script-0.log.0 - - systemctl reboot -} - # Verify if compatible linux distros and secure boot options are used function check_os_and_secure_boot() { - if [[ "${OS_NAME}" == "debian" ]]; then - DEBIAN_VERSION=$(lsb_release -r | awk '{print $2}') # 10 or 11 - if [[ "${DEBIAN_VERSION}" != "10" && "${DEBIAN_VERSION}" != "11" ]]; then - echo "Error: The Debian version (${DEBIAN_VERSION}) is not supported. Please use a compatible Debian version." + if is_debian ; then + if ! is_debian10 && ! is_debian11 && ! is_debian12 ; then + echo "Error: The Debian version ($(os_version)) is not supported. Please use a compatible Debian version." exit 1 fi - elif [[ "${OS_NAME}" == "ubuntu" ]]; then - UBUNTU_VERSION=$(lsb_release -r | awk '{print $2}') # 20.04 - UBUNTU_VERSION=${UBUNTU_VERSION%.*} - if [[ "${UBUNTU_VERSION}" != "18" && "${UBUNTU_VERSION}" != "20" && "${UBUNTU_VERSION}" != "22" ]]; then - echo "Error: The Ubuntu version (${UBUNTU_VERSION}) is not supported. Please use a compatible Ubuntu version." + elif is_ubuntu ; then + if ! is_ubuntu18 && ! is_ubuntu20 && ! is_ubuntu22 ; then + echo "Error: The Ubuntu version ($(os_version)) is not supported. Please use a compatible Ubuntu version." exit 1 fi - elif [[ "${OS_NAME}" == "rocky" ]]; then - ROCKY_VERSION=$(lsb_release -r | awk '{print $2}') # 8 or 9 - ROCKY_VERSION=${ROCKY_VERSION%.*} - if [[ "${ROCKY_VERSION}" != "8" && "${ROCKY_VERSION}" != "9" ]]; then - echo "Error: The Rocky Linux version (${ROCKY_VERSION}) is not supported. Please use a compatible Rocky Linux version." + elif is_rocky ; then + if ! is_rocky8 && ! is_rocky9 ; then + echo "Error: The Rocky Linux version ($(os_version)) is not supported. Please use a compatible Rocky Linux version." exit 1 fi fi - if [[ "${SECURE_BOOT}" == "enabled" ]]; then - echo "Error: Secure Boot is enabled. Please disable Secure Boot while creating the cluster." + if [[ "${SECURE_BOOT}" == "enabled" && $(echo "${DATAPROC_IMAGE_VERSION} <= 2.1" | bc -l) == 1 ]]; then + echo "Error: Secure Boot is not supported before image 2.2. Please disable Secure Boot while creating the cluster." exit 1 + elif [[ "${SECURE_BOOT}" == "enabled" ]] && [[ -z "${PSN}" ]]; then + echo "Secure boot is enabled, but no signing material provided." + echo "Please either disable secure boot or provide signing material as per" + echo "https://github.com/GoogleCloudDataproc/custom-images/tree/master/examples/secure-boot" + return 1 fi } -# Detect dataproc image version from its various names -if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then - DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" -fi - function remove_old_backports { # This script uses 'apt-get update' and is therefore potentially dependent on # backports repositories which have been archived. In order to mitigate this # problem, we will remove any reference to backports repos older than oldstable # https://github.com/GoogleCloudDataproc/initialization-actions/issues/1157 + oldoldstable=$(curl -s https://deb.debian.org/debian/dists/oldoldstable/Release | awk '/^Codename/ {print $2}'); oldstable=$(curl -s https://deb.debian.org/debian/dists/oldstable/Release | awk '/^Codename/ {print $2}'); stable=$(curl -s https://deb.debian.org/debian/dists/stable/Release | awk '/^Codename/ {print $2}'); - matched_files="$(grep -rsil '\-backports' /etc/apt/sources.list*)" + matched_files=( $(test -d /etc/apt && grep -rsil '\-backports' /etc/apt/sources.list*||:) ) + if [[ -n "$matched_files" ]]; then - for filename in "$matched_files"; do - grep -e "$oldstable-backports" -e "$stable-backports" "$filename" || \ - sed -i -e 's/^.*-backports.*$//' "$filename" + for filename in "${matched_files[@]}"; do + # Fetch from archive.debian.org for ${oldoldstable}-backports + perl -pi -e "s{^(deb[^\s]*) https?://[^/]+/debian ${oldoldstable}-backports } + {\$1 https://archive.debian.org/debian ${oldoldstable}-backports }g" "${filename}" done fi } @@ -312,20 +509,15 @@ function main() { check_os_and_secure_boot - if [[ "${OS_NAME}" == "rocky" ]]; then - if dnf list kernel-devel-$(uname -r) && dnf list kernel-headers-$(uname -r); then - echo "kernel devel and headers packages are available. Proceed without kernel upgrade." - else - upgrade_kernel - fi - fi - if [[ ${OS_NAME} == debian ]] || [[ ${OS_NAME} == ubuntu ]]; then export DEBIAN_FRONTEND=noninteractive - execute_with_retries "apt-get update" + execute_with_retries "apt-get --allow-releaseinfo-change update" execute_with_retries "apt-get install -y -q pciutils" elif [[ ${OS_NAME} == rocky ]] ; then execute_with_retries "dnf -y -q install pciutils" + else + echo "Unsupported OS: '${OS_NAME}'" + exit 1 fi # default MIG to on when this script is used