Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-mount parallelstore support #3256

Merged
merged 2 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 29 additions & 19 deletions modules/file-system/parallelstore/scripts/mount-daos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Abort on the first failing command, including a failure inside a pipeline.
set -e -o pipefail

# Distribution facts read from /etc/os-release. awk splits each matching line
# on '=', strips the double quotes from the value and prints it.
#   OS_ID            - value of the ID= entry
#   OS_ID_LIKE       - value of the ID_LIKE= entry (empty when there is none)
#   OS_VERSION       - value of the VERSION_ID entry
#   OS_VERSION_MAJOR - OS_VERSION with everything from the first dot removed
OS_ID=$(awk -F '=' '/^ID=/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_ID_LIKE=$(awk -F '=' '/^ID_LIKE=/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_VERSION=$(awk -F '=' '/VERSION_ID/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_VERSION_MAJOR=$(awk -F '=' '/VERSION_ID/ { gsub(/"/, "", $2); sub(/\..*$/, "", $2); print $2 }' /etc/os-release)

Expand All @@ -41,6 +42,20 @@ sed -i "s/#.*transport_config/transport_config/g" $daos_config
# Turn the commented-out "allow_insecure: ... false" setting in $daos_config
# into an active " allow_insecure: true" line.
sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config
# Replace every line that mentions access_points with a single
# "access_points: <value of $access_points>" line.
sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config
tpdownes marked this conversation as resolved.
Show resolved Hide resolved

#######################################
# Print the names of network interfaces that are not in the first PCI slot,
# as a comma-separated list of double-quoted names (e.g. "enp6s0","enp7s0").
# The first PCI slot is a standard network adapter while remaining interfaces
# are typically network cards dedicated to GPU or workload communication.
#
# The OS family is taken from ID as well as ID_LIKE: Debian ships no ID_LIKE
# entry and RHEL reports ID_LIKE="fedora", so looking at ID_LIKE alone would
# silently skip both distributions.
#
# Arguments:
#   $1 - ID value from /etc/os-release
#   $2 - ID_LIKE value from /etc/os-release (may be empty)
#   $3 - directory holding one entry per interface (default: /sys/class/net/)
# Outputs:
#   The list on stdout; nothing (or an empty line) when no interface matches
#   or the OS family is not recognized.
#######################################
list_extra_fabric_ifaces() {
  local os_id=$1
  local os_id_like=$2
  local net_dir=${3:-/sys/class/net/}

  if [[ "$os_id" == "debian" || " $os_id_like " == *" debian "* ]]; then
    # Debian family: predictable names; enp0s* is the adapter in slot 0.
    find "$net_dir" -not -name 'enp0s*' -regextype posix-extended \
      -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ','
  elif [[ "$os_id" == "rhel" || "$os_id_like" == *rhel* ]]; then
    # RHEL family: kernel names; eth0 is the standard adapter.
    find "$net_dir" -not -name eth0 -regextype posix-extended \
      -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ','
  fi
}

extra_interfaces=$(list_extra_fabric_ifaces "${OS_ID:-}" "${OS_ID_LIKE:-}")

# Only touch the config when there is something to exclude; the loopback
# interface is always put first in the exclusion list.
if [[ -n "$extra_interfaces" ]]; then
  exclude_fabric_ifaces="lo,$extra_interfaces"
  sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" "$daos_config"
fi

# Start service
if { [ "${OS_ID}" = "rocky" ] || [ "${OS_ID}" = "rhel" ]; } && { [ "${OS_VERSION_MAJOR}" = "8" ] || [ "${OS_VERSION_MAJOR}" = "9" ]; }; then
# TODO: Update script to change default log destination folder, after daos_agent user is supported in debian and ubuntu.
Expand Down Expand Up @@ -69,39 +84,34 @@ sed -i "s/#.*user_allow_other/user_allow_other/g" $fuse_config
# make sure limit of open files is high enough for dfuse (1M of open files)
ulimit -n 1048576

for i in {1..10}; do
# To parse mount_options as --disable-wb-cache --eq-count=8.
# shellcheck disable=SC2086
dfuse -m "$local_mount" --pool default-pool --container default-container --multi-user $mount_options && break

echo "dfuse failed, retrying in 1 seconds (attempt $i/10)..."
sleep 1
done

if ! mountpoint -q "$local_mount"; then
exit 1
fi

# Store the mounting logic in a variable
mount_command='for i in {1..10}; do /bin/dfuse -m '$local_mount' --pool default-pool --container default-container --multi-user '$mount_options' --foreground && break; echo \"dfuse, failed, retrying in 1 second (attempt '$i'/10)\"; sleep 1; done'
# Construct the service name with the local_mount suffix.
# The mount path is passed through `systemd-escape -p` before it is embedded
# in the unit name, so the resulting mount_parallelstore_<escaped path>.service
# name is derived from (and differs per) local_mount.
safe_mount_name=$(systemd-escape -p "${local_mount}")
service_name="mount_parallelstore_${safe_mount_name}.service"

# --- Begin: Add systemd service creation ---
cat >/usr/lib/systemd/system/mount_parallelstore.service <<EOF
cat >/etc/systemd/system/"${service_name}" <<EOF
[Unit]
Description=DAOS Mount Service
After=network-online.target daos_agent.service
tpdownes marked this conversation as resolved.
Show resolved Hide resolved
Before=slurmd.service
ConditionPathIsMountPoint=!${local_mount}

[Service]
Type=oneshot
Type=simple
User=root
Group=root
ExecStart=/bin/bash -c '$mount_command'
Restart=on-failure
RestartSec=10
ExecStart=/bin/dfuse -m $local_mount --pool default-pool --container default-container --multi-user $mount_options --foreground
ExecStop=/usr/bin/fusermount3 -u $local_mount

[Install]
WantedBy=multi-user.target
EOF

systemctl enable mount_parallelstore.service
systemctl daemon-reload
systemctl enable "${service_name}"
systemctl start "${service_name}"
# --- End: Add systemd service creation ---

exit 0
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Abort on the first failing command, including a failure inside a pipeline.
set -e -o pipefail

# Distribution facts read from /etc/os-release. awk splits each matching line
# on '=', strips the double quotes from the value and prints it.
#   OS_ID            - value of the ID= entry
#   OS_ID_LIKE       - value of the ID_LIKE= entry (empty when there is none)
#   OS_VERSION       - value of the VERSION_ID entry
#   OS_VERSION_MAJOR - OS_VERSION with everything from the first dot removed
OS_ID=$(awk -F '=' '/^ID=/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_ID_LIKE=$(awk -F '=' '/^ID_LIKE=/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_VERSION=$(awk -F '=' '/VERSION_ID/ { gsub(/"/, "", $2); print $2 }' /etc/os-release)
OS_VERSION_MAJOR=$(awk -F '=' '/VERSION_ID/ { gsub(/"/, "", $2); sub(/\..*$/, "", $2); print $2 }' /etc/os-release)

Expand All @@ -41,6 +42,20 @@ sed -i "s/#.*transport_config/transport_config/g" $daos_config
# Turn the commented-out "allow_insecure: ... false" setting in $daos_config
# into an active " allow_insecure: true" line.
sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config
# Replace every line that mentions access_points with a single
# "access_points: <value of $access_points>" line.
sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config

#######################################
# Print the names of network interfaces that are not in the first PCI slot,
# as a comma-separated list of double-quoted names (e.g. "enp6s0","enp7s0").
# The first PCI slot is a standard network adapter while remaining interfaces
# are typically network cards dedicated to GPU or workload communication.
#
# The OS family is taken from ID as well as ID_LIKE: Debian ships no ID_LIKE
# entry and RHEL reports ID_LIKE="fedora", so looking at ID_LIKE alone would
# silently skip both distributions.
#
# Arguments:
#   $1 - ID value from /etc/os-release
#   $2 - ID_LIKE value from /etc/os-release (may be empty)
#   $3 - directory holding one entry per interface (default: /sys/class/net/)
# Outputs:
#   The list on stdout; nothing (or an empty line) when no interface matches
#   or the OS family is not recognized.
#######################################
list_extra_fabric_ifaces() {
  local os_id=$1
  local os_id_like=$2
  local net_dir=${3:-/sys/class/net/}

  if [[ "$os_id" == "debian" || " $os_id_like " == *" debian "* ]]; then
    # Debian family: predictable names; enp0s* is the adapter in slot 0.
    find "$net_dir" -not -name 'enp0s*' -regextype posix-extended \
      -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ','
  elif [[ "$os_id" == "rhel" || "$os_id_like" == *rhel* ]]; then
    # RHEL family: kernel names; eth0 is the standard adapter.
    find "$net_dir" -not -name eth0 -regextype posix-extended \
      -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ','
  fi
}

extra_interfaces=$(list_extra_fabric_ifaces "${OS_ID:-}" "${OS_ID_LIKE:-}")

# Only touch the config when there is something to exclude; the loopback
# interface is always put first in the exclusion list.
if [[ -n "$extra_interfaces" ]]; then
  exclude_fabric_ifaces="lo,$extra_interfaces"
  sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" "$daos_config"
fi

# Start service
if { [ "${OS_ID}" = "rocky" ] || [ "${OS_ID}" = "rhel" ]; } && { [ "${OS_VERSION_MAJOR}" = "8" ] || [ "${OS_VERSION_MAJOR}" = "9" ]; }; then
# TODO: Update script to change default log destination folder, after daos_agent user is supported in debian and ubuntu.
Expand Down Expand Up @@ -69,39 +84,34 @@ sed -i "s/#.*user_allow_other/user_allow_other/g" $fuse_config
# make sure limit of open files is high enough for dfuse (1M of open files)
ulimit -n 1048576

for i in {1..10}; do
# To parse mount_options as --disable-wb-cache --eq-count=8.
# shellcheck disable=SC2086
dfuse -m "$local_mount" --pool default-pool --container default-container --multi-user $mount_options && break

echo "dfuse failed, retrying in 1 seconds (attempt $i/10)..."
sleep 1
done

if ! mountpoint -q "$local_mount"; then
exit 1
fi

# Store the mounting logic in a variable
mount_command='for i in {1..10}; do /bin/dfuse -m '$local_mount' --pool default-pool --container default-container --multi-user '$mount_options' --foreground && break; echo \"dfuse, failed, retrying in 1 second (attempt '$i'/10)\"; sleep 1; done'
# Construct the service name with the local_mount suffix.
# The mount path is passed through `systemd-escape -p` before it is embedded
# in the unit name, so the resulting mount_parallelstore_<escaped path>.service
# name is derived from (and differs per) local_mount.
safe_mount_name=$(systemd-escape -p "${local_mount}")
service_name="mount_parallelstore_${safe_mount_name}.service"

# --- Begin: Add systemd service creation ---
cat >/usr/lib/systemd/system/mount_parallelstore.service <<EOF
cat >/etc/systemd/system/"${service_name}" <<EOF
[Unit]
Description=DAOS Mount Service
After=network-online.target daos_agent.service
Before=slurmd.service
ConditionPathIsMountPoint=!${local_mount}

[Service]
Type=oneshot
Type=simple
User=root
Group=root
ExecStart=/bin/bash -c '$mount_command'
Restart=on-failure
RestartSec=10
ExecStart=/bin/dfuse -m $local_mount --pool default-pool --container default-container --multi-user $mount_options --foreground
ExecStop=/usr/bin/fusermount3 -u $local_mount

[Install]
WantedBy=multi-user.target
EOF

systemctl enable mount_parallelstore.service
systemctl daemon-reload
systemctl enable "${service_name}"
systemctl start "${service_name}"
# --- End: Add systemd service creation ---

exit 0
Loading