Skip to content

Commit

Permalink
Address feedback during review of GoogleCloudPlatform#3256
Browse files (browse the repository at this point in the history)
TESTED:
- simple Debian and Ubuntu VMs with one NIC

TODO:
- rewrite the find command to address the case of 2 gVNICs?
- fix quoting of the ignored (excluded) interface names
  • Loading branch information
tpdownes committed Dec 3, 2024
1 parent 02c7962 commit 597a559
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 32 deletions.
32 changes: 16 additions & 16 deletions modules/file-system/parallelstore/scripts/mount-daos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,13 @@ sed -i "s/#.*transport_config/transport_config/g" $daos_config
sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config
sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config

# Get interface names with "s0f0" suffix
if ifconfig -a | grep 's0f0'; then
sof0_interfaces=$(ifconfig -a | grep 's0f0:' | awk '{print $1}' | tr ':' '\n' | grep -v '^$' | awk '!a[$0]++' | sed 's/^/"/g' | sed 's/$/"/g' | paste -sd, -)
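# Get the names of network interfaces that are not on PCI bus 0 (i.e. that do
# not match enp0s*; predictable names have the form enp<bus>s<slot>).
# The enp0s* interface is the standard network adapter, while the remaining
# interfaces are typically network cards dedicated to GPU or workload
# communication.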

# Append the sof0_interfaces to the existing list
exclude_fabric_ifaces="lo,$sof0_interfaces"

# Update the file with the new list
extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf "%f\n" | paste -s -d ',')
if [[ -n "$extra_interfaces" ]]; then
exclude_fabric_ifaces="lo,$extra_interfaces"
sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" $daos_config
fi

Expand Down Expand Up @@ -80,31 +79,32 @@ sed -i "s/#.*user_allow_other/user_allow_other/g" $fuse_config
# make sure limit of open files is high enough for dfuse (1M of open files)
ulimit -n 1048576

# Store the mounting logic in a variable
mount_command="if mountpoint -q '$local_mount'; then fusermount3 -u '$local_mount'; fi; for i in {1..10}; do /bin/dfuse -m '$local_mount' --pool default-pool --container default-container --multi-user $mount_options --foreground && break; sleep 1; done"

# Construct the service name with the local_mount suffix
service_name="mount_parallelstore_${local_mount//\//_}.service"
safe_mount_name=$(systemd-escape -p "${local_mount}")
service_name="mount_parallelstore_${safe_mount_name}.service"

# --- Begin: Add systemd service creation ---
cat >/usr/lib/systemd/system/"${service_name}" <<EOF
cat >/etc/systemd/system/"${service_name}" <<EOF
[Unit]
Description=DAOS Mount Service
After=network-online.target daos_agent.service
Before=slurmd.service
ConditionPathIsMountPoint=!${local_mount}
[Service]
Type=simple
User=root
Group=root
Restart=always
RestartSec=1
ExecStart=/bin/bash -c "$mount_command"
ExecStop=fusermount3 -u '$local_mount'
Restart=on-failure
RestartSec=10
ExecStart=/bin/dfuse -m $local_mount --pool default-pool --container default-container --multi-user $mount_options --foreground
ExecStop=/usr/bin/fusermount3 -u $local_mount
[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable "${service_name}"
systemctl start "${service_name}"
# --- End: Add systemd service creation ---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,13 @@ sed -i "s/#.*transport_config/transport_config/g" $daos_config
sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config
sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config

# Get interface names with "s0f0" suffix
if ifconfig -a | grep 's0f0'; then
sof0_interfaces=$(ifconfig -a | grep 's0f0:' | awk '{print $1}' | tr ':' '\n' | grep -v '^$' | awk '!a[$0]++' | sed 's/^/"/g' | sed 's/$/"/g' | paste -sd, -)
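# Get the names of network interfaces that are not on PCI bus 0 (i.e. that do
# not match enp0s*; predictable names have the form enp<bus>s<slot>).
# The enp0s* interface is the standard network adapter, while the remaining
# interfaces are typically network cards dedicated to GPU or workload
# communication.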

# Append the sof0_interfaces to the existing list
exclude_fabric_ifaces="lo,$sof0_interfaces"

# Update the file with the new list
extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf "%f\n" | paste -s -d ',')
if [[ -n "$extra_interfaces" ]]; then
exclude_fabric_ifaces="lo,$extra_interfaces"
sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" $daos_config
fi

Expand Down Expand Up @@ -80,31 +79,32 @@ sed -i "s/#.*user_allow_other/user_allow_other/g" $fuse_config
# make sure limit of open files is high enough for dfuse (1M of open files)
ulimit -n 1048576

# Store the mounting logic in a variable
mount_command="if mountpoint -q '$local_mount'; then fusermount3 -u '$local_mount'; fi; for i in {1..10}; do /bin/dfuse -m '$local_mount' --pool default-pool --container default-container --multi-user $mount_options --foreground && break; sleep 1; done"

# Construct the service name with the local_mount suffix
service_name="mount_parallelstore_${local_mount//\//_}.service"
safe_mount_name=$(systemd-escape -p "${local_mount}")
service_name="mount_parallelstore_${safe_mount_name}.service"

# --- Begin: Add systemd service creation ---
cat >/usr/lib/systemd/system/"${service_name}" <<EOF
cat >/etc/systemd/system/"${service_name}" <<EOF
[Unit]
Description=DAOS Mount Service
After=network-online.target daos_agent.service
Before=slurmd.service
ConditionPathIsMountPoint=!${local_mount}
[Service]
Type=simple
User=root
Group=root
Restart=always
RestartSec=1
ExecStart=/bin/bash -c "$mount_command"
ExecStop=fusermount3 -u '$local_mount'
Restart=on-failure
RestartSec=10
ExecStart=/bin/dfuse -m $local_mount --pool default-pool --container default-container --multi-user $mount_options --foreground
ExecStop=/usr/bin/fusermount3 -u $local_mount
[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable "${service_name}"
systemctl start "${service_name}"
# --- End: Add systemd service creation ---
Expand Down

0 comments on commit 597a559

Please sign in to comment.