Skip to content

Commit

Permalink
feat(mount): Add client limit glibc malloc arenas
Browse files Browse the repository at this point in the history
This change adds the `limitglibcmallocarenas` option to the client.
This option reduces the virtual memory used by the client,
at the cost of a possible performance drop in multi-threaded workloads
(due to contention).

The option is meant to be used in environments with limited RAM.

Signed-off-by: Rolando Sánchez Ramos <[email protected]>
  • Loading branch information
rolysr committed Jan 7, 2025
1 parent 6480bac commit 8dea53a
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 5 deletions.
8 changes: 8 additions & 0 deletions doc/sfsmount.1.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,14 @@ during crashes when allegedly flushed data is still being processed (default: 0)
Whether to use FUSE DirectIO. This may improve performance when reading large
files under certain conditions.

*-o limitglibcmallocarenas=*'N'::
Linux only: limit the number of glibc malloc arenas to the given value, which
prevents the client from using a huge amount of virtual memory. This can
influence performance by reducing memory fragmentation and improving cache
locality, but it may also lead to contention and reduced parallelism in
multi-threaded workloads. If the MALLOC_ARENA_MAX or MALLOC_ARENA_TEST
environment variable is already set, it takes precedence over this option.
Use it in memory-constrained environments; recommended values are 4 or 8.
(default is 0: disabled, i.e. let glibc decide)

General mount options (see *mount*(8) manual):

*-o rw* | *-o ro*::
Expand Down
28 changes: 28 additions & 0 deletions src/mount/fuse/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <dirent.h>
#include <errno.h>
#include <fstream>
#include <malloc.h>
#include <ostream>
#include <fuse.h>
#include <fuse_lowlevel.h>
Expand Down Expand Up @@ -50,6 +51,10 @@
#define STR_AUX(x) #x
#define STR(x) STR_AUX(x)

constexpr const char *kEnvironmentVariableNotDefined = nullptr;
static uint8_t gLimitGlibcArenas =
SaunaClient::FsInitParams::kDefaultLimitGlibcMallocArenas;

static void sfs_fsinit(void *userdata, struct fuse_conn_info *conn);

static struct fuse_lowlevel_ops sfs_meta_oper;
Expand Down Expand Up @@ -506,6 +511,26 @@ static int read_masterhost_if_present(struct fuse_args *args) {
return 0;
}

/// (glibc specific) Tune glibc malloc arenas to avoid high virtual memory usage.
///
/// Does nothing unless gLimitGlibcArenas is greater than zero. Otherwise both
/// M_ARENA_MAX and M_ARENA_TEST are set to that value through mallopt().
/// A parameter whose environment variable (MALLOC_ARENA_MAX or
/// MALLOC_ARENA_TEST) is already defined is left untouched, so an explicit
/// setting in the environment takes precedence over the mount option.
/// The function body is empty when mallopt() or the arena parameters are not
/// available at build time.
inline void tuneMalloc() {
#if defined(SAUNAFS_HAVE_MALLOPT) && defined(M_ARENA_MAX) && \
    defined(M_ARENA_TEST)

	if (gLimitGlibcArenas == 0) {
		return;  // feature disabled: keep glibc's own defaults
	}

	const int arenaLimit = static_cast<int>(gLimitGlibcArenas);

	// Applies one mallopt() parameter unless its environment variable is set.
	const auto applyUnlessSetInEnv = [arenaLimit](const char *envName,
	                                              int param,
	                                              const char *label) {
		if (::getenv(envName) != kEnvironmentVariableNotDefined) {
			return;
		}
		safs::log_info("Setting glibc malloc arena {} to {}", label,
		               arenaLimit);
		// mallopt() returns 1 on success and 0 on error; report a rejected
		// value instead of silently pretending the limit is in effect.
		if (::mallopt(param, arenaLimit) == 0) {
			safs::log_warn("Failed to set glibc malloc arena {} to {}", label,
			               arenaLimit);
		}
	};

	applyUnlessSetInEnv("MALLOC_ARENA_MAX", M_ARENA_MAX, "max");
	applyUnlessSetInEnv("MALLOC_ARENA_TEST", M_ARENA_TEST, "test");
#endif
}

int main(int argc, char *argv[]) try {
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
struct fuse_args defaultargs = FUSE_ARGS_INIT(0, NULL);
Expand Down Expand Up @@ -669,6 +694,9 @@ int main(int argc, char *argv[]) try {
gMountOptions.direntrycachesize = 10000000;
}

gLimitGlibcArenas = gMountOptions.limitglibcmallocarenas;
tuneMalloc();

make_fsname(&args);

struct fuse_cmdline_opts fuse_opts;
Expand Down
7 changes: 6 additions & 1 deletion src/mount/fuse/mount_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct fuse_opt gSfsOptsStage2[] = {
SFS_OPT("sfsdirentrycachesize=%u", direntrycachesize, 0),
SFS_OPT("nostdmountoptions", nostdmountoptions, 1),
SFS_OPT("sfsignoreflush=%d", ignoreflush, 0),
SFS_OPT("limitglibcmallocarenas=%d", limitglibcmallocarenas, 0),

SFS_OPT("enablefilelocks=%u", filelocks, 0),
SFS_OPT("nonempty", nonemptymount, 1),
Expand Down Expand Up @@ -235,6 +236,9 @@ void usage(const char *progname) {
" -o nonempty allow mounts over non-empty file/dir\n"
" -o sfsdebug print some debugging information\n"
" -o sfssubfolder=PATH define subfolder to mount as root (default: %s)\n"
" -o limitglibcmallocarenas=N limit glibc malloc arenas to given value - prevents "
"from using huge amount of virtual memory. Use it in constrained memory "
"environments (default: %u)\n"
"\n",
SaunaClient::FsInitParams::kDefaultCacheExpirationTime,
SaunaClient::FsInitParams::kDefaultReadaheadMaxWindowSize,
Expand Down Expand Up @@ -264,7 +268,8 @@ void usage(const char *progname) {
SaunaClient::FsInitParams::kDefaultAclCacheSize,
SaunaClient::FsInitParams::kDefaultIoRetries,
SaunaClient::FsInitParams::kDefaultSymlinkCacheTimeout,
SaunaClient::FsInitParams::kDefaultSubfolder
SaunaClient::FsInitParams::kDefaultSubfolder,
SaunaClient::FsInitParams::kDefaultLimitGlibcMallocArenas
);
printf(
"CMODE can be set to:\n"
Expand Down
4 changes: 3 additions & 1 deletion src/mount/fuse/mount_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ struct sfsopts_ {
int nonemptymount;
bool directio;
int ignoreflush;
unsigned limitglibcmallocarenas;

sfsopts_()
: masterhost(NULL),
Expand Down Expand Up @@ -171,7 +172,8 @@ struct sfsopts_ {
bandwidthoveruse(SaunaClient::FsInitParams::kDefaultBandwidthOveruse),
nonemptymount(SaunaClient::FsInitParams::kDefaultNonEmptyMounts),
directio(SaunaClient::FsInitParams::kDirectIO),
ignoreflush(SaunaClient::FsInitParams::kDefaultIgnoreFlush)
ignoreflush(SaunaClient::FsInitParams::kDefaultIgnoreFlush),
limitglibcmallocarenas(SaunaClient::FsInitParams::kDefaultLimitGlibcMallocArenas)
{ }
};

Expand Down
19 changes: 16 additions & 3 deletions src/mount/sauna_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ struct FsInitParams {
static constexpr unsigned kDefaultAclCacheSize = 1000;
static constexpr bool kDefaultVerbose = false;
static constexpr bool kDirectIO = false;

#ifndef _WIN32
static constexpr unsigned kDefaultLimitGlibcMallocArenas = 0;
#endif
// Thank you, GCC 4.6, for no delegating constructors
FsInitParams()
: bind_host(), host(), port(), meta(false), mountpoint(), subfolder(kDefaultSubfolder),
Expand Down Expand Up @@ -157,7 +159,11 @@ struct FsInitParams {
clean_acquired_files_timeout(kDefaultCleanAcquiredFilesTimeout),
enable_status_updater_thread(kDefaultEnableStatusUpdaterThread),
#endif
ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO) {
ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO)
#ifndef _WIN32
,limit_glibc_malloc_arenas(kDefaultLimitGlibcMallocArenas)
#endif
{
}

FsInitParams(const std::string &bind_host, const std::string &host, const std::string &port, const std::string &mountpoint)
Expand Down Expand Up @@ -193,7 +199,11 @@ struct FsInitParams {
clean_acquired_files_timeout(kDefaultCleanAcquiredFilesTimeout),
enable_status_updater_thread(kDefaultEnableStatusUpdaterThread),
#endif
ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO) {
ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO)
#ifndef _WIN32
,limit_glibc_malloc_arenas(kDefaultLimitGlibcMallocArenas)
#endif
{
}

std::string bind_host;
Expand Down Expand Up @@ -251,6 +261,9 @@ struct FsInitParams {
bool ignore_flush;
bool verbose;
bool direct_io;
#ifndef _WIN32
unsigned limit_glibc_malloc_arenas;
#endif

std::string io_limits_config_file;
};
Expand Down
1 change: 1 addition & 0 deletions tests/setup_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ apt_packages=(
libyaml-cpp-dev
netcat-openbsd
python3-venv
sysstat
uuid-dev
zlib1g-dev
## For NFS-Ganesha tests (duplicate are commented out as reference)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
timeout_set 5 minutes

# Set up a local SaunaFS installation with one chunkserver and two mounts.
# Both mounts share MOUNT_EXTRA_CONFIG and differ only in the
# limitglibcmallocarenas value (8 for mount 0, 2 for mount 1).
CHUNKSERVERS=1 \
MOUNTS=2 \
MOUNT_EXTRA_CONFIG="sfscachemode=NEVER`
`|cacheexpirationtime=10000`
`|readcachemaxsizepercentage=1`
`|sfschunkserverwavereadto=2000`
`|sfsioretries=50`
`|readaheadmaxwindowsize=5000`
`|sfschunkservertotalreadto=8000" \
MOUNT_0_EXTRA_CONFIG="limitglibcmallocarenas=8" \
MOUNT_1_EXTRA_CONFIG="limitglibcmallocarenas=2" \
setup_local_empty_saunafs info

# Size the fio workload: the MemTotal value from /proc/meminfo divided by 1024
# and multiplied by 0.05 (truncated to an integer), split evenly across the
# fio jobs using bc's integer division.
num_jobs=18
five_percent_ram_mb=$(awk '/MemTotal/ {print int($2 / 1024 * 0.05)}' /proc/meminfo)
size_per_job=$(echo "${five_percent_ram_mb} / ${num_jobs}" | bc)
echo "five_percent_ram_mb: ${five_percent_ram_mb}, num_jobs: ${num_jobs}, size_per_job: ${size_per_job}"

# Log every process whose command line matches "sfsmount" (PID and command line)
pgrep -fa sfsmount
# Collect the PIDs (first column of the pgrep output) of those processes
pids=($(pgrep -fa sfsmount | awk '{print $1}'))

# Take the first two PIDs.
# NOTE(review): presumably pids[0] is mount 0 and pids[1] is mount 1; this
# relies on pgrep's output order and on exactly two matches - confirm.
pid1=${pids[0]}
pid2=${pids[1]}

echo "pids: ${pids[@]}"
echo "pid1: $pid1, pid2: $pid2"

# Prints the virtual memory size (VSZ, as reported by `ps -o vsz=`) of a process.
#   $1 - PID of the process to inspect
function getVirtualMemoryForPid() {
	# Keep the PID local so the caller's variables are never overwritten
	local pid=${1}
	ps -o vsz= -p "${pid}"
}

# Runs a fio write pass followed by a background fio read pass on a mount point
# and prints the VSZ of the given process, sampled once about one second after
# the read pass has been started.
#   $1 - mount point directory used as the fio working directory
#   $2 - PID of the process whose VSZ is sampled
# Reads the globals size_per_job and num_jobs.
# Output (stdout): the VSZ value printed by getVirtualMemoryForPid.
run_fio_and_measure_vsz() {
	local mount_point=$1
	local pid=$2
	local fio_pid
	local virtualMemory

	cd "$mount_point"

	# Write the files first so that the read pass below has data to read
	fio --name=test_multiple_reads --directory=$mount_point --size=${size_per_job}M --rw=write --ioengine=libaio --group_reporting --numjobs=${num_jobs} --bs=1M --direct=1 --iodepth=1 > /dev/null 2>&1

	# Run the fio read command in the background
	fio --name=test_multiple_reads --directory=$mount_point --size=${size_per_job}M --rw=read --ioengine=libaio --group_reporting --numjobs=${num_jobs} --bs=1M --direct=1 --iodepth=1 > /dev/null 2>&1 &

	# $! is the PID of the job just started; unlike a pgrep lookup it cannot
	# race with process start-up or pick up fio's own worker processes
	fio_pid=$!

	# Give the read pass a moment to get going, then sample the VSZ once
	sleep 1
	virtualMemory=$(getVirtualMemoryForPid $pid)

	# Wait for the fio read command to complete
	wait $fio_pid

	# Report the sampled value to the caller
	echo $virtualMemory
}

# Measure the VSZ of each sfsmount process while fio reads from its mount point
echo "Running fio commands and measuring VSZ for both mount points"
avg_vsz_mount0=$(run_fio_and_measure_vsz "${info[mount0]}" $pid1)
avg_vsz_mount1=$(run_fio_and_measure_vsz "${info[mount1]}" $pid2)

# Report both measurements
echo "Average VSZ for ${info[mount0]}: ${avg_vsz_mount0}"
echo "Average VSZ for ${info[mount1]}: ${avg_vsz_mount1}"

# The mount with the smaller arena limit (mount 1) is expected to use strictly
# less virtual memory than mount 0; result stays 0 unless that holds
result=0
if (( $avg_vsz_mount1 < $avg_vsz_mount0 )); then
	result=1
fi

assert_equals "1" "$result"

0 comments on commit 8dea53a

Please sign in to comment.