diff --git a/doc/sfsmount.1.adoc b/doc/sfsmount.1.adoc index a6d256e7b..d1f8850cf 100644 --- a/doc/sfsmount.1.adoc +++ b/doc/sfsmount.1.adoc @@ -216,13 +216,14 @@ Equivalent to *--nostdopts* (*-n*) option for use in fstab. *-o nonempty*:: Equivalent to *--nonempty* option for use in fstab. -*-o sfsuseoldwritealgorithm*:: -Use legacy write algorithm. Though new algorithm is faster in most cases, old -algorithm may prove useful in some scenarios. - -*-o sfsignoreflush*'0|1':: -Advanced: use with caution. Ignore flush usual behavior by replying SUCCESS to it -immediately. Targets fast creation of small files, but may cause data loss +*-o sfsuseinodebasedwritealgorithm=*'0|1':: +Use inode based write algorithm when set to 1. Use chunk based write algorithm +when set to 0. Though chunk based algorithm is faster in most cases, inode +based algorithm may prove useful in some scenarios (default: 0). + +*-o sfsignoreflush=*'0|1':: +Advanced: use with caution. Ignore flush usual behavior by replying SUCCESS to +it immediately. Targets fast creation of small files, but may cause data loss during crashes when allegedly flushed data is still being processed (default: 0). *-o sfsdirectio=*'0|1':: diff --git a/src/mount/fuse/main.cc b/src/mount/fuse/main.cc index d527081c5..004b5b80e 100644 --- a/src/mount/fuse/main.cc +++ b/src/mount/fuse/main.cc @@ -253,7 +253,8 @@ static int mainloop(struct fuse_args *args, struct fuse_cmdline_opts *fuse_opts, params.acl_cache_size = gMountOptions.aclcachesize; params.debug_mode = gMountOptions.debug; params.direct_io = gMountOptions.directio; - params.use_old_write_algorithm = gMountOptions.useoldwritealgorithm; + params.use_inode_based_write_algorithm = + gMountOptions.useinodebasedwritealgorithm; params.ignore_flush = gMountOptions.ignoreflush; if (!gMountOptions.meta) { diff --git a/src/mount/fuse/mount_config.cc b/src/mount/fuse/mount_config.cc index 02f722564..4bd299e75 100644 --- a/src/mount/fuse/mount_config.cc +++ b/src/mount/fuse/mount_config.cc @@ -93,7 +93,7 @@ struct fuse_opt gSfsOptsStage2[] = { SFS_OPT("bandwidthoveruse=%lf", bandwidthoveruse, 1), SFS_OPT("sfsdirentrycachesize=%u", direntrycachesize, 0), SFS_OPT("nostdmountoptions", nostdmountoptions, 1), - SFS_OPT("sfsuseoldwritealgorithm", useoldwritealgorithm, 1), + SFS_OPT("sfsuseinodebasedwritealgorithm=%d", useinodebasedwritealgorithm, 0), SFS_OPT("sfsignoreflush=%d", ignoreflush, 0), SFS_OPT("enablefilelocks=%u", filelocks, 0), @@ -172,7 +172,9 @@ void usage(const char *progname) { " -o sfswriteworkers=N define number of write workers (default: %u)\n" " -o sfswritewindowsize=N define write window size (in blocks) for " "each chunk (default: %u)\n" -" -o sfsuseoldwritealgorithm use legacy write algorithm.\n" +" -o sfsuseinodebasedwritealgorithm=0|1 use inode based write algorithm when " + "set to 1. Use chunk based write algorithm when set to 0 " + "(default: %d)\n" " -o sfsignoreflush=0|1 Advanced: use with caution. Ignore flush usual " "behavior by replying SUCCESS to it immediately. Targets fast " "creation of small files, but may cause data loss during crashes " @@ -255,6 +257,7 @@ void usage(const char *progname) { SaunaClient::FsInitParams::kDefaultCachePerInodePercentage, SaunaClient::FsInitParams::kDefaultWriteWorkers, SaunaClient::FsInitParams::kDefaultWriteWindowSize, + SaunaClient::FsInitParams::kDefaultUseInodeBasedWriteAlgorithm, SaunaClient::FsInitParams::kDefaultIgnoreFlush, SaunaClient::FsInitParams::kDefaultUseRwLock, SaunaClient::FsInitParams::kDefaultMkdirCopySgid, diff --git a/src/mount/fuse/mount_config.h b/src/mount/fuse/mount_config.h index bceee852e..33671a5a3 100644 --- a/src/mount/fuse/mount_config.h +++ b/src/mount/fuse/mount_config.h @@ -115,7 +115,7 @@ struct sfsopts_ { double bandwidthoveruse; int nonemptymount; bool directio; - int useoldwritealgorithm; + int useinodebasedwritealgorithm; int ignoreflush; sfsopts_() @@ -174,7 +174,7 @@ struct sfsopts_ { bandwidthoveruse(SaunaClient::FsInitParams::kDefaultBandwidthOveruse), nonemptymount(SaunaClient::FsInitParams::kDefaultNonEmptyMounts), directio(SaunaClient::FsInitParams::kDirectIO), - useoldwritealgorithm(SaunaClient::FsInitParams::kDefaultUseOldWriteAlgorithm), + useinodebasedwritealgorithm(SaunaClient::FsInitParams::kDefaultUseInodeBasedWriteAlgorithm), ignoreflush(SaunaClient::FsInitParams::kDefaultIgnoreFlush) { } }; diff --git a/src/mount/sauna_client.cc b/src/mount/sauna_client.cc index 3db4865f0..a1bb55da4 100644 --- a/src/mount/sauna_client.cc +++ b/src/mount/sauna_client.cc @@ -3497,7 +3497,7 @@ void fs_init(FsInitParams ¶ms) { params.prefetch_xor_stripes, std::max(params.bandwidth_overuse, 1.)); - gUseOldWriteAlgorithm = params.use_old_write_algorithm; + gUseInodeBasedWriteAlgorithm = params.use_inode_based_write_algorithm; write_data_init( params.write_cache_size, params.io_retries, params.write_workers, params.write_window_size, params.chunkserver_write_timeout_ms, diff --git a/src/mount/sauna_client.h b/src/mount/sauna_client.h index faaa68a17..6f6d907ae 100644 --- a/src/mount/sauna_client.h +++ b/src/mount/sauna_client.h @@ -81,7 +81,7 @@ struct FsInitParams { static constexpr float kDefaultBandwidthOveruse = 1.0; static constexpr unsigned kDefaultChunkserverWriteTo = 5000; static constexpr bool kDefaultIgnoreFlush = false; - static constexpr bool kDefaultUseOldWriteAlgorithm = false; + static constexpr bool kDefaultUseInodeBasedWriteAlgorithm = false; static constexpr unsigned kDefaultWriteCacheSize = 128; #ifdef _WIN32 static constexpr unsigned kDefaultWriteWaveTo = 10; @@ -152,7 +152,7 @@ struct FsInitParams { #ifdef _WIN32 mounting_uid(USE_LOCAL_ID), mounting_gid(USE_LOCAL_ID), #endif - use_old_write_algorithm(kDefaultUseOldWriteAlgorithm), + use_inode_based_write_algorithm(kDefaultUseInodeBasedWriteAlgorithm), ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO) { } @@ -187,7 +187,7 @@ struct FsInitParams { #ifdef _WIN32 mounting_uid(USE_LOCAL_ID), mounting_gid(USE_LOCAL_ID), #endif - use_old_write_algorithm(kDefaultUseOldWriteAlgorithm), + use_inode_based_write_algorithm(kDefaultUseInodeBasedWriteAlgorithm), ignore_flush(kDefaultIgnoreFlush), verbose(kDefaultVerbose), direct_io(kDirectIO) { } @@ -241,7 +241,7 @@ struct FsInitParams { std::unordered_set allowed_users; #endif - bool use_old_write_algorithm; + bool use_inode_based_write_algorithm; bool ignore_flush; bool verbose; bool direct_io; diff --git a/src/mount/writedata.cc b/src/mount/writedata.cc index b62902752..6676723cf 100644 --- a/src/mount/writedata.cc +++ b/src/mount/writedata.cc @@ -64,7 +64,7 @@ #define NO_INODEDATA nullptr #define NO_CHUNKDATA nullptr -namespace OldWriteAlgorithm { +namespace InodeBasedWriteAlgorithm { struct inodedata { uint32_t inode; @@ -105,7 +105,7 @@ struct inodedata { newDataInChainPipe[0] = newDataInChainPipe[1] = -1; #else if (pipe(newDataInChainPipe) < 0) { - safs_pretty_syslog(LOG_WARNING, "creating pipe error: %s", strerr(errno)); + safs::log_warn("creating pipe error: {}", strerr(errno)); newDataInChainPipe[0] = -1; } #endif @@ -127,7 +127,7 @@ struct inodedata { */ if (workerWaitingForData && dataChain.size() == 1 && isDataChainPipeValid()) { if (write(newDataInChainPipe[1], " ", 1) != 1) { - safs_pretty_syslog(LOG_ERR, "write pipe error: %s", strerr(errno)); + safs::log_err("write pipe error: {}", strerr(errno)); } workerWaitingForData = false; } @@ -202,7 +202,7 @@ struct DelayedQueueEntry { }; static std::atomic maxretries; -static std::atomic gWaveTimeout; +static std::atomic gWriteWaveTimeout; static std::mutex gMutex; typedef std::unique_lock Glock; @@ -343,7 +343,8 @@ void write_job_delayed_end(inodedata* id, int status, int seconds, Glock &lock) LOG_AVG_TILL_END_OF_SCOPE1("write_job_delayed_end#sec", seconds); id->locator.reset(); if (status != SAUNAFS_STATUS_OK) { - safs_pretty_syslog(LOG_WARNING, "error writing file number %" PRIu32 ": %s", id->inode, saunafs_error_string(status)); + safs::log_warn("error writing file number {}: {}", id->inode, + saunafs_error_string(status)); id->status = status; } status = id->status; @@ -412,7 +413,7 @@ void InodeChunkWriter::processJob(inodedata* inodeData) { } else { // No data, no unfinished jobs -- something wrong! // This should never happen, so the status doesn't really matter - safs_pretty_syslog(LOG_WARNING, "got inode with no data to write!!!"); + safs::log_warn("got inode with no data to write!!!"); haveDataToWrite = false; status = SAUNAFS_ERROR_EINVAL; } @@ -484,8 +485,8 @@ void InodeChunkWriter::processJob(inodedata* inodeData) { returnJournalToDataChain(writer.releaseJournal(), lock); lock.unlock(); - safs_pretty_syslog(LOG_WARNING, "write file error, inode: %" PRIu32 ", index: %" PRIu32 " - %s", - inodeData_->inode, chunkIndex_, errorString.c_str()); + safs::log_warn("write file error, inode: {}, index: {} - {}", + inodeData_->inode, chunkIndex_, errorString.c_str()); if (inodeData_->trycnt >= maxretries) { // Convert error to an unrecoverable error throw UnrecoverableWriteException(e.message(), e.status()); @@ -588,7 +589,7 @@ void InodeChunkWriter::processDataChain(ChunkWriter& writer) { SAUNAFS_ERROR_TIMEOUT); } - writer.processOperations(gWaveTimeout); + writer.processOperations(gWriteWaveTimeout); } } @@ -685,7 +686,7 @@ void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, gWriteWindowSize = writewindowsize; gChunkserverTimeout_ms = chunkserverTimeout_ms; maxretries = retries; - gWaveTimeout = waveTimeout; + gWriteWaveTimeout = waveTimeout; if (cacheblockcount < 10) { cacheblockcount = 10; } @@ -705,7 +706,7 @@ void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, pthread_attr_destroy(&thattr); gTweaks.registerVariable("WriteMaxRetries", maxretries); - gTweaks.registerVariable("WriteWaveTimeout", gWaveTimeout); + gTweaks.registerVariable("WriteWaveTimeout", gWriteWaveTimeout); } void write_data_term(void) { @@ -947,8 +948,9 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, do { status = fs_truncate(inode, opened, uid, gid, length, writeNeeded, attr, oldLength, lockId); if (status != SAUNAFS_STATUS_OK) { - safs_pretty_syslog(LOG_INFO, "truncate file %" PRIu32 " to length %" PRIu64 ": %s (try %d/%d)", - inode, length, saunafs_error_string(status), int(retries + 1), int(maxretries)); + safs::log_info("truncate file {} to length {}: {} (try {}/{})", + inode, length, saunafs_error_string(status), + int(retries + 1), int(maxretries)); } if (retries >= maxretries) { break; @@ -973,8 +975,7 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, } return 0; } else { - // status is now SFS status, so we cannot return any errno - throw UnrecoverableWriteException("fs_truncate failed", status); + return status; } } @@ -1017,15 +1018,17 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, } id->maxfleng = length; - // Now we can tell the master server to finish the truncate operation and then unblock the inode + // Now we can tell the master server to finish the truncate operation and + // then unblock the inode lock.unlock(); status = fs_truncateend(inode, uid, gid, length, lockId, attr); write_data_flushwaiting_decrease(id, lock); write_data_lcnt_decrease(id, lock); if (status != SAUNAFS_STATUS_OK) { - // status is now SFS status, so we cannot return any errno - throw UnrecoverableWriteException("fs_truncateend failed", status); + safs::log_warn("truncateend on file {} to length {}: {}", inode, length, + saunafs_error_string(status)); + return status; } return 0; } @@ -1041,9 +1044,9 @@ int write_data_end(void* vid) { return status; } -} // namespace OldWriteAlgorithm +} // namespace InodeBasedWriteAlgorithm -namespace NewWriteAlgorithm { +namespace ChunkBasedWriteAlgorithm { struct ChunkData; @@ -1121,7 +1124,7 @@ struct ChunkData { newDataInChainPipe[0] = newDataInChainPipe[1] = -1; #else if (pipe(newDataInChainPipe) < 0) { - safs_pretty_syslog(LOG_WARNING, "creating pipe error: %s", strerr(errno)); + safs::log_warn("creating pipe error: {}", strerr(errno)); newDataInChainPipe[0] = -1; } #endif @@ -1148,7 +1151,7 @@ struct ChunkData { */ if (workerWaitingForData && dataChain.size() == 1 && isDataChainPipeValid()) { if (write(newDataInChainPipe[1], " ", 1) != 1) { - safs_pretty_syslog(LOG_ERR, "write pipe error: %s", strerr(errno)); + safs::log_err("write pipe error: {}", strerr(errno)); } workerWaitingForData = false; } @@ -1207,7 +1210,7 @@ struct DelayedQueueEntry { }; static std::atomic maxretries; -static std::atomic gWaveTimeout; +static std::atomic gWriteWaveTimeout; static std::mutex gMutex; static std::mutex fcbcondMutex; @@ -1443,7 +1446,8 @@ void write_job_delayed_end(ChunkData* chunkData, int status, int seconds, Lock & chunkData->locator.reset(); if (status != SAUNAFS_STATUS_OK) { - safs_pretty_syslog(LOG_WARNING, "error writing file number %" PRIu32 ": %s", parent->inode, saunafs_error_string(status)); + safs::log_warn("error writing file number {}: {}", parent->inode, + saunafs_error_string(status)); chunkData->updateParentStatus(status); } status = chunkData->getParentStatus(); @@ -1601,8 +1605,8 @@ void ChunkJobWriter::processJob(ChunkData* chunkData) { returnJournalToDataChain(writer.releaseJournal(), inodeLock); inodeLock.unlock(); - safs_pretty_syslog(LOG_WARNING, "write file error, inode: %" PRIu32 ", index: %" PRIu32 " - %s", - parent->inode, chunkIndex_, errorString.c_str()); + safs::log_warn("write file error, inode: {}, index: {} - {}", + parent->inode, chunkIndex_, errorString.c_str()); if (chunkData_->tryCounter >= maxretries) { // Convert error to an unrecoverable error throw UnrecoverableWriteException(e.message(), e.status()); @@ -1711,7 +1715,7 @@ void ChunkJobWriter::processDataChain(ChunkWriter& writer) { SAUNAFS_ERROR_TIMEOUT); } - writer.processOperations(gWaveTimeout); + writer.processOperations(gWriteWaveTimeout); } } @@ -1792,7 +1796,7 @@ void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, gWriteWindowSize = writewindowsize; gChunkserverTimeout_ms = chunkserverTimeout_ms; maxretries = retries; - gWaveTimeout = waveTimeout; + gWriteWaveTimeout = waveTimeout; if (cacheblockcount < 10) { cacheblockcount = 10; } @@ -1812,7 +1816,7 @@ void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, pthread_attr_destroy(&thattr); gTweaks.registerVariable("WriteMaxRetries", maxretries); - gTweaks.registerVariable("WriteWaveTimeout", gWaveTimeout); + gTweaks.registerVariable("WriteWaveTimeout", gWriteWaveTimeout); } void write_data_term(void) { @@ -2119,8 +2123,9 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, do { status = fs_truncate(inode, opened, uid, gid, length, writeNeeded, attr, oldLength, lockId); if (status != SAUNAFS_STATUS_OK) { - safs_pretty_syslog(LOG_INFO, "truncate file %" PRIu32 " to length %" PRIu64 ": %s (try %d/%d)", - inode, length, saunafs_error_string(status), int(retries + 1), int(maxretries)); + safs::log_info("truncate file {} to length {}: {} (try {}/{})", + inode, length, saunafs_error_string(status), + int(retries + 1), int(maxretries)); } if (retries >= maxretries) { break; @@ -2145,8 +2150,7 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, } return 0; } else { - // status is now SFS status, so we cannot return any errno - throw UnrecoverableWriteException("fs_truncate failed", status); + return status; } } @@ -2199,7 +2203,8 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, } id->maxfleng = length; - // Now we can tell the master server to finish the truncate operation and then unblock the inode + // Now we can tell the master server to finish the truncate operation and + // then unblock the inode inodeLock.unlock(); status = fs_truncateend(inode, uid, gid, length, lockId, attr); @@ -2209,8 +2214,9 @@ int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, write_data_lcnt_decrease(id, inodeLock); if (status != SAUNAFS_STATUS_OK) { - // status is now SFS status, so we cannot return any errno - throw UnrecoverableWriteException("fs_truncateend failed", status); + safs::log_warn("truncateend on file {} to length {}: {}", inode, length, + saunafs_error_string(status)); + return status; } return 0; } @@ -2229,80 +2235,81 @@ int write_data_end(void* vid) { return status; } -} // namespace NewWriteAlgorithm +} // namespace ChunkBasedWriteAlgorithm void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, uint32_t writewindowsize, uint32_t chunkserverTimeout_ms, uint32_t cachePerInodePercentage, uint32_t waveTimeout) { - if (gUseOldWriteAlgorithm) { - OldWriteAlgorithm::write_data_init( + if (gUseInodeBasedWriteAlgorithm) { + InodeBasedWriteAlgorithm::write_data_init( cachesize, retries, workers, writewindowsize, chunkserverTimeout_ms, cachePerInodePercentage, waveTimeout); } else { - NewWriteAlgorithm::write_data_init( + ChunkBasedWriteAlgorithm::write_data_init( cachesize, retries, workers, writewindowsize, chunkserverTimeout_ms, cachePerInodePercentage, waveTimeout); } } void write_data_term(void) { - if (gUseOldWriteAlgorithm) { - OldWriteAlgorithm::write_data_term(); + if (gUseInodeBasedWriteAlgorithm) { + InodeBasedWriteAlgorithm::write_data_term(); } else { - NewWriteAlgorithm::write_data_term(); + ChunkBasedWriteAlgorithm::write_data_term(); } } void *write_data_new(uint32_t inode) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_new(inode); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_new(inode); } - return NewWriteAlgorithm::write_data_new(inode); + return ChunkBasedWriteAlgorithm::write_data_new(inode); } int write_data_end(void *vid) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_end(vid); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_end(vid); } - return NewWriteAlgorithm::write_data_end(vid); + return ChunkBasedWriteAlgorithm::write_data_end(vid); } int write_data_flush(void *vid) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_flush(vid); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_flush(vid); } - return NewWriteAlgorithm::write_data_flush(vid); + return ChunkBasedWriteAlgorithm::write_data_flush(vid); } uint64_t write_data_getmaxfleng(uint32_t inode) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_getmaxfleng(inode); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_getmaxfleng(inode); } - return NewWriteAlgorithm::write_data_getmaxfleng(inode); + return ChunkBasedWriteAlgorithm::write_data_getmaxfleng(inode); } int write_data_flush_inode(uint32_t inode) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_flush_inode(inode); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_flush_inode(inode); } - return NewWriteAlgorithm::write_data_flush_inode(inode); + return ChunkBasedWriteAlgorithm::write_data_flush_inode(inode); } int write_data_truncate(uint32_t inode, bool opened, uint32_t uid, uint32_t gid, uint64_t length, Attributes &attr) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data_truncate(inode, opened, uid, gid, - length, attr); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data_truncate(inode, opened, uid, + gid, length, attr); } - return NewWriteAlgorithm::write_data_truncate(inode, opened, uid, gid, - length, attr); + return ChunkBasedWriteAlgorithm::write_data_truncate(inode, opened, uid, + gid, length, attr); } int write_data(void *vid, uint64_t offset, uint32_t size, const uint8_t *buff, size_t currentSize) { - if (gUseOldWriteAlgorithm) { - return OldWriteAlgorithm::write_data(vid, offset, size, buff, - currentSize); + if (gUseInodeBasedWriteAlgorithm) { + return InodeBasedWriteAlgorithm::write_data(vid, offset, size, buff, + currentSize); } - return NewWriteAlgorithm::write_data(vid, offset, size, buff, currentSize); + return ChunkBasedWriteAlgorithm::write_data(vid, offset, size, buff, + currentSize); } diff --git a/src/mount/writedata.h b/src/mount/writedata.h index 3ba5268cf..7053202de 100644 --- a/src/mount/writedata.h +++ b/src/mount/writedata.h @@ -27,7 +27,7 @@ #include "common/attributes.h" -inline bool gUseOldWriteAlgorithm; +inline bool gUseInodeBasedWriteAlgorithm; void write_data_init(uint32_t cachesize, uint32_t retries, uint32_t workers, uint32_t writewindowsize, uint32_t chunkserverTimeout_ms,