Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature chunk trash bin #215

Draft
wants to merge 10 commits into
base: dev
Choose a base branch
from
Draft
16 changes: 16 additions & 0 deletions doc/sfschunkserver.cfg.5.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,22 @@ the operation is considered failed and is immediately aborted (default: 1000)
replication. After this timeout, next wave of read requests is sent to other
chunkservers (default: 500)

*CHUNK_TRASH_ENABLED*:: enables or disables the chunk trash feature. When
enabled, deleted chunks are moved to a trash directory instead of being
immediately removed. (Default: 1)

*CHUNK_TRASH_EXPIRATION_SECONDS*:: specifies how long, in seconds, chunks remain
in the trash before being permanently deleted. (Default: 259200, i.e. 3 days)

*CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB*:: sets the available space threshold in
gigabytes. If the available space on the disk falls below this threshold, the
system will start deleting older chunks from the trash to free up space.
(Default: 1024)

*CHUNK_TRASH_GC_BATCH_SIZE*:: defines the batch size for the garbage collector
when processing chunks in the trash, i.e. how many files are processed in each
garbage collection cycle. (Default: 1000)

*LOG_LEVEL*:: Setup logging. Uses the environment variable SAUNAFS_LOG_LEVEL or
config value LOG_LEVEL to determine logging level. Valid log levels are
- 'trace'
Expand Down
5 changes: 5 additions & 0 deletions src/admin/dump_config_command.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ const static std::unordered_map<std::string, std::string> defaultOptionsCS = {
{"REPLICATION_TOTAL_TIMEOUT_MS", "60000"},
{"REPLICATION_CONNECTION_TIMEOUT_MS", "1000"},
{"REPLICATION_WAVE_TIMEOUT_MS", "500"},
{"CHUNK_TRASH_ENABLED", "1"},
{"CHUNK_TRASH_EXPIRATION_SECONDS", "259200"},
{"CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB", "1024"},
{"CHUNK_TRASH_GC_BATCH_SIZE", "1000"},
{"CHUNK_TRASH_GC_SPACE_RECOVERY_BATCH_SIZE", "100"},
};

const static std::unordered_map<std::string, std::string> defaultOptionsMeta = {
Expand Down
14 changes: 12 additions & 2 deletions src/chunkserver/chunkserver-common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
collect_sources(CHUNKSERVER_PLUGINS)
shared_add_library(chunkserver-common ${CHUNKSERVER_PLUGINS_SOURCES})
target_link_libraries(chunkserver-common safsprotocol sfscommon
${Boost_LIBRARIES} ${ADDITIONAL_LIBS})
${Boost_LIBRARIES} ${ADDITIONAL_LIBS})
list(REMOVE_ITEM CHUNKSERVER_PLUGINS_TESTS
${CMAKE_CURRENT_SOURCE_DIR}/cmr_disk_unittest.cc)
create_unittest(chunkserver-common ${CHUNKSERVER_PLUGINS_TESTS})
link_unittest(chunkserver-common chunkserver-common sfscommon)
link_unittest(chunkserver-common gmock gtest chunkserver-common sfscommon)

# cmr_disk_unittest.cc is built as its own test executable; it is removed from
# CHUNKSERVER_PLUGINS_TESTS above so it is not part of the regular unittest.
add_executable(chunkserver-common-mocked-time-unittest
${CMAKE_CURRENT_SOURCE_DIR}/cmr_disk_unittest.cc)
# "--wrap=time" makes the linker resolve references to time() to __wrap_time()
# (and __real_time() to the original); presumably the test supplies
# __wrap_time() to control the clock -- confirm in cmr_disk_unittest.cc.
target_link_options(chunkserver-common-mocked-time-unittest PRIVATE "-Wl,--wrap=time")
target_link_libraries(chunkserver-common-mocked-time-unittest gmock gtest
chunkserver-common gtest_main sfscommon ${Boost_LIBRARIES}
${ADDITIONAL_LIBS})
# Register the executable with CTest under the name CmrDiskTest.
add_test(NAME CmrDiskTest COMMAND chunkserver-common-mocked-time-unittest)
135 changes: 135 additions & 0 deletions src/chunkserver/chunkserver-common/chunk_trash_index.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
Copyright 2023-2024 Leil Storage OÜ

This file is part of SaunaFS.

SaunaFS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3.

SaunaFS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SaunaFS. If not, see <http://www.gnu.org/licenses/>.
*/

#include "chunk_trash_index.h"
#include "chunk_trash_manager.h"

/// Returns the single shared index; it is a function-local static, so it is
/// constructed the first time this function is called.
ChunkTrashIndex &ChunkTrashIndex::instance() {
    static ChunkTrashIndex sharedIndex;
    return sharedIndex;
}

/// Drops every entry recorded for @p diskPath and leaves an empty entry for
/// that disk in the index.
void ChunkTrashIndex::reset(const std::filesystem::path &diskPath) {
    // The index is keyed by std::string; convert once instead of per call.
    const std::string diskKey = diskPath;

    std::scoped_lock<std::mutex> const guard(trashIndexMutex);
    trashIndex.erase(diskKey);
    trashIndex.try_emplace(diskKey);
}

/// Records @p filePath as deleted at @p deletionTime on disk @p diskPath.
/// The disk entry is created if it is not in the index yet.
void ChunkTrashIndex::add(const time_t &deletionTime,
                          const std::string &filePath,
                          const std::string &diskPath) {
    std::scoped_lock<std::mutex> const guard(trashIndexMutex);
    auto &entriesOfDisk = trashIndex[diskPath];
    entriesOfDisk.emplace(deletionTime, filePath);
}

/// Removes the entry (@p deletionTime, @p filePath) from the index of
/// @p diskPath, if present. At most one entry is removed.
///
/// Does not lock: the public callers in this file take trashIndexMutex
/// before calling it.
///
/// The disk is looked up with find() rather than operator[] so that removing
/// from a disk that is not in the index does not create an empty entry for it.
void ChunkTrashIndex::removeInternal(const time_t &deletionTime,
                                     const std::string &filePath,
                                     const std::string &diskPath) {
    const auto diskIt = trashIndex.find(diskPath);
    if (diskIt == trashIndex.end()) {
        return;  // Unknown disk: nothing to remove.
    }

    auto &fileEntries = diskIt->second;
    const auto sameTime = fileEntries.equal_range(deletionTime);
    for (auto it = sameTime.first; it != sameTime.second; ++it) {
        if (it->second == filePath) {
            fileEntries.erase(it);
            return;  // 'it' is invalid after erase; stop iterating.
        }
    }
}

/// Thread-safe removal of the entry (@p deletionTime, @p filePath) from the
/// index of @p diskPath: takes the index mutex and delegates to
/// removeInternal().
void ChunkTrashIndex::remove(const time_t &deletionTime,
                             const std::string &filePath,
                             const std::string &diskPath) {
    const std::lock_guard<std::mutex> guard(trashIndexMutex);
    removeInternal(deletionTime, filePath, diskPath);
}

/// Removes the entry (@p deletionTime, @p filePath) without the caller naming
/// the disk: every disk in the index is searched and the first matching entry
/// is erased.
///
/// The search only stops once an entry has actually been removed. Returning
/// unconditionally after the first disk would leave entries on all other
/// disks unreachable through this overload.
void ChunkTrashIndex::remove(const time_t &deletionTime,
                             const std::string &filePath) {
    std::scoped_lock<std::mutex> const lock(trashIndexMutex);
    for (auto &diskEntry : trashIndex) {
        auto &fileEntries = diskEntry.second;
        const auto sameTime = fileEntries.equal_range(deletionTime);
        for (auto it = sameTime.first; it != sameTime.second; ++it) {
            if (it->second == filePath) {
                fileEntries.erase(it);
                return;  // Removed; 'it' is invalid, so stop right here.
            }
        }
    }
}

/// Thread-safe entry point for collecting expired files: takes the index
/// mutex and forwards @p timeLimit and @p bulkSize to the unlocked
/// implementation.
ChunkTrashIndex::TrashIndexDiskEntries
ChunkTrashIndex::getExpiredFiles(const time_t &timeLimit, size_t bulkSize) {
    std::scoped_lock<std::mutex> const guard(trashIndexMutex);
    TrashIndexDiskEntries expired = getExpiredFilesInternal(timeLimit, bulkSize);
    return expired;
}


/// Collects expired entries across all disks in the index.
///
/// Does not lock: getExpiredFiles() takes trashIndexMutex before calling it.
///
/// @param timeLimit Passed to the per-disk overload to decide which entries
///                  count as expired.
/// @param bulkSize  Maximum total number of entries to collect over all
///                  disks; 0 means no limit.
/// @return Expired entries grouped by disk path. Disks that were not visited
///         because the limit was reached earlier have no key in the result.
ChunkTrashIndex::TrashIndexDiskEntries
ChunkTrashIndex::getExpiredFilesInternal(const time_t &timeLimit, size_t bulkSize) {
    TrashIndexDiskEntries expiredFiles;
    const bool unlimited = (bulkSize == 0);
    size_t collected = 0;

    for (const auto &diskEntry : trashIndex) {
        // Hand each disk only what is left of the budget, so that the total
        // never exceeds bulkSize. The budget is >= 1 here because the loop
        // is left as soon as the limit is reached; 0 keeps meaning "no limit".
        const size_t budget = unlimited ? 0 : bulkSize - collected;
        collected += getExpiredFilesInternal(diskEntry.first, timeLimit,
                                             expiredFiles, budget);
        if (!unlimited && collected >= bulkSize) {
            break;
        }
    }

    return expiredFiles;
}

size_t ChunkTrashIndex::getExpiredFilesInternal(const std::filesystem::path &diskPath,
const std::time_t &timeLimit,
std::unordered_map<std::string, std::multimap<std::time_t, std::string>> &expiredFiles,
size_t bulkSize) {
auto &diskTrashIndex = trashIndex[diskPath];
auto limit = diskTrashIndex.upper_bound(timeLimit);

expiredFiles[diskPath] = {};
size_t count = 0;
for (auto it = diskTrashIndex.begin(); it != limit; ++it) {
expiredFiles[diskPath].emplace(it->first, it->second);
if (bulkSize != 0 && ++count >= bulkSize) {
break;
}
}

return count;
}

/// Returns the entries of @p diskPath with the earliest deletion times.
///
/// @param diskPath        Disk whose entries are returned.
/// @param removalStepSize Maximum number of entries to return; 0 means all
///                        entries of the disk.
/// @return Copies of the selected entries, ordered by deletion time. Empty if
///         the disk is not in the index.
ChunkTrashIndex::TrashIndexFileEntries
ChunkTrashIndex::getOlderFiles(const std::string &diskPath,
                               const size_t removalStepSize) {
    std::scoped_lock<std::mutex> const lock(trashIndexMutex);
    TrashIndexFileEntries olderFiles;

    // find() instead of operator[]: a read-only query must not register an
    // unknown disk in the index (it would then show up in getDiskPaths()).
    const auto diskIt = trashIndex.find(diskPath);
    if (diskIt == trashIndex.end()) {
        return olderFiles;
    }

    size_t count = 0;
    for (const auto &fileEntry : diskIt->second) {
        olderFiles.emplace(fileEntry.first, fileEntry.second);
        if (removalStepSize != 0 && ++count >= removalStepSize) {
            break;
        }
    }

    return olderFiles;
}

/// Returns a copy of every disk path that currently is a key of the index,
/// in the iteration order of the underlying unordered map.
std::vector<std::string> ChunkTrashIndex::getDiskPaths() {
    std::scoped_lock<std::mutex> const guard(trashIndexMutex);

    std::vector<std::string> knownDisks;
    knownDisks.reserve(trashIndex.size());
    for (auto diskIt = trashIndex.cbegin(); diskIt != trashIndex.cend(); ++diskIt) {
        knownDisks.emplace_back(diskIt->first);
    }

    return knownDisks;
}
174 changes: 174 additions & 0 deletions src/chunkserver/chunkserver-common/chunk_trash_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
Copyright 2023-2024 Leil Storage OÜ

This file is part of SaunaFS.

SaunaFS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3.

SaunaFS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SaunaFS. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once

#include "common/platform.h"

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overflow"

#include <filesystem>

#pragma GCC diagnostic pop

#include <map>
#include <unordered_map>
#include <string>
#include <ctime>
#include <mutex>
#include <vector>

/**
 * @brief Singleton index of the files currently held in the chunk trash.
 *
 * Entries are grouped by disk path and, within a disk, ordered by deletion
 * time. Public operations on the index are serialized through an internal
 * mutex; the private *Internal helpers do not lock and rely on the caller
 * holding that mutex.
 */
class ChunkTrashIndex {
public:
    /// Entries of one disk: deletion time -> file path, ordered by time.
    using TrashIndexFileEntries = std::multimap<std::time_t, std::string>;
    /// Entries of all disks: disk path -> entries of that disk.
    using TrashIndexDiskEntries = std::unordered_map<std::string, TrashIndexFileEntries>;
    /// Type of the index itself.
    using TrashIndexType = TrashIndexDiskEntries;

    // The only object of this class is the one handed out by instance(), so
    // copying and moving are disabled.
    ChunkTrashIndex(const ChunkTrashIndex &) = delete;
    ChunkTrashIndex &operator=(const ChunkTrashIndex &) = delete;
    ChunkTrashIndex(ChunkTrashIndex &&) = delete;
    ChunkTrashIndex &operator=(ChunkTrashIndex &&) = delete;

    /**
     * @brief Access point to the singleton.
     *
     * @return Reference to the one ChunkTrashIndex object.
     */
    static ChunkTrashIndex &instance();

    /**
     * @brief Forgets all entries recorded for one disk.
     *
     * Afterwards the disk is present in the index with no entries.
     *
     * @param diskPath Disk whose entries are discarded.
     */
    void reset(const std::filesystem::path &diskPath);

    /**
     * @brief Registers a trashed file.
     *
     * @param deletionTime Time at which the file was deleted.
     * @param filePath     Path of the file.
     * @param diskPath     Disk the file belongs to.
     */
    void add(const std::time_t &deletionTime, const std::string &filePath,
             const std::string &diskPath);

    /**
     * @brief Unregisters a trashed file without naming its disk.
     *
     * @param deletionTime Deletion time the file was registered with.
     * @param filePath     Path of the file.
     */
    void remove(const std::time_t &deletionTime, const std::string &filePath);

    /**
     * @brief Unregisters a trashed file of a given disk.
     *
     * @param deletionTime Deletion time the file was registered with.
     * @param filePath     Path of the file.
     * @param diskPath     Disk the file belongs to.
     */
    void remove(const std::time_t &deletionTime, const std::string &filePath,
                const std::string &diskPath);

    /**
     * @brief Collects expired entries over all disks.
     *
     * An entry counts as expired when its deletion time is not later than
     * @p timeLimit.
     *
     * @param timeLimit Expiration boundary.
     * @param bulkSize  Batch limit applied while collecting; 0 disables it.
     * @return Expired entries, grouped by disk path.
     */
    TrashIndexDiskEntries getExpiredFiles(const std::time_t &timeLimit,
                                          size_t bulkSize = 0);

    /**
     * @brief Returns the entries of one disk with the earliest deletion times.
     *
     * @param diskPath        Disk to query.
     * @param removalStepSize Maximum number of entries to return; 0 returns
     *                        all entries of the disk.
     * @return Copies of the selected entries, ordered by deletion time.
     */
    TrashIndexFileEntries getOlderFiles(const std::string &diskPath,
                                        const size_t removalStepSize);

    /**
     * @brief Lists the disk paths currently present in the index.
     *
     * @return One string per disk entry.
     */
    std::vector<std::string> getDiskPaths();

private:
    // Construction and destruction are reserved for instance().
    ChunkTrashIndex() = default;
    ~ChunkTrashIndex() = default;

    TrashIndexType trashIndex;   ///< Disk path -> (deletion time -> file path).
    std::mutex trashIndexMutex;  ///< Guards every access to trashIndex.

    /**
     * @brief Unlocked per-disk part of getExpiredFiles().
     *
     * Resets the entry for @p diskPath in @p expiredFiles and fills it with
     * the entries of that disk whose deletion time is not later than
     * @p timeLimit.
     *
     * @param diskPath     Disk to examine.
     * @param timeLimit    Expiration boundary.
     * @param expiredFiles Output map receiving the expired entries.
     * @param bulkSize     Maximum number of entries to copy; 0 disables the
     *                     limit.
     * @return Count reported by the implementation for the copied entries.
     */
    size_t getExpiredFilesInternal(const std::filesystem::path &diskPath,
                                   const std::time_t &timeLimit,
                                   TrashIndexDiskEntries &expiredFiles,
                                   size_t bulkSize = 0);

    /**
     * @brief Unlocked implementation of getExpiredFiles().
     *
     * @param timeLimit Expiration boundary.
     * @param bulkSize  Batch limit applied while collecting; 0 disables it.
     * @return Expired entries, grouped by disk path.
     */
    TrashIndexDiskEntries getExpiredFilesInternal(const std::time_t &timeLimit,
                                                  size_t bulkSize = 0);

    /**
     * @brief Unlocked implementation of the three-argument remove().
     *
     * @param deletionTime Deletion time the file was registered with.
     * @param filePath     Path of the file.
     * @param diskPath     Disk the file belongs to.
     */
    void removeInternal(const std::time_t &deletionTime,
                        const std::string &filePath,
                        const std::string &diskPath);
};
Loading