diff --git a/examples/StringExample.cpp b/examples/StringExample.cpp index e70c721..4c24189 100644 --- a/examples/StringExample.cpp +++ b/examples/StringExample.cpp @@ -28,14 +28,14 @@ auto main() -> int // a b c // Split comma separated - for (const auto& s : split("a,b,c", ',')) { + for (const auto& s : split("a,b,c", ",")) { std::cout << s << " "; } std::cout << "\n"; // a b c // Split multiple delimiters - for (const auto& s : split("a+b-c", '+', '-')) { + for (const auto& s : split("a+b-c", "+", "-")) { std::cout << s << " "; } std::cout << "\n"; diff --git a/include/educelab/core/utils/String.hpp b/include/educelab/core/utils/String.hpp index 7864f8e..0b0d641 100644 --- a/include/educelab/core/utils/String.hpp +++ b/include/educelab/core/utils/String.hpp @@ -150,40 +150,63 @@ static auto trim_copy(const std::string_view s) -> std::string return std::string{trim(s)}; } -/** @brief Split a string by a delimiter */ +/** + * @brief Split a string by a delimiter + * + * When provided conflicting delimiters, the largest delimiter will take + * precedence: + * + * ```{.cpp} + * split("a->b->c", "-", "->"); // returns {"a", "b", "c"} + * ``` + */ template static auto split(std::string_view s, const Ds&... ds) -> std::vector { + constexpr std::string_view DEFAULT_DELIM{" "}; + // Build delimiters list - std::vector delimiters; + std::vector delimiters; if (sizeof...(ds) > 0) { delimiters = {ds...}; } else { - delimiters.emplace_back(' '); + delimiters.emplace_back(DEFAULT_DELIM); } - // Get a list of all delimiter start positions - std::vector delimPos; + // Get a list of all delimiter start pos and sizes + std::vector< + std::pair> + delimPos; for (const auto& delim : delimiters) { auto b = s.find(delim, 0); while (b != std::string_view::npos) { - delimPos.emplace_back(b); - b = s.find(delim, b + 1); + delimPos.emplace_back(b, delim.size()); + b = s.find(delim, b + delim.size()); } } - // Sort the delimiter start positions - std::sort(delimPos.begin(), delimPos.end()); + // Sort the delimiter start positions by first and largest + std::sort( + delimPos.begin(), delimPos.end(), + [](const auto& l, const auto& r) { return l.second > r.second; }); + std::sort( + delimPos.begin(), delimPos.end(), + [](const auto& l, const auto& r) { return l.first < r.first; }); // Split string std::vector tokens; std::string_view::size_type begin{0}; - for (const auto end : delimPos) { + for (const auto [end, size] : delimPos) { + // ignore nested delimiters + if (end < begin) { + continue; + } + // get from begin to delim start if (auto t = s.substr(begin, end - begin); not t.empty()) { tokens.emplace_back(t); } - begin = end + 1; + begin = end + size; } if (auto t = s.substr(begin); not t.empty()) { tokens.emplace_back(t); diff --git a/tests/src/TestString.cpp b/tests/src/TestString.cpp index 9a6d7e1..c874b5f 100644 --- a/tests/src/TestString.cpp +++ b/tests/src/TestString.cpp @@ -139,13 +139,23 @@ TEST(String, Split) EXPECT_EQ(split(" a b c "), expected); // Space separated (explicit) - EXPECT_EQ(split("a b c", ' '), expected); + EXPECT_EQ(split("a b c", " "), expected); // Comma separated - EXPECT_EQ(split("a,b,c", ','), expected); + EXPECT_EQ(split("a,b,c", ","), expected); // Multi-delimited - EXPECT_EQ(split("a+b-c", '+', '-'), expected); + EXPECT_EQ(split("a+b-c", "+", "-"), expected); + + // Multi-character delimiter + EXPECT_EQ(split("a b->c", " ", "->"), expected); + + // Multi-character, ignore nested + EXPECT_EQ(split("a-b->c", "-", "->"), expected); + + // Overlapping will only consume first delim + expected = {"a", "b", ">c"}; + EXPECT_EQ(split("a--b-->c", "--", "->"), expected); // Sentence expected = {"This", "is", "only", "a", "test."};