Skip to content

Commit

Permalink
(String) Add split support for multi-character delimiters (#5)
Browse files Browse the repository at this point in the history
`split` support for multi-character delimiters:
```c++
split("a b->c***d", " ", "->", "***");    // {"a", "b", "c", "d"}
```

To support multi-character delimiters, delimiters must now be passed as
`string` or `string_view` values (string literals work too), not as `char`.
  • Loading branch information
csparker247 authored Aug 8, 2024
1 parent 5a1b9c1 commit 3fa81bd
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 16 deletions.
4 changes: 2 additions & 2 deletions examples/StringExample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ auto main() -> int
// a b c

// Split comma separated
for (const auto& s : split("a,b,c", ',')) {
for (const auto& s : split("a,b,c", ",")) {
std::cout << s << " ";
}
std::cout << "\n";
// a b c

// Split multiple delimiters
for (const auto& s : split("a+b-c", '+', '-')) {
for (const auto& s : split("a+b-c", "+", "-")) {
std::cout << s << " ";
}
std::cout << "\n";
Expand Down
45 changes: 34 additions & 11 deletions include/educelab/core/utils/String.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,40 +150,63 @@ static auto trim_copy(const std::string_view s) -> std::string
return std::string{trim(s)};
}

/** @brief Split a string by a delimiter */
/**
* @brief Split a string by a delimiter
*
* When multiple delimiters match at the same position, the longest delimiter
* takes precedence:
*
* ```{.cpp}
* split("a->b->c", "-", "->"); // returns {"a", "b", "c"}
* ```
*/
template <typename... Ds>
static auto split(std::string_view s, const Ds&... ds)
-> std::vector<std::string_view>
{
constexpr std::string_view DEFAULT_DELIM{" "};

// Build delimiters list
std::vector<char> delimiters;
std::vector<std::string_view> delimiters;
if (sizeof...(ds) > 0) {
delimiters = {ds...};
} else {
delimiters.emplace_back(' ');
delimiters.emplace_back(DEFAULT_DELIM);
}

// Get a list of all delimiter start positions
std::vector<std::string_view::size_type> delimPos;
// Get a list of all delimiter start pos and sizes
std::vector<
std::pair<std::string_view::size_type, std::string_view::size_type>>
delimPos;
for (const auto& delim : delimiters) {
auto b = s.find(delim, 0);
while (b != std::string_view::npos) {
delimPos.emplace_back(b);
b = s.find(delim, b + 1);
delimPos.emplace_back(b, delim.size());
b = s.find(delim, b + delim.size());
}
}

// Sort the delimiter start positions
std::sort(delimPos.begin(), delimPos.end());
// Sort the delimiter start positions by first and largest
std::sort(
delimPos.begin(), delimPos.end(),
[](const auto& l, const auto& r) { return l.second > r.second; });
std::sort(
delimPos.begin(), delimPos.end(),
[](const auto& l, const auto& r) { return l.first < r.first; });

// Split string
std::vector<std::string_view> tokens;
std::string_view::size_type begin{0};
for (const auto end : delimPos) {
for (const auto [end, size] : delimPos) {
// ignore nested delimiters
if (end < begin) {
continue;
}
// get from begin to delim start
if (auto t = s.substr(begin, end - begin); not t.empty()) {
tokens.emplace_back(t);
}
begin = end + 1;
begin = end + size;
}
if (auto t = s.substr(begin); not t.empty()) {
tokens.emplace_back(t);
Expand Down
16 changes: 13 additions & 3 deletions tests/src/TestString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,23 @@ TEST(String, Split)
EXPECT_EQ(split(" a b c "), expected);

// Space separated (explicit)
EXPECT_EQ(split("a b c", ' '), expected);
EXPECT_EQ(split("a b c", " "), expected);

// Comma separated
EXPECT_EQ(split("a,b,c", ','), expected);
EXPECT_EQ(split("a,b,c", ","), expected);

// Multi-delimited
EXPECT_EQ(split("a+b-c", '+', '-'), expected);
EXPECT_EQ(split("a+b-c", "+", "-"), expected);

// Multi-character delimiter
EXPECT_EQ(split("a b->c", " ", "->"), expected);

// Multi-character, ignore nested
EXPECT_EQ(split("a-b->c", "-", "->"), expected);

// Overlapping will only consume first delim
expected = {"a", "b", ">c"};
EXPECT_EQ(split("a--b-->c", "--", "->"), expected);

// Sentence
expected = {"This", "is", "only", "a", "test."};
Expand Down

0 comments on commit 3fa81bd

Please sign in to comment.