utils::string - split functions extended

Summary:
- RSplit added
- split limits added
- tests

Reviewers: teon.banek, dgleich

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D930
This commit is contained in:
florijan 2017-10-25 13:02:54 +02:00
parent df4933ea0f
commit 9f7ef8e0e9
2 changed files with 103 additions and 12 deletions
src/utils
tests/unit

View File

@ -2,6 +2,7 @@
#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
@ -90,29 +91,70 @@ inline std::string Replace(std::string src, const std::string &match,
/**
 * Splits a string on occurrences of a delimiter, scanning from left to right.
 * Adjacent delimiters, and delimiters at the very start or end of the string,
 * produce empty strings in the result. For example, Split("aba", "a") results
 * in {"", "b", ""}.
 *
 * @param src - The string to split. An empty string results in an empty
 * vector.
 * @param delimiter - The delimiter to split on. If the delimiter is empty no
 * splitting is performed and the result contains `src` as its only element.
 * @param splits - The maximum number of splits. For the given value N the
 * returned vector will contain at most (N + 1) elements. If given a negative
 * value, all possible splits are performed.
 * @return - a vector of splits.
 */
inline std::vector<std::string> Split(const std::string &src,
                                      const std::string &delimiter,
                                      int splits = -1) {
  std::vector<std::string> res;
  if (src.empty()) {
    return res;
  }
  // An empty delimiter matches at every position without consuming any input,
  // so the loop below would never advance. Treat it as "nothing to split on".
  if (delimiter.empty()) {
    res.emplace_back(src);
    return res;
  }
  // `index` is the start of the part of `src` that has not been split yet.
  size_t index = 0;
  // A negative `splits` is never decremented, so it means "no limit".
  while (splits < 0 || splits-- != 0) {
    const auto n = src.find(delimiter, index);
    if (n == std::string::npos) break;
    res.emplace_back(src.substr(index, n - index));
    index = n + delimiter.size();
  }
  // Whatever is left (possibly an empty string) is the final piece.
  res.emplace_back(src.substr(index));
  return res;
}
/**
 * Splits a string on occurrences of a delimiter, scanning from right to left,
 * and returns the pieces in their original (left to right) order.
 * For example, RSplit("a.b.c", ".", 1) results in {"a.b", "c"}.
 * Adjacent delimiters, and delimiters at the very start or end of the string,
 * produce empty strings in the result.
 *
 * @param src - The string to split. An empty string results in an empty
 * vector.
 * @param delimiter - The delimiter to split on. Matches never overlap: once a
 * delimiter has been consumed, its characters are not reused by the next
 * match. If the delimiter is empty no splitting is performed and the result
 * contains `src` as its only element.
 * @param splits - The maximum number of splits. For the given value N the
 * returned vector will contain at most (N + 1) elements. If given a negative
 * value, all possible splits are performed.
 * @return - a vector of splits.
 */
inline std::vector<std::string> RSplit(const std::string &src,
                                       const std::string &delimiter,
                                       int splits = -1) {
  std::vector<std::string> res;
  if (src.empty()) {
    return res;
  }
  // An empty delimiter matches everywhere without consuming input; treat it
  // as "nothing to split on".
  if (delimiter.empty()) {
    res.emplace_back(src);
    return res;
  }
  // `index` is the exclusive end of the prefix of `src` not yet split.
  size_t index = src.size();
  // A negative `splits` is never decremented, so it means "no limit".
  while (splits < 0 || splits-- != 0) {
    // A match has to fit entirely inside the remaining prefix, otherwise it
    // would overlap the delimiter consumed in the previous iteration.
    if (index < delimiter.size()) break;
    const auto n = src.rfind(delimiter, index - delimiter.size());
    if (n == std::string::npos) break;
    const auto piece_start = n + delimiter.size();
    res.emplace_back(src.substr(piece_start, index - piece_start));
    index = n;
  }
  // Whatever is left (possibly an empty string) is the leftmost piece.
  res.emplace_back(src.substr(0, index));
  // Pieces were collected right to left; restore the source order.
  std::reverse(res.begin(), res.end());
  return res;
}
/**
* Split string by whitespace and return vector of results.
* Runs of consecutive whitespace are regarded as a single delimiter.
* Additionally, the result will not contain empty strings at the start or end,
* as if the string was trimmed before splitting.
*/
inline std::vector<std::string> Split(const std::string &src) {
if (src.empty()) {
@ -160,5 +202,4 @@ inline bool EndsWith(const std::string &s, const std::string &suffix) {
/**
 * Checks whether a string begins with the given prefix.
 *
 * @param s - The string to inspect.
 * @param prefix - The prefix to look for. An empty prefix always matches.
 * @return - true if the first prefix.size() characters of `s` equal `prefix`.
 */
inline bool StartsWith(const std::string &s, const std::string &prefix) {
  // A prefix longer than the string itself can never match.
  if (prefix.size() > s.size()) {
    return false;
  }
  return std::equal(prefix.begin(), prefix.end(), s.begin());
}
}

View File

@ -0,0 +1,50 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "utils/string.hpp"
using vec = std::vector<std::string>;
TEST(String, SplitNoLimit) {
  // Each case: the input, the delimiter, and the pieces expected when no
  // split limit is passed.
  struct Case {
    std::string input;
    std::string delimiter;
    vec expected;
  };
  const std::vector<Case> cases = {
      {"aba", "a", {"", "b", ""}},
      {"aba", "b", {"a", "a"}},
      {"abba", "b", {"a", "", "a"}},
      {"aba", "c", {"aba"}},
  };
  for (const auto &c : cases) {
    EXPECT_EQ(utils::Split(c.input, c.delimiter), c.expected);
  }
}
TEST(String, RSplitNoLimit) {
  // Same inputs and expectations as the unlimited Split cases: each case is
  // the input, the delimiter, and the expected pieces.
  struct Case {
    std::string input;
    std::string delimiter;
    vec expected;
  };
  const std::vector<Case> cases = {
      {"aba", "a", {"", "b", ""}},
      {"aba", "b", {"a", "a"}},
      {"abba", "b", {"a", "", "a"}},
      {"aba", "c", {"aba"}},
  };
  for (const auto &c : cases) {
    EXPECT_EQ(utils::RSplit(c.input, c.delimiter), c.expected);
  }
}
TEST(String, SplitWithLimit) {
  // Each case: the input, the split limit, and the expected pieces. The
  // delimiter is "." throughout.
  struct Case {
    std::string input;
    int splits;
    vec expected;
  };
  const std::vector<Case> cases = {
      {"a.b.c.d", 0, {"a.b.c.d"}},
      {"a.b.c.d", 1, {"a", "b.c.d"}},
      {"a.b.c.d", 2, {"a", "b", "c.d"}},
      {"a.b.c.d", 100, {"a", "b", "c", "d"}},
      {"a.b.c.d", -1, {"a", "b", "c", "d"}},
      {"a.b.c.d", -2, {"a", "b", "c", "d"}},
      {"a.b.c.d", -100, {"a", "b", "c", "d"}},
      {"a..b..c", 1, {"a", ".b..c"}},
      {"a..b..c", 2, {"a", "", "b..c"}},
  };
  for (const auto &c : cases) {
    EXPECT_EQ(utils::Split(c.input, ".", c.splits), c.expected);
  }
}
TEST(String, RSplitWithLimit) {
  // Each case: the input, the split limit, and the expected pieces. The
  // delimiter is "." throughout.
  struct Case {
    std::string input;
    int splits;
    vec expected;
  };
  const std::vector<Case> cases = {
      {"a.b.c.d", 0, {"a.b.c.d"}},
      {"a.b.c.d", 1, {"a.b.c", "d"}},
      {"a.b.c.d", 2, {"a.b", "c", "d"}},
      {"a.b.c.d", 100, {"a", "b", "c", "d"}},
      {"a.b.c.d", -1, {"a", "b", "c", "d"}},
      {"a.b.c.d", -2, {"a", "b", "c", "d"}},
      {"a.b.c.d", -100, {"a", "b", "c", "d"}},
      {"a..b..c", 1, {"a..b.", "c"}},
      {"a..b..c", 2, {"a..b", "", "c"}},
  };
  for (const auto &c : cases) {
    EXPECT_EQ(utils::RSplit(c.input, ".", c.splits), c.expected);
  }
}
TEST(String, SplitWhistespace) {
  // The single-argument overload is expected to return nothing for input that
  // is only whitespace.
  const vec no_pieces;
  EXPECT_EQ(utils::Split(" "), no_pieces);

  // Leading and trailing whitespace is not expected to produce empty pieces.
  const vec two_pieces{"a", "b"};
  EXPECT_EQ(utils::Split(" a b "), two_pieces);
}