memgraph/src/utils/string.hpp
Teon Banek 38c6625e2c utils: Support std::basic_string with allocators
Reviewers: mtomic, mferencevic, msantl

Reviewed By: mtomic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2102
2019-06-03 17:07:49 +02:00

434 lines
14 KiB
C++

/** @file */
#pragma once
#include <algorithm>
#include <cctype>
#include <cstring>
#include <iostream>
#include <iterator>
#include <random>
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
#include "utils/exceptions.hpp"
namespace utils {
/**
 * Remove whitespace characters from the start of a string.
 *
 * A character counts as whitespace when `std::isspace` says so. The returned
 * view refers to the same character data as `s`; nothing is copied.
 */
inline std::string_view LTrim(const std::string_view &s) {
  size_t start = 0;
  // The cast matters: handing a negative `char` (any byte >= 0x80 where `char`
  // is signed) to isspace is undefined behaviour.
  while (start < s.size() &&
         std::isspace(static_cast<unsigned char>(s[start]))) {
    ++start;
  }
  return std::string_view(s.data() + start, s.size() - start);
}
/**
 * Strip from the front of `s` every leading character that occurs in `chars`.
 *
 * The result is a view into the data of `s`; nothing is copied.
 */
inline std::string_view LTrim(const std::string_view &s,
                              const std::string_view &chars) {
  // Position of the first character that must be kept; npos means that the
  // whole string consists of characters from `chars`.
  const auto first_kept = s.find_first_not_of(chars);
  std::string_view remainder = s;
  remainder.remove_prefix(first_kept == std::string_view::npos ? s.size()
                                                               : first_kept);
  return remainder;
}
/**
 * Remove whitespace characters from the end of a string.
 *
 * A character counts as whitespace when `std::isspace` says so. The returned
 * view refers to the same character data as `s`; nothing is copied.
 */
inline std::string_view RTrim(const std::string_view &s) {
  size_t count = s.size();
  // The cast matters: handing a negative `char` (any byte >= 0x80 where `char`
  // is signed) to isspace is undefined behaviour.
  while (count > 0 &&
         std::isspace(static_cast<unsigned char>(s[count - 1]))) {
    --count;
  }
  return std::string_view(s.data(), count);
}
/**
 * Strip from the back of `s` every trailing character that occurs in `chars`.
 *
 * The result is a view into the data of `s`; nothing is copied.
 */
inline std::string_view RTrim(const std::string_view &s,
                              const std::string_view &chars) {
  // Position of the last character that must be kept; npos means that the
  // whole string consists of characters from `chars`.
  const auto last_kept = s.find_last_not_of(chars);
  const size_t kept_length =
      last_kept == std::string_view::npos ? 0 : last_kept + 1;
  std::string_view remainder = s;
  remainder.remove_suffix(s.size() - kept_length);
  return remainder;
}
/**
 * Remove whitespace characters from the start and from the end of a string.
 *
 * A character counts as whitespace when `std::isspace` says so. The returned
 * view refers to the same character data as `s`; nothing is copied.
 */
inline std::string_view Trim(const std::string_view &s) {
  // The casts below matter: handing a negative `char` (any byte >= 0x80 where
  // `char` is signed) to isspace is undefined behaviour.
  size_t start = 0;
  size_t count = s.size();
  while (start < count &&
         std::isspace(static_cast<unsigned char>(s[start]))) {
    ++start;
  }
  // Stop at `start` so an all-whitespace string yields an empty view.
  while (count > start &&
         std::isspace(static_cast<unsigned char>(s[count - 1]))) {
    --count;
  }
  return std::string_view(s.data() + start, count - start);
}
/**
 * Strip characters that occur in `chars` from both ends of `s`.
 *
 * The result is a view into the data of `s`; nothing is copied.
 */
inline std::string_view Trim(const std::string_view &s,
                             const std::string_view &chars) {
  const auto first_kept = s.find_first_not_of(chars);
  if (first_kept == std::string_view::npos) {
    // Nothing survives: hand back an empty view positioned at the end of `s`.
    return s.substr(s.size());
  }
  // A kept character exists, so this search cannot fail and is >= first_kept.
  const auto last_kept = s.find_last_not_of(chars);
  return s.substr(first_kept, last_kept - first_kept + 1);
}
/**
 * Lowercase all characters of a string and store the result in `out`.
 *
 * Each character is mapped with the C library `tolower`, so the mapping
 * follows the C locale in effect (the default "C" locale only changes the
 * ASCII letters 'A'-'Z').
 *
 * @param out string that is resized to `s.size()` and overwritten.
 * @param s characters to convert.
 * @return pointer to `out`.
 */
template <class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *ToLowerCase(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    const std::string_view &s) {
  out->resize(s.size());
  std::transform(s.begin(), s.end(), out->begin(), [](char c) {
    // tolower takes an int that must hold an unsigned char value (or EOF);
    // passing a negative `char` directly is undefined behaviour.
    return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  });
  return out;
}
/**
 * Lowercase all characters of a string and return the result as a new
 * `std::string`.
 *
 * Each character is mapped with the C library `tolower`, so the mapping
 * follows the C locale in effect.
 */
inline std::string ToLowerCase(const std::string_view &s) {
  std::string res;
  ToLowerCase(&res, s);
  return res;
}
/**
 * Uppercase all characters of a string and store the result in `out`.
 *
 * Each character is mapped with the C library `toupper`, so the mapping
 * follows the C locale in effect (the default "C" locale only changes the
 * ASCII letters 'a'-'z').
 *
 * @param out string that is resized to `s.size()` and overwritten.
 * @param s characters to convert.
 * @return pointer to `out`.
 */
template <class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *ToUpperCase(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    const std::string_view &s) {
  out->resize(s.size());
  std::transform(s.begin(), s.end(), out->begin(), [](char c) {
    // toupper takes an int that must hold an unsigned char value (or EOF);
    // passing a negative `char` directly is undefined behaviour.
    return static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
  });
  return out;
}
/**
 * Uppercase all characters of a string and return the result as a new
 * `std::string`.
 *
 * Each character is mapped with the C library `toupper`, so the mapping
 * follows the C locale in effect.
 */
inline std::string ToUpperCase(const std::string_view &s) {
  std::string res;
  ToUpperCase(&res, s);
  return res;
}
/**
 * Join the `strings` collection separated by a given separator into `out`.
 *
 * Any collection that can be iterated more than once with `std::begin` /
 * `std::end` is accepted (for example `std::vector`, `std::list`, `std::set`);
 * random access is not required. Every element must provide `size()` and be
 * appendable to `out` with `+=`.
 *
 * @param out string that is cleared and then filled with the joined result.
 * @param strings elements to join; an empty collection yields an empty string.
 * @param separator text inserted between consecutive elements.
 * @return pointer to `out`.
 */
template <class TCollection, class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *Join(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    const TCollection &strings, const std::string_view &separator) {
  out->clear();
  auto it = std::begin(strings);
  const auto last = std::end(strings);
  if (it == last) return out;
  // Measure the result first so that `out` is allocated only once.
  size_t total_size = 0;
  size_t element_count = 0;
  for (const auto &element : strings) {
    total_size += element.size();
    ++element_count;
  }
  // The collection is non-empty here, so `element_count - 1` cannot wrap.
  total_size += separator.size() * (element_count - 1);
  out->reserve(total_size);
  *out += *it;
  for (++it; it != last; ++it) {
    *out += separator;
    *out += *it;
  }
  return out;
}
/**
 * Join the `strings` collection separated by a given separator and return the
 * result as a new `std::string`.
 */
inline std::string Join(const std::vector<std::string> &strings,
                        const std::string_view &separator) {
  std::string res;
  Join(&res, strings, separator);
  return res;
}
/**
 * Replace all occurrences of `match` in `src` with `replacement`.
 *
 * Occurrences are located left to right and do not overlap; text inserted as a
 * replacement is never searched again. An empty `match` replaces nothing, so
 * `out` simply becomes a copy of `src`.
 *
 * @param out string that receives the result.
 * @param src text to search.
 * @param match text to look for.
 * @param replacement text substituted for each occurrence of `match`.
 * @param alloc currently unused; kept so existing callers keep compiling.
 * @return pointer to `out`.
 */
template <class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *Replace(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    const std::string_view &src, const std::string_view &match,
    const std::string_view &replacement, const TAllocator &alloc) {
  static_cast<void>(alloc);
  // TODO: This could be implemented much more efficiently.
  *out = src;
  // An empty pattern is "found" at every position, so the loop below would
  // keep inserting `replacement` forever.
  if (match.empty()) return out;
  for (size_t pos = out->find(match); pos != std::string::npos;
       pos = out->find(match, pos + replacement.size())) {
    out->replace(pos, match.size(), replacement);
  }
  return out;
}
/**
 * Replace all occurrences of `match` in `src` with `replacement` and return the
 * result as a new `std::string`. An empty `match` replaces nothing.
 */
inline std::string Replace(const std::string_view &src,
                           const std::string_view &match,
                           const std::string_view &replacement) {
  std::string res;
  Replace(&res, src, match, replacement, std::allocator<char>());
  return res;
}
/**
 * Split a string by `delimiter` with a maximum of `splits` into a vector.
 * The vector will have at most `splits` + 1 elements. Negative value of
 * `splits` indicates to perform all possible splits.
 *
 * An empty `src` produces an empty vector. An empty `delimiter` performs no
 * split at all, so the result is a single element equal to `src`.
 *
 * @param out vector that is cleared and then filled with the pieces.
 * @param src text to split.
 * @param delimiter text that separates the pieces.
 * @param splits maximum number of splits, or a negative value for no limit.
 * @return pointer to `out`.
 */
template <class TString, class TAllocator>
std::vector<TString, TAllocator> *Split(std::vector<TString, TAllocator> *out,
                                        const std::string_view &src,
                                        const std::string_view &delimiter,
                                        int splits = -1) {
  out->clear();
  if (src.empty()) return out;
  size_t index = 0;
  // An empty delimiter matches at every position without consuming input, so
  // searching for it would never advance `index`.
  if (!delimiter.empty()) {
    while (splits < 0 || splits-- != 0) {
      const auto n = src.find(delimiter, index);
      if (n == std::string::npos) break;
      out->emplace_back(src.substr(index, n - index));
      index = n + delimiter.size();
    }
  }
  // Whatever follows the last consumed delimiter is the final piece.
  out->emplace_back(src.substr(index));
  return out;
}
/**
 * Split a string by `delimiter` with a maximum of `splits` into a vector.
 * The vector will have at most `splits` + 1 elements. Negative value of
 * `splits` indicates to perform all possible splits.
 *
 * An empty `src` produces an empty vector. An empty `delimiter` performs no
 * split at all, so the result is a single element equal to `src`.
 */
inline std::vector<std::string> Split(const std::string_view &src,
                                      const std::string_view &delimiter,
                                      int splits = -1) {
  std::vector<std::string> res;
  Split(&res, src, delimiter, splits);
  return res;
}
/**
 * Split a string by whitespace into a vector.
 * Runs of consecutive whitespace are regarded as a single delimiter.
 * Additionally, the result will not contain empty strings at the start or end
 * as if the string was trimmed before splitting.
 *
 * A character counts as whitespace when `std::isspace` says so. The input is
 * scanned by hand; no regular expression is involved.
 *
 * @param out vector that is cleared and then filled with the tokens.
 * @param src text to split.
 * @return pointer to `out`.
 */
template <class TString, class TAllocator>
std::vector<TString, TAllocator> *Split(std::vector<TString, TAllocator> *out,
                                        const std::string_view &src) {
  out->clear();
  if (src.empty()) return out;
  // The cast matters: handing a negative `char` to isspace is undefined.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  // First pass: count the tokens so the vector is allocated only once.
  size_t token_count = 0;
  bool inside_token = false;
  for (const char c : src) {
    const bool space = is_space(c);
    if (!space && !inside_token) ++token_count;
    inside_token = !space;
  }
  out->reserve(token_count);
  // Second pass: emit each maximal run of non-whitespace characters.
  size_t pos = 0;
  while (pos < src.size()) {
    while (pos < src.size() && is_space(src[pos])) ++pos;
    if (pos == src.size()) break;
    const size_t token_start = pos;
    while (pos < src.size() && !is_space(src[pos])) ++pos;
    out->emplace_back(src.substr(token_start, pos - token_start));
  }
  return out;
}
/**
 * Split a string by whitespace into a vector.
 * Runs of consecutive whitespace are regarded as a single delimiter.
 * Additionally, the result will not contain empty strings at the start or end
 * as if the string was trimmed before splitting.
 */
inline std::vector<std::string> Split(const std::string_view &src) {
  std::vector<std::string> res;
  Split(&res, src);
  return res;
}
/**
 * Like `Split` but string is processed from right to left.
 * For example, RSplit("a.b.c", ".", 1) results in {"a.b", "c"}.
 * The vector will have at most `splits` + 1 elements. Negative value of
 * `splits` indicates to perform all possible splits.
 *
 * Delimiter occurrences are taken from the right and never overlap, so
 * RSplit("aaa", "aa") results in {"a", ""}. An empty `src` produces an empty
 * vector. An empty `delimiter` performs no split at all, so the result is a
 * single element equal to `src`.
 *
 * @param out vector that is cleared and then filled with the pieces, ordered
 *            as they appear in `src`.
 * @param src text to split.
 * @param delimiter text that separates the pieces.
 * @param splits maximum number of splits, or a negative value for no limit.
 * @return pointer to `out`.
 */
template <class TString, class TAllocator>
std::vector<TString, TAllocator> *RSplit(std::vector<TString, TAllocator> *out,
                                         const std::string_view &src,
                                         const std::string_view &delimiter,
                                         int splits = -1) {
  out->clear();
  if (src.empty()) return out;
  // One past the last character that has not been assigned to a piece yet.
  size_t end = src.size();
  // An empty delimiter matches everywhere without consuming input.
  if (!delimiter.empty()) {
    while (splits < 0 || splits-- != 0) {
      // The delimiter must fit entirely inside [0, end); otherwise it would
      // overlap text that already belongs to a previously emitted piece.
      if (end < delimiter.size()) break;
      const auto n = src.rfind(delimiter, end - delimiter.size());
      if (n == std::string::npos) break;
      out->emplace_back(
          src.substr(n + delimiter.size(), end - n - delimiter.size()));
      end = n;
    }
  }
  out->emplace_back(src.substr(0, end));
  // Pieces were collected right to left; restore source order.
  std::reverse(out->begin(), out->end());
  return out;
}
/**
 * Like `Split` but string is processed from right to left.
 * For example, RSplit("a.b.c", ".", 1) results in {"a.b", "c"}.
 * The returned vector and its elements use `std::allocator<>`. The vector will
 * have at most `splits` + 1 elements. Negative value of `splits` indicates to
 * perform all possible splits.
 *
 * An empty `src` produces an empty vector. An empty `delimiter` performs no
 * split at all, so the result is a single element equal to `src`.
 */
inline std::vector<std::string> RSplit(const std::string_view &src,
                                       const std::string_view &delimiter,
                                       int splits = -1) {
  std::vector<std::string> res;
  RSplit(&res, src, delimiter, splits);
  return res;
}
/**
 * Convert the text in `s` to a `double`, reading it with the classic locale.
 * The text is copied into a temporary string first, so a heap allocation may
 * happen when the input is long enough.
 *
 * @throw BasicException if extraction fails or if the stream has not reached
 *        its end after the number was read.
 */
inline double ParseDouble(const std::string_view &s) {
  // std::stod is not an option because it honours the current locale.
  //
  // NOTE: std::istringstream stores its own copy of the string handed to it,
  // which may allocate for large inputs. It has no constructor that accepts a
  // custom allocator. Passing a std::basic_string with a custom allocator
  // would probably not help either, since std::istringstream will likely go
  // through std::allocator_traits<>::select_on_container_copy_construction,
  // and most allocators fall back to the global new/delete allocator there.
  std::istringstream stream{std::string{s}};
  stream.imbue(std::locale::classic());
  double value = 0.0;
  stream >> value;
  // Success means the extraction worked and nothing is left unread.
  const bool consumed_everything = !stream.fail() && stream.eof();
  if (!consumed_everything) {
    throw BasicException("Couldn't parse string");
  }
  return value;
}
/**
 * Tell whether the last `suffix.size()` characters of `s` are equal to
 * `suffix`. An empty `suffix` always matches.
 */
inline bool EndsWith(const std::string_view &s,
                     const std::string_view &suffix) {
  // A suffix longer than the string itself can never match.
  if (suffix.size() > s.size()) return false;
  const std::string_view tail = s.substr(s.size() - suffix.size());
  return tail == suffix;
}
/**
 * Tell whether the first `prefix.size()` characters of `s` are equal to
 * `prefix`. An empty `prefix` always matches.
 */
inline bool StartsWith(const std::string_view &s,
                       const std::string_view &prefix) {
  // A prefix longer than the string itself can never match.
  if (prefix.size() > s.size()) return false;
  const std::string_view head = s.substr(0, prefix.size());
  return head == prefix;
}
/**
 * Perform case-insensitive string equality test.
 *
 * Two strings are equal when they have the same length and every pair of
 * corresponding characters is equal after both are passed through the C
 * library `tolower`.
 */
inline bool IEquals(const std::string_view &lhs, const std::string_view &rhs) {
  if (lhs.size() != rhs.size()) return false;
  return std::equal(lhs.begin(), lhs.end(), rhs.begin(), [](char a, char b) {
    // tolower takes an int that must hold an unsigned char value (or EOF);
    // passing a negative `char` directly is undefined behaviour.
    return std::tolower(static_cast<unsigned char>(a)) ==
           std::tolower(static_cast<unsigned char>(b));
  });
}
/**
 * Fill `out` with `length` characters drawn uniformly at random from the
 * digits and the upper- and lowercase ASCII letters.
 *
 * The generator is a per-thread `std::mt19937` seeded from
 * `std::random_device` on first use.
 *
 * @param out string that is resized to `length` and overwritten.
 * @param length number of characters to produce.
 * @return pointer to `out`.
 */
template <class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *RandomString(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    size_t length) {
  static constexpr std::string_view kAlphabet =
      "0123456789"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz";
  static thread_local std::mt19937 generator{std::random_device{}()};
  // Both bounds are inclusive, hence the `- 1`.
  static thread_local std::uniform_int_distribution<size_t> pick_index{
      0, kAlphabet.size() - 1};
  out->resize(length);
  std::generate(out->begin(), out->end(),
                [] { return kAlphabet[pick_index(generator)]; });
  return out;
}
/**
 * Create a random string of the given length made of digits and ASCII
 * letters, returned as a new `std::string`.
 */
inline std::string RandomString(size_t length) {
  std::string res;
  RandomString(&res, length);
  return res;
}
/**
 * Write a double-quoted, escaped form of `src` into `out`.
 *
 * A backslash, a single quote and a double quote are each prefixed with a
 * backslash. Backspace, form feed, newline, carriage return and tab become
 * the two-character sequences \b, \f, \n, \r and \t. Every other character is
 * copied as is.
 *
 * @param out string that is cleared and then filled with the escaped text.
 * @param src text to escape.
 * @return pointer to `out`.
 */
template <class TAllocator>
std::basic_string<char, std::char_traits<char>, TAllocator> *Escape(
    std::basic_string<char, std::char_traits<char>, TAllocator> *out,
    const std::string_view &src) {
  out->clear();
  // Room for the input plus the two surrounding quotes.
  out->reserve(src.size() + 2);
  out->push_back('"');
  for (const char c : src) {
    switch (c) {
      case '\\':
      case '\'':
      case '"':
        out->push_back('\\');
        out->push_back(c);
        break;
      case '\b':
        out->append("\\b");
        break;
      case '\f':
        out->append("\\f");
        break;
      case '\n':
        out->append("\\n");
        break;
      case '\r':
        out->append("\\r");
        break;
      case '\t':
        out->append("\\t");
        break;
      default:
        out->push_back(c);
        break;
    }
  }
  out->push_back('"');
  return out;
}
/**
 * Return a double-quoted, escaped copy of `src` as a new `std::string`.
 * Backslashes and quotes are prefixed with a backslash; backspace, form feed,
 * newline, carriage return and tab become \b, \f, \n, \r and \t.
 */
inline std::string Escape(const std::string_view &src) {
  std::string res;
  Escape(&res, src);
  return res;
}
} // namespace utils