memgraph/src/utils/string.hpp
florijan e218bc1c69 Improve utils::RandomString
Reviewers: dgleich

Reviewed By: dgleich

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1023
2017-12-05 10:10:36 +01:00

222 lines
6.4 KiB
C++

#pragma once
#include <algorithm>
#include <cctype>
#include <cstring>
#include <iostream>
#include <iterator>
#include <random>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
#include "utils/exceptions.hpp"
namespace utils {
/**
 * Removes whitespace characters from the start and from the end of a string.
 *
 * A character counts as whitespace when `isspace` says so.
 *
 * @param s string that is going to be trimmed
 *
 * @return trimmed copy of `s`; an empty or all-whitespace input yields an
 * empty string
 */
inline std::string Trim(const std::string &s) {
  // isspace has undefined behaviour for negative arguments other than EOF.
  // Plain char may be signed, so every byte is converted to unsigned char
  // before it is classified.
  auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  auto begin = s.begin();
  auto end = s.end();
  while (begin != end && is_space(*begin)) {
    ++begin;
  }
  // `end != begin` guarantees that std::prev(end) can be dereferenced, which
  // also covers the empty-string case.
  while (end != begin && is_space(*std::prev(end))) {
    --end;
  }
  return std::string(begin, end);
}
/**
 * Returns a copy of the string with every character lowercased.
 *
 * The conversion is applied byte by byte with `tolower`, which follows the
 * current C locale (in the default "C" locale only ASCII letters change).
 * Multi-byte encodings are not interpreted.
 *
 * @param s string to convert (taken by value and modified in place)
 * @return the lowercased string
 */
inline std::string ToLowerCase(std::string s) {
  // tolower has undefined behaviour for negative arguments other than EOF, so
  // each byte is received as unsigned char before being converted.
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
    return static_cast<char>(std::tolower(c));
  });
  return s;
}
/**
 * Returns a copy of the string with every character uppercased.
 *
 * The conversion is applied byte by byte with `toupper`, which follows the
 * current C locale (in the default "C" locale only ASCII letters change).
 * Multi-byte encodings are not interpreted.
 *
 * @param s string to convert (taken by value and modified in place)
 * @return the uppercased string
 */
inline std::string ToUpperCase(std::string s) {
  // toupper has undefined behaviour for negative arguments other than EOF, so
  // each byte is received as unsigned char before being converted.
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
    return static_cast<char>(std::toupper(c));
  });
  return s;
}
/**
 * Joins the strings of a vector, placing the separator between neighbours.
 *
 * @param strings the pieces to join, in order
 * @param separator text inserted between every two consecutive pieces
 * @return the joined string; empty when `strings` is empty
 */
inline std::string Join(const std::vector<std::string> &strings,
                        const std::string &separator) {
  if (strings.empty()) return "";
  // Work out the exact result length first so the output buffer is allocated
  // once. Everything is kept in size_t, the type the containers report.
  size_t total_size = separator.size() * (strings.size() - 1);
  for (const auto &piece : strings) {
    total_size += piece.size();
  }
  std::string joined;
  joined.reserve(total_size);
  joined += strings.front();
  for (auto it = std::next(strings.begin()); it != strings.end(); ++it) {
    joined += separator;
    joined += *it;
  }
  return joined;
}
/**
 * Replaces all occurrences of <match> in <src> with <replacement>.
 *
 * Occurrences are located left to right and do not overlap; text produced by
 * a replacement is never searched again.
 *
 * @param src the string to process
 * @param match the text to look for; when empty, `src` is returned unchanged
 * @param replacement the text put in place of every occurrence
 * @return the resulting string
 */
inline std::string Replace(std::string src, const std::string &match,
                           const std::string &replacement) {
  // An empty pattern is found at every position without ever advancing, so
  // searching for it would never terminate.
  if (match.empty()) return src;
  size_t pos = src.find(match);
  if (pos == std::string::npos) return src;

  // Build the output in one pass instead of erasing/inserting inside `src`,
  // which would shift the tail of the string on every occurrence.
  std::string result;
  result.reserve(src.size());
  size_t copied_until = 0;
  do {
    result.append(src, copied_until, pos - copied_until);
    result += replacement;
    copied_until = pos + match.size();
    pos = src.find(match, copied_until);
  } while (pos != std::string::npos);
  result.append(src, copied_until, std::string::npos);
  return result;
}
/**
 * Splits a string by a delimiter and returns a vector of the pieces.
 *
 * @param src - The string to split. An empty `src` yields an empty vector.
 * @param delimiter - The delimiter to split on. An empty delimiter can never
 * be consumed, so in that case `src` is returned as the only element.
 * @param splits - The maximum number of splits. For the given value N the
 * returned vector will contain at most (N + 1) elements. If given a negative
 * value, all possible splits are performed.
 * @return - a vector of splits.
 */
inline std::vector<std::string> Split(const std::string &src,
                                      const std::string &delimiter,
                                      int splits = -1) {
  std::vector<std::string> res;
  if (src.empty()) {
    return res;
  }
  if (delimiter.empty()) {
    // find("") matches at the current position without moving past it, so
    // the loop below would never terminate for an unlimited split count.
    res.emplace_back(src);
    return res;
  }
  size_t index = 0;
  // The decrement is only evaluated for non-negative `splits`, so a negative
  // value keeps meaning "no limit" for the whole loop.
  while (splits < 0 || splits-- != 0) {
    const auto n = src.find(delimiter, index);
    if (n == std::string::npos) break;
    res.emplace_back(src.substr(index, n - index));
    index = n + delimiter.size();
  }
  // Whatever remains after the last consumed delimiter is the final piece.
  res.emplace_back(src.substr(index));
  return res;
}
/**
 * Splits a string by a delimiter, from right to left, and returns a vector of
 * the pieces in their original (left to right) order.
 * For example, RSplit("a.b.c", ".", 1) results in {"a.b", "c"}.
 *
 * @param src - The string to split. An empty `src` yields an empty vector.
 * @param delimiter - The delimiter to split on. An empty delimiter can never
 * be consumed, so in that case `src` is returned as the only element.
 * @param splits - The maximum number of splits. For the given value N the
 * returned vector will contain at most (N + 1) elements. If given a negative
 * value, all possible splits are performed.
 * @return - a vector of splits.
 */
inline std::vector<std::string> RSplit(const std::string &src,
                                       const std::string &delimiter,
                                       int splits = -1) {
  std::vector<std::string> res;
  if (src.empty()) {
    return res;
  }
  if (delimiter.empty()) {
    res.emplace_back(src);
    return res;
  }
  // `index` is one past the end of the part of `src` not yet split off.
  size_t index = src.size();
  while (splits < 0 || splits-- != 0) {
    // A delimiter has to fit entirely inside [0, index). Starting the search
    // at index - delimiter.size() stops a multi-character delimiter from
    // overlapping the one consumed in the previous iteration.
    if (index < delimiter.size()) break;
    const auto n = src.rfind(delimiter, index - delimiter.size());
    if (n == std::string::npos) break;
    res.emplace_back(
        src.substr(n + delimiter.size(), index - n - delimiter.size()));
    index = n;
  }
  res.emplace_back(src.substr(0, index));
  // Pieces were collected right to left; restore the natural order.
  std::reverse(res.begin(), res.end());
  return res;
}
/**
 * Splits a string by whitespace and returns a vector of the pieces.
 * Runs of consecutive whitespace are regarded as a single delimiter.
 * Additionally, the result will not contain empty strings at the start or end,
 * as if the string was trimmed before splitting.
 *
 * A character counts as whitespace when `isspace` says so.
 *
 * @param src - The string to split.
 * @return - the non-whitespace tokens of `src` in order; empty when `src` is
 * empty or consists of whitespace only.
 */
inline std::vector<std::string> Split(const std::string &src) {
  // isspace requires a value representable as unsigned char, hence the cast.
  auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  std::vector<std::string> res;
  // A single linear scan: skip a run of whitespace, then take everything up
  // to the next whitespace character as one token.
  auto token_begin = std::find_if_not(src.begin(), src.end(), is_space);
  while (token_begin != src.end()) {
    const auto token_end = std::find_if(token_begin, src.end(), is_space);
    res.emplace_back(token_begin, token_end);
    token_begin = std::find_if_not(token_end, src.end(), is_space);
  }
  return res;
}
/**
 * Converts a string to a double using the classic ("C") locale.
 *
 * @param s text to parse
 * @return the parsed value
 * @throws BasicException when extraction fails or when the extraction does
 * not reach the end of the string.
 */
inline double ParseDouble(const std::string &s) {
  // std::stod honours the current locale, which is why a stream explicitly
  // imbued with the classic locale is used instead.
  std::istringstream stream(s);
  stream.imbue(std::locale::classic());
  double value = 0.0;
  stream >> value;
  const bool consumed_everything = !stream.fail() && stream.eof();
  if (!consumed_everything) {
    throw BasicException("Couldn't parse string");
  }
  return value;
}
/**
 * Tells whether the string `s` ends with `suffix`.
 *
 * @return true when the last suffix.size() characters of `s` equal `suffix`
 * (always true for an empty suffix), false otherwise.
 */
inline bool EndsWith(const std::string &s, const std::string &suffix) {
  if (suffix.size() > s.size()) {
    return false;
  }
  // Walk both strings backwards, comparing exactly suffix.size() characters.
  return std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
}
/**
 * Tells whether the string `s` starts with `prefix`.
 *
 * @return true when the first prefix.size() characters of `s` equal `prefix`
 * (always true for an empty prefix), false otherwise.
 */
inline bool StartsWith(const std::string &s, const std::string &prefix) {
  if (prefix.size() > s.size()) {
    return false;
  }
  // The size guard above makes it safe to read prefix.size() characters of s.
  return std::equal(prefix.begin(), prefix.end(), s.begin());
}
/**
 * Produces a string of `length` characters, each drawn uniformly from the
 * digits and the upper- and lowercase ASCII letters.
 *
 * The generator and the distribution are thread-local; the generator is
 * seeded from std::random_device when a thread first calls this function.
 */
inline std::string RandomString(size_t length) {
  static const char charset[] =
      "0123456789"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz";
  static thread_local std::mt19937 pseudo_rand_gen{std::random_device{}()};
  // sizeof counts the terminating NUL, so the last valid index is sizeof - 2.
  static thread_local std::uniform_int_distribution<size_t> rand_dist{
      0, sizeof(charset) - 2};
  std::string result(length, '\0');
  for (char &slot : result) {
    slot = charset[rand_dist(pseudo_rand_gen)];
  }
  return result;
}
} // namespace utils