Add functions for treating character strings as byte strings
Reviewers: mferencevic, ipaljak Reviewed By: mferencevic, ipaljak Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2565
This commit is contained in:
parent
7467d52d5b
commit
673ebf428c
@ -1038,6 +1038,66 @@ TypedValue Substring(const TypedValue *args, int64_t nargs,
|
||||
return TypedValue(utils::Substr(str, start, len), ctx.memory);
|
||||
}
|
||||
|
||||
/// Cypher function `toByteString`: decodes a hexadecimal literal into a
/// string whose characters are the raw bytes it denotes.
///
/// The single argument is a string. An empty string yields an empty result.
/// Any other input has to begin with "0x" or "0X", followed by zero or more
/// hex digits in either case. When the number of digits is odd, the first
/// digit becomes a byte of its own (i.e. an implicit leading zero nibble).
///
/// @throw QueryRuntimeException if a non-empty argument lacks the "0x"/"0X"
///        prefix or contains a character that is not a hex digit. Argument
///        count and type checking is delegated to `FType`.
TypedValue ToByteString(const TypedValue *args, int64_t nargs,
                        const FunctionContext &ctx) {
  FType<String>("toByteString", args, nargs);

  const auto &input = args[0].ValueString();
  if (input.empty()) return TypedValue("", ctx.memory);

  const bool has_hex_prefix =
      utils::StartsWith(input, "0x") || utils::StartsWith(input, "0X");
  if (!has_hex_prefix) {
    throw QueryRuntimeException("'toByteString' argument must start with '0x'");
  }

  // Everything after the two prefix characters is the digit sequence.
  const auto &digits = utils::Substr(input, 2);

  // Maps one hex digit to its numeric value in [0, 15].
  auto nibble_of = [](const char digit) -> unsigned char {
    if ('0' <= digit && digit <= '9') return digit - '0';
    if ('a' <= digit && digit <= 'f') return 10 + (digit - 'a');
    if ('A' <= digit && digit <= 'F') return 10 + (digit - 'A');
    throw QueryRuntimeException(
        "'toByteString' argument has an invalid character '{}'", digit);
  };

  utils::pmr::string decoded(ctx.memory);
  // Two digits per byte, rounded up for an odd digit count.
  decoded.reserve((digits.size() + 1) / 2);

  size_t pos = 0;
  if (digits.size() % 2 != 0) {
    // Odd number of digits: the first one stands alone, as if it were
    // preceded by a '0'.
    decoded.push_back(nibble_of(digits[pos]));
    ++pos;
  }

  // From here on the remaining digit count is even, so reading pairs is safe.
  while (pos < digits.size()) {
    const unsigned char high = nibble_of(digits[pos]);
    const unsigned char low = nibble_of(digits[pos + 1]);
    const unsigned char value = high * 16U + low;
    // The string's character type may be signed; go through MemcpyCast
    // rather than relying on an unsigned -> signed conversion.
    decoded.push_back(utils::MemcpyCast<decltype(decoded)::value_type>(value));
    pos += 2;
  }

  return TypedValue(std::move(decoded));
}
|
||||
|
||||
/// Cypher function `fromByteString`: renders the bytes of a string as a
/// lowercase hexadecimal literal prefixed with "0x".
///
/// The first argument is the byte string. An empty byte string yields an
/// empty result (no "0x" prefix). The optional second argument is a minimum
/// length in bytes: if it exceeds the number of input bytes, the output is
/// left-padded with "00" pairs up to that length.
///
/// Argument count and type checking is delegated to `FType`.
// NOTE(review): the optional length is used as-is; a very large value makes
// the reserve computation wrap and the padding loop run for a long time --
// confirm whether callers need an upper bound.
TypedValue FromByteString(const TypedValue *args, int64_t nargs,
                          const FunctionContext &ctx) {
  FType<String, Optional<PositiveInteger>>("fromByteString", args, nargs);

  const auto &raw = args[0].ValueString();
  if (raw.empty()) return TypedValue("", ctx.memory);

  const size_t byte_count = raw.size();
  const size_t requested_bytes =
      nargs == 2 ? static_cast<size_t>(args[1].ValueInt()) : byte_count;
  const size_t total_bytes = std::max(byte_count, requested_bytes);

  utils::pmr::string hex(ctx.memory);
  // Two characters for the prefix plus two hex digits per output byte.
  hex.reserve(2 + total_bytes * 2);
  hex.append("0x");

  // Left-pad with zero bytes when the caller asked for more bytes than given.
  const size_t pad_bytes = total_bytes - byte_count;
  for (size_t k = 0; k != pad_bytes; ++k) {
    hex.append("00");
  }

  // Produces the lowercase hex digit for a value in [0, 15]. All arithmetic is
  // done on unsigned char and only the final result is reinterpreted as
  // `char` through MemcpyCast, because the signedness of plain `char` is not
  // known here.
  auto hex_digit = [](const unsigned char nibble) -> char {
    unsigned char code = 0;
    if (nibble < 10U) {
      code = static_cast<unsigned char>('0') + nibble;
    } else {
      code = static_cast<unsigned char>('a') + nibble - 10U;
    }
    return utils::MemcpyCast<char>(code);
  };

  // Iterate as unsigned char so the nibble extraction never sees a negative
  // value.
  for (const unsigned char octet : raw) {
    hex.push_back(hex_digit(octet >> 4));
    hex.push_back(hex_digit(octet & 0x0F));
  }

  return TypedValue(std::move(hex));
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::function<TypedValue(const TypedValue *, int64_t,
|
||||
@ -1114,6 +1174,8 @@ NameToFunction(const std::string &function_name) {
|
||||
// Memgraph specific functions
|
||||
if (function_name == "ASSERT") return Assert;
|
||||
if (function_name == "COUNTER") return Counter;
|
||||
if (function_name == "TOBYTESTRING") return ToByteString;
|
||||
if (function_name == "FROMBYTESTRING") return FromByteString;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -1705,4 +1705,38 @@ TEST_F(FunctionTest, ToUpper) {
|
||||
EXPECT_EQ(EvaluateFunction("TOUPPER", "Ab__C").ValueString(), "AB__C");
|
||||
}
|
||||
|
||||
// Checks TOBYTESTRING: argument validation, the mandatory "0x"/"0X" prefix,
// rejection of non-hex characters and decoding of even- and odd-length
// digit sequences.
TEST_F(FunctionTest, ToByteString) {
  // Wrong number or type of arguments.
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING"), QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", 42), QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", TypedValue()),
               QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", "", 42), QueryRuntimeException);

  // Non-empty input without the hex prefix.
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", "ff"), QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", "00"), QueryRuntimeException);

  // Prefix present, but followed by a character that is not a hex digit.
  EXPECT_THROW(EvaluateFunction("TOBYTESTRING", "0xG"), QueryRuntimeException);

  // Empty input and a bare prefix both decode to zero bytes.
  EXPECT_EQ(EvaluateFunction("TOBYTESTRING", "").ValueString(), "");
  EXPECT_EQ(EvaluateFunction("TOBYTESTRING", "0x").ValueString(), "");
  EXPECT_EQ(EvaluateFunction("TOBYTESTRING", "0X").ValueString(), "");

  // Upper- and lowercase digits are accepted alike.
  const auto mixed_case =
      EvaluateFunction("TOBYTESTRING", "0x0123456789aAbBcCdDeEfF");
  EXPECT_EQ(mixed_case.ValueString(),
            "\x01\x23\x45\x67\x89\xAA\xBB\xCC\xDD\xEE\xFF");

  // An odd number of digits: the first digit forms a byte on its own. The
  // expected value contains an embedded NUL, hence the explicit length.
  const auto odd_length = EvaluateFunction("TOBYTESTRING", "0x042");
  EXPECT_EQ(odd_length.ValueString().size(), 2);
  EXPECT_EQ(odd_length.ValueString(),
            utils::pmr::string("\x00\x42", 2, utils::NewDeleteResource()));
}
|
||||
|
||||
// Checks FROMBYTESTRING: argument validation, the empty-input case, a round
// trip through TOBYTESTRING and bytes that include an embedded NUL.
TEST_F(FunctionTest, FromByteString) {
  // Wrong number or type of arguments.
  EXPECT_THROW(EvaluateFunction("FROMBYTESTRING"), QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("FROMBYTESTRING", 42), QueryRuntimeException);
  EXPECT_THROW(EvaluateFunction("FROMBYTESTRING", TypedValue()),
               QueryRuntimeException);

  // An empty byte string produces an empty result, without the "0x" prefix.
  EXPECT_EQ(EvaluateFunction("FROMBYTESTRING", "").ValueString(), "");

  // Round trip: the odd-length literal comes back with a leading zero digit
  // and with all letters in lowercase.
  auto bytestring = EvaluateFunction("TOBYTESTRING", "0x123456789aAbBcCdDeEfF");
  const auto round_trip = EvaluateFunction("FROMBYTESTRING", bytestring);
  EXPECT_EQ(round_trip.ValueString(), "0x0123456789aabbccddeeff");

  // A leading zero byte must be kept; the explicit length preserves the NUL.
  const std::string with_leading_nul("\x00\x42", 2);
  const auto rendered = EvaluateFunction("FROMBYTESTRING", with_leading_nul);
  EXPECT_EQ(rendered.ValueString(), "0x0042");
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user