Implement buffering for utils file wrappers
Summary: This change increases the throughput of the storage v2 durability 20x. With this change, the storage v2 durability is 3x faster than the storage v1 durability in both recovery and snapshotting (before the change v2 durability is slower than v1 durability). Reviewers: teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2529
This commit is contained in:
parent
47d6196b32
commit
a835939a6e
@ -5,6 +5,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
@ -73,8 +74,20 @@ static_assert(std::is_same_v<off_t, ssize_t>, "off_t must fit into ssize_t!");
|
|||||||
InputFile::~InputFile() { Close(); }
|
InputFile::~InputFile() { Close(); }
|
||||||
|
|
||||||
InputFile::InputFile(InputFile &&other) noexcept
|
InputFile::InputFile(InputFile &&other) noexcept
|
||||||
: fd_(other.fd_), path_(std::move(other.path_)) {
|
: fd_(other.fd_),
|
||||||
|
path_(std::move(other.path_)),
|
||||||
|
file_size_(other.file_size_),
|
||||||
|
file_position_(other.file_position_),
|
||||||
|
buffer_start_(other.buffer_start_),
|
||||||
|
buffer_size_(other.buffer_size_),
|
||||||
|
buffer_position_(other.buffer_position_) {
|
||||||
|
memcpy(buffer_, other.buffer_, kFileBufferSize);
|
||||||
other.fd_ = -1;
|
other.fd_ = -1;
|
||||||
|
other.file_size_ = 0;
|
||||||
|
other.file_position_ = 0;
|
||||||
|
other.buffer_start_ = std::nullopt;
|
||||||
|
other.buffer_size_ = 0;
|
||||||
|
other.buffer_position_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
InputFile &InputFile::operator=(InputFile &&other) noexcept {
|
InputFile &InputFile::operator=(InputFile &&other) noexcept {
|
||||||
@ -82,8 +95,19 @@ InputFile &InputFile::operator=(InputFile &&other) noexcept {
|
|||||||
|
|
||||||
fd_ = other.fd_;
|
fd_ = other.fd_;
|
||||||
path_ = std::move(other.path_);
|
path_ = std::move(other.path_);
|
||||||
|
file_size_ = other.file_size_;
|
||||||
|
file_position_ = other.file_position_;
|
||||||
|
buffer_start_ = other.buffer_start_;
|
||||||
|
buffer_size_ = other.buffer_size_;
|
||||||
|
buffer_position_ = other.buffer_position_;
|
||||||
|
memcpy(buffer_, other.buffer_, kFileBufferSize);
|
||||||
|
|
||||||
other.fd_ = -1;
|
other.fd_ = -1;
|
||||||
|
other.file_size_ = 0;
|
||||||
|
other.file_position_ = 0;
|
||||||
|
other.buffer_start_ = std::nullopt;
|
||||||
|
other.buffer_size_ = 0;
|
||||||
|
other.buffer_position_ = 0;
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -105,7 +129,17 @@ bool InputFile::Open(const std::filesystem::path &path) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return fd_ != -1;
|
if (fd_ == -1) return false;
|
||||||
|
|
||||||
|
// Get file size.
|
||||||
|
auto size = SetPosition(Position::RELATIVE_TO_END, 0);
|
||||||
|
if (!size || !SetPosition(Position::SET, 0)) {
|
||||||
|
Close();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
file_size_ = *size;
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool InputFile::IsOpen() const { return fd_ != -1; }
|
bool InputFile::IsOpen() const { return fd_ != -1; }
|
||||||
@ -116,55 +150,45 @@ bool InputFile::Read(uint8_t *data, size_t size) {
|
|||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
while (size > 0) {
|
while (size > 0) {
|
||||||
auto got = read(fd_, data + offset, size);
|
auto buffer_left = buffer_size_ - buffer_position_;
|
||||||
if (got == -1 && errno == EINTR) {
|
if (!buffer_start_ || buffer_left == 0) {
|
||||||
|
if (!LoadBuffer()) return false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
auto to_copy = size < buffer_left ? size : buffer_left;
|
||||||
if (got <= 0) {
|
memcpy(data + offset, buffer_ + buffer_position_, to_copy);
|
||||||
return false;
|
size -= to_copy;
|
||||||
}
|
offset += to_copy;
|
||||||
|
buffer_position_ += to_copy;
|
||||||
size -= got;
|
|
||||||
offset += got;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool InputFile::Peek(uint8_t *data, size_t size) {
|
bool InputFile::Peek(uint8_t *data, size_t size) {
|
||||||
size_t offset = 0;
|
auto old_buffer_start = buffer_start_;
|
||||||
|
auto old_buffer_position = buffer_position_;
|
||||||
|
auto real_position = GetPosition();
|
||||||
|
|
||||||
while (size > 0) {
|
auto ret = Read(data, size);
|
||||||
auto got = read(fd_, data + offset, size);
|
|
||||||
if (got == -1 && errno == EINTR) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (got <= 0) {
|
if (buffer_start_ == old_buffer_start) {
|
||||||
SetPosition(Position::RELATIVE_TO_CURRENT, -offset);
|
// If we are still within the same buffer (eg. the `size` was small enough),
|
||||||
return false;
|
// we don't reset the buffer and just set the buffer position to the old
|
||||||
}
|
// buffer position.
|
||||||
|
buffer_position_ = old_buffer_position;
|
||||||
size -= got;
|
} else {
|
||||||
offset += got;
|
SetPosition(Position::SET, real_position);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetPosition(Position::RELATIVE_TO_CURRENT, -offset);
|
return ret;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> InputFile::GetSize() {
|
size_t InputFile::GetSize() { return file_size_; }
|
||||||
auto current = GetPosition();
|
|
||||||
if (!current) return std::nullopt;
|
|
||||||
auto size = SetPosition(Position::RELATIVE_TO_END, 0);
|
|
||||||
if (!size) return std::nullopt;
|
|
||||||
if (!SetPosition(Position::SET, *current)) return std::nullopt;
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<size_t> InputFile::GetPosition() {
|
size_t InputFile::GetPosition() {
|
||||||
return SetPosition(Position::RELATIVE_TO_CURRENT, 0);
|
if (buffer_start_) return *buffer_start_ + buffer_position_;
|
||||||
|
return file_position_;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> InputFile::SetPosition(Position position,
|
std::optional<size_t> InputFile::SetPosition(Position position,
|
||||||
@ -187,6 +211,10 @@ std::optional<size_t> InputFile::SetPosition(Position position,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (pos < 0) return std::nullopt;
|
if (pos < 0) return std::nullopt;
|
||||||
|
file_position_ = pos;
|
||||||
|
buffer_start_ = std::nullopt;
|
||||||
|
buffer_size_ = 0;
|
||||||
|
buffer_position_ = 0;
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -216,6 +244,39 @@ void InputFile::Close() noexcept {
|
|||||||
fd_ = -1;
|
fd_ = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool InputFile::LoadBuffer() {
|
||||||
|
buffer_start_ = std::nullopt;
|
||||||
|
buffer_size_ = 0;
|
||||||
|
buffer_position_ = 0;
|
||||||
|
|
||||||
|
size_t size = kFileBufferSize;
|
||||||
|
if (file_position_ + size >= file_size_) {
|
||||||
|
size = file_size_ - file_position_;
|
||||||
|
}
|
||||||
|
if (size == 0) return false;
|
||||||
|
buffer_size_ = size;
|
||||||
|
|
||||||
|
size_t offset = 0;
|
||||||
|
while (size > 0) {
|
||||||
|
auto got = read(fd_, buffer_ + offset, size);
|
||||||
|
if (got == -1 && errno == EINTR) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (got <= 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
size -= got;
|
||||||
|
offset += got;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer_start_ = file_position_;
|
||||||
|
file_position_ += buffer_size_;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
OutputFile::~OutputFile() {
|
OutputFile::~OutputFile() {
|
||||||
if (IsOpen()) Close();
|
if (IsOpen()) Close();
|
||||||
}
|
}
|
||||||
@ -223,9 +284,12 @@ OutputFile::~OutputFile() {
|
|||||||
OutputFile::OutputFile(OutputFile &&other) noexcept
|
OutputFile::OutputFile(OutputFile &&other) noexcept
|
||||||
: fd_(other.fd_),
|
: fd_(other.fd_),
|
||||||
written_since_last_sync_(other.written_since_last_sync_),
|
written_since_last_sync_(other.written_since_last_sync_),
|
||||||
path_(std::move(other.path_)) {
|
path_(std::move(other.path_)),
|
||||||
|
buffer_position_(other.buffer_position_) {
|
||||||
|
memcpy(buffer_, other.buffer_, kFileBufferSize);
|
||||||
other.fd_ = -1;
|
other.fd_ = -1;
|
||||||
other.written_since_last_sync_ = 0;
|
other.written_since_last_sync_ = 0;
|
||||||
|
other.buffer_position_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
OutputFile &OutputFile::operator=(OutputFile &&other) noexcept {
|
OutputFile &OutputFile::operator=(OutputFile &&other) noexcept {
|
||||||
@ -234,9 +298,12 @@ OutputFile &OutputFile::operator=(OutputFile &&other) noexcept {
|
|||||||
fd_ = other.fd_;
|
fd_ = other.fd_;
|
||||||
written_since_last_sync_ = other.written_since_last_sync_;
|
written_since_last_sync_ = other.written_since_last_sync_;
|
||||||
path_ = std::move(other.path_);
|
path_ = std::move(other.path_);
|
||||||
|
buffer_position_ = other.buffer_position_;
|
||||||
|
memcpy(buffer_, other.buffer_, kFileBufferSize);
|
||||||
|
|
||||||
other.fd_ = -1;
|
other.fd_ = -1;
|
||||||
other.written_since_last_sync_ = 0;
|
other.written_since_last_sync_ = 0;
|
||||||
|
other.buffer_position_ = 0;
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -275,28 +342,21 @@ bool OutputFile::IsOpen() const { return fd_ != -1; }
|
|||||||
|
|
||||||
const std::filesystem::path &OutputFile::path() const { return path_; }
|
const std::filesystem::path &OutputFile::path() const { return path_; }
|
||||||
|
|
||||||
void OutputFile::Write(const char *data, size_t size) {
|
void OutputFile::Write(const uint8_t *data, size_t size) {
|
||||||
while (size > 0) {
|
while (size > 0) {
|
||||||
auto written = write(fd_, data, size);
|
FlushBuffer(false);
|
||||||
if (written == -1 && errno == EINTR) {
|
auto buffer_left = kFileBufferSize - buffer_position_;
|
||||||
continue;
|
auto to_write = size < buffer_left ? size : buffer_left;
|
||||||
}
|
memcpy(buffer_ + buffer_position_, data, to_write);
|
||||||
|
size -= to_write;
|
||||||
CHECK(written > 0)
|
data += to_write;
|
||||||
<< "While trying to write to " << path_
|
buffer_position_ += to_write;
|
||||||
<< " an error occurred: " << strerror(errno) << " (" << errno
|
written_since_last_sync_ += to_write;
|
||||||
<< "). Possibly " << size
|
|
||||||
<< " bytes of data were lost from this call and possibly "
|
|
||||||
<< written_since_last_sync_ << " bytes were lost from previous calls.";
|
|
||||||
|
|
||||||
size -= written;
|
|
||||||
data += written;
|
|
||||||
written_since_last_sync_ += written;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void OutputFile::Write(const uint8_t *data, size_t size) {
|
void OutputFile::Write(const char *data, size_t size) {
|
||||||
Write(reinterpret_cast<const char *>(data), size);
|
Write(reinterpret_cast<const uint8_t *>(data), size);
|
||||||
}
|
}
|
||||||
void OutputFile::Write(const std::string_view &data) {
|
void OutputFile::Write(const std::string_view &data) {
|
||||||
Write(data.data(), data.size());
|
Write(data.data(), data.size());
|
||||||
@ -307,6 +367,8 @@ size_t OutputFile::GetPosition() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t OutputFile::SetPosition(Position position, ssize_t offset) {
|
size_t OutputFile::SetPosition(Position position, ssize_t offset) {
|
||||||
|
FlushBuffer(true);
|
||||||
|
|
||||||
int whence;
|
int whence;
|
||||||
switch (position) {
|
switch (position) {
|
||||||
case Position::SET:
|
case Position::SET:
|
||||||
@ -332,6 +394,8 @@ size_t OutputFile::SetPosition(Position position, ssize_t offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void OutputFile::Sync() {
|
void OutputFile::Sync() {
|
||||||
|
FlushBuffer(true);
|
||||||
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
ret = fsync(fd_);
|
ret = fsync(fd_);
|
||||||
@ -378,6 +442,8 @@ void OutputFile::Sync() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void OutputFile::Close() noexcept {
|
void OutputFile::Close() noexcept {
|
||||||
|
FlushBuffer(true);
|
||||||
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
ret = close(fd_);
|
ret = close(fd_);
|
||||||
@ -400,4 +466,32 @@ void OutputFile::Close() noexcept {
|
|||||||
written_since_last_sync_ = 0;
|
written_since_last_sync_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OutputFile::FlushBuffer(bool force_flush) {
|
||||||
|
CHECK(IsOpen());
|
||||||
|
|
||||||
|
if (!force_flush && buffer_position_ < kFileBufferSize) return;
|
||||||
|
|
||||||
|
CHECK(buffer_position_ <= kFileBufferSize)
|
||||||
|
<< "While trying to write to " << path_
|
||||||
|
<< " more file was written to the buffer than the buffer has space!";
|
||||||
|
|
||||||
|
auto *buffer = buffer_;
|
||||||
|
while (buffer_position_ > 0) {
|
||||||
|
auto written = write(fd_, buffer, buffer_position_);
|
||||||
|
if (written == -1 && errno == EINTR) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK(written > 0)
|
||||||
|
<< "While trying to write to " << path_
|
||||||
|
<< " an error occurred: " << strerror(errno) << " (" << errno
|
||||||
|
<< "). Possibly " << buffer_position_
|
||||||
|
<< " bytes of data were lost from this call and possibly "
|
||||||
|
<< written_since_last_sync_ << " bytes were lost from previous calls.";
|
||||||
|
|
||||||
|
buffer_position_ -= written;
|
||||||
|
buffer += written;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
|
@ -45,6 +45,13 @@ bool CopyFile(const std::filesystem::path &src,
|
|||||||
bool RenamePath(const std::filesystem::path &src,
|
bool RenamePath(const std::filesystem::path &src,
|
||||||
const std::filesystem::path &dst);
|
const std::filesystem::path &dst);
|
||||||
|
|
||||||
|
/// Buffer size used for `InputFile` and `OutputFile` implementations. Using
|
||||||
|
/// system calls is very expensive and we can't afford to call either `read` or
|
||||||
|
/// `write` for each of our (very small) logical reads/writes. Because of that,
|
||||||
|
/// `read` or `write` is only called when the buffer is full and/or needs
|
||||||
|
/// emptying.
|
||||||
|
const size_t kFileBufferSize = 262144;
|
||||||
|
|
||||||
/// This class implements a file handler that is used to read binary files. It
|
/// This class implements a file handler that is used to read binary files. It
|
||||||
/// was developed because the C++ standard library has an awful API and makes
|
/// was developed because the C++ standard library has an awful API and makes
|
||||||
/// handling of binary data extremely tedious.
|
/// handling of binary data extremely tedious.
|
||||||
@ -89,13 +96,11 @@ class InputFile {
|
|||||||
/// doesn't change the current position in the file.
|
/// doesn't change the current position in the file.
|
||||||
bool Peek(uint8_t *data, size_t size);
|
bool Peek(uint8_t *data, size_t size);
|
||||||
|
|
||||||
/// This method gets the size of the file. On failure it returns
|
/// This method gets the size of the file.
|
||||||
/// `std::nullopt`.
|
size_t GetSize();
|
||||||
std::optional<size_t> GetSize();
|
|
||||||
|
|
||||||
/// This method gets the current absolute position in the file. On failure it
|
/// This method gets the current absolute position in the file.
|
||||||
/// returns `std::nullopt`.
|
size_t GetPosition();
|
||||||
std::optional<size_t> GetPosition();
|
|
||||||
|
|
||||||
/// This method sets the current position in the file and returns the absolute
|
/// This method sets the current position in the file and returns the absolute
|
||||||
/// set position in the file. The position is set to `offset` with the
|
/// set position in the file. The position is set to `offset` with the
|
||||||
@ -107,8 +112,17 @@ class InputFile {
|
|||||||
void Close() noexcept;
|
void Close() noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
bool LoadBuffer();
|
||||||
|
|
||||||
int fd_{-1};
|
int fd_{-1};
|
||||||
std::filesystem::path path_;
|
std::filesystem::path path_;
|
||||||
|
size_t file_size_{0};
|
||||||
|
size_t file_position_{0};
|
||||||
|
|
||||||
|
uint8_t buffer_[kFileBufferSize];
|
||||||
|
std::optional<size_t> buffer_start_;
|
||||||
|
size_t buffer_size_{0};
|
||||||
|
size_t buffer_position_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
/// This class implements a file handler that is used for mission critical files
|
/// This class implements a file handler that is used for mission critical files
|
||||||
@ -171,8 +185,8 @@ class OutputFile {
|
|||||||
|
|
||||||
/// Writes data to the currently opened file. On failure and misuse it crashes
|
/// Writes data to the currently opened file. On failure and misuse it crashes
|
||||||
/// the program.
|
/// the program.
|
||||||
void Write(const char *data, size_t size);
|
|
||||||
void Write(const uint8_t *data, size_t size);
|
void Write(const uint8_t *data, size_t size);
|
||||||
|
void Write(const char *data, size_t size);
|
||||||
void Write(const std::string_view &data);
|
void Write(const std::string_view &data);
|
||||||
|
|
||||||
/// This method gets the current absolute position in the file. On failure and
|
/// This method gets the current absolute position in the file. On failure and
|
||||||
@ -194,9 +208,13 @@ class OutputFile {
|
|||||||
void Close() noexcept;
|
void Close() noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void FlushBuffer(bool force_flush);
|
||||||
|
|
||||||
int fd_{-1};
|
int fd_{-1};
|
||||||
size_t written_since_last_sync_{0};
|
size_t written_since_last_sync_{0};
|
||||||
std::filesystem::path path_;
|
std::filesystem::path path_;
|
||||||
|
uint8_t buffer_[kFileBufferSize];
|
||||||
|
size_t buffer_position_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
|
Loading…
Reference in New Issue
Block a user