2018-01-16 20:34:19 +08:00
|
|
|
#include "utils/file.hpp"
|
2018-01-15 18:16:19 +08:00
|
|
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
#include <fstream>
|
2019-09-12 20:49:45 +08:00
|
|
|
#include <type_traits>
|
2018-01-15 18:16:19 +08:00
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
#include <glog/logging.h>
|
2018-01-16 20:34:19 +08:00
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
namespace utils {
|
2018-01-16 20:34:19 +08:00
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
// Reads the file at `path` and returns its contents as a list of lines. The
// line terminator is not part of the returned strings. If the file can't be
// opened an empty list is returned.
std::vector<std::string> ReadLines(const std::filesystem::path &path) noexcept {
  std::ifstream input(path.c_str());
  std::vector<std::string> result;
  // There is no explicit check of the stream state: on a stream that failed
  // to open `getline` extracts nothing, so the loop body never runs and the
  // empty result is exactly what we want to return.
  for (std::string current; std::getline(input, current);) {
    result.push_back(current);
  }
  return result;
}
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
// Makes sure that `dir` exists and is a directory, creating it (and any
// missing parent directories) when necessary.
//
// Returns true when `dir` is a directory once the call finishes and false
// otherwise (e.g. the path exists but isn't a directory, or the directory
// couldn't be created).
bool EnsureDir(const std::filesystem::path &dir) noexcept {
  std::error_code error_code;  // For exception suppression.
  // Create first and inspect afterwards. Checking for existence before
  // creating is racy: if something else creates the directory in between,
  // `create_directories` returns false ("nothing was created") even though the
  // directory is there. The result of the creation attempt is deliberately
  // ignored, the final state of the path is what matters.
  std::filesystem::create_directories(dir, error_code);
  return std::filesystem::is_directory(dir, error_code);
}
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
void EnsureDirOrDie(const std::filesystem::path &dir) {
|
2019-02-14 17:34:09 +08:00
|
|
|
CHECK(EnsureDir(dir)) << "Couldn't create directory '" << dir
|
|
|
|
<< "' due to a permission issue or the path exists and "
|
|
|
|
"isn't a directory!";
|
2018-04-27 17:23:40 +08:00
|
|
|
}
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
// Recursively deletes the directory `dir` together with everything in it.
//
// Returns false when `dir` isn't a directory or when the removal failed, and
// true when the directory was removed.
bool DeleteDir(const std::filesystem::path &dir) noexcept {
  std::error_code error_code;  // For exception suppression.
  if (!std::filesystem::is_directory(dir, error_code)) return false;
  const auto removed = std::filesystem::remove_all(dir, error_code);
  // The non-throwing overload of `remove_all` signals failure by returning
  // `static_cast<std::uintmax_t>(-1)`, which is a huge positive number. Looking
  // only at `removed > 0` would report a failed removal as a success, so the
  // error code has to be consulted as well.
  return !error_code && removed > 0;
}
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
// Copies the file `src` to `dst` using `std::filesystem::copy_file` with its
// default options. Returns the value reported by `copy_file`; errors are
// swallowed into a local error code instead of being thrown.
bool CopyFile(const std::filesystem::path &src,
              const std::filesystem::path &dst) noexcept {
  std::error_code ignored;  // Selects the non-throwing overload.
  const bool copied = std::filesystem::copy_file(src, dst, ignored);
  return copied;
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
// The `SetPosition` methods in this file accept an `ssize_t` offset and pass
// it unchanged to `lseek`. This assertion requires `off_t` and `ssize_t` to be
// the very same type (which is stricter than the "fits into" wording of the
// message).
static_assert(std::is_same_v<off_t, ssize_t>, "off_t must fit into ssize_t!");
|
|
|
|
|
|
|
|
// Closes the file if the handle still has one opened; does nothing otherwise.
InputFile::~InputFile() {
  if (!IsOpen()) return;
  Close();
}
|
|
|
|
|
|
|
|
InputFile::InputFile(InputFile &&other) noexcept
|
|
|
|
: fd_(other.fd_), path_(std::move(other.path_)) {
|
|
|
|
other.fd_ = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move assignment: closes the file currently held by this handle (if any) and
// takes over the descriptor and the path of `other`. `other` is left closed.
InputFile &InputFile::operator=(InputFile &&other) noexcept {
  // Self-assignment has to be a no-op. Without this guard the handle would
  // close its own file and end up closed with a moved-from path.
  if (this == &other) return *this;

  if (IsOpen()) Close();

  fd_ = other.fd_;
  path_ = std::move(other.path_);

  // The descriptor now belongs to this handle.
  other.fd_ = -1;

  return *this;
}
|
|
|
|
|
|
|
|
// Opens the file at `path` for reading (`O_RDONLY | O_CLOEXEC`).
//
// The handle must not already have a file opened in it. Both that precondition
// and any failure of `open` other than `EINTR` are enforced with `CHECK`.
void InputFile::Open(const std::filesystem::path &path) {
  // This handle is read-only, so the message says "for reading" (it used to
  // say "for writing", copied from the output file).
  CHECK(!IsOpen())
      << "While trying to open " << path
      << " for reading the database used a handle that already has " << path_
      << " opened in it!";

  path_ = path;

  // Retry for as long as the call keeps being interrupted. Every other error
  // is fatal and is handled by the CHECK below.
  do {
    fd_ = open(path_.c_str(), O_RDONLY | O_CLOEXEC);
  } while (fd_ == -1 && errno == EINTR);

  CHECK(fd_ != -1) << "While trying to open " << path_
                   << " for reading an error occurred: " << strerror(errno)
                   << " (" << errno << ").";
}
|
|
|
|
|
|
|
|
// Returns true when the handle holds a descriptor; -1 is the "closed" marker.
bool InputFile::IsOpen() const {
  return fd_ != -1;
}
|
|
|
|
|
|
|
|
// Returns the path stored in this handle.
const std::filesystem::path &InputFile::path() const {
  return path_;
}
|
|
|
|
|
|
|
|
bool InputFile::Read(uint8_t *data, size_t size) {
|
|
|
|
size_t offset = 0;
|
|
|
|
|
|
|
|
while (size > 0) {
|
|
|
|
auto got = read(fd_, data + offset, size);
|
|
|
|
if (got == -1 && errno == EINTR) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (got <= 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
size -= got;
|
|
|
|
offset += got;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool InputFile::Peek(uint8_t *data, size_t size) {
|
|
|
|
size_t offset = 0;
|
|
|
|
|
|
|
|
while (size > 0) {
|
|
|
|
auto got = read(fd_, data + offset, size);
|
|
|
|
if (got == -1 && errno == EINTR) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (got <= 0) {
|
|
|
|
SetPosition(Position::RELATIVE_TO_CURRENT, -offset);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
size -= got;
|
|
|
|
offset += got;
|
|
|
|
}
|
|
|
|
|
|
|
|
SetPosition(Position::RELATIVE_TO_CURRENT, -offset);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t InputFile::GetSize() {
|
|
|
|
size_t current = GetPosition();
|
|
|
|
size_t size = SetPosition(Position::RELATIVE_TO_END, 0);
|
|
|
|
SetPosition(Position::SET, current);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t InputFile::GetPosition() {
|
|
|
|
return SetPosition(Position::RELATIVE_TO_CURRENT, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Moves the position in the file with `lseek`.
//
// `position` selects what `offset` is relative to (`SEEK_SET`, `SEEK_CUR` or
// `SEEK_END`). Returns the value reported by `lseek`. A failing `lseek` is a
// fatal error (`CHECK`).
size_t InputFile::SetPosition(Position position, ssize_t offset) {
  // Start from a value that `lseek` rejects. If `position` ever holds a value
  // that none of the cases below handle, the call fails with EINVAL and the
  // CHECK below fires, instead of seeking with an uninitialized `whence`.
  int whence = -1;
  switch (position) {
    case Position::SET:
      whence = SEEK_SET;
      break;
    case Position::RELATIVE_TO_CURRENT:
      whence = SEEK_CUR;
      break;
    case Position::RELATIVE_TO_END:
      whence = SEEK_END;
      break;
  }

  off_t pos = -1;
  do {
    pos = lseek(fd_, offset, whence);
  } while (pos == -1 && errno == EINTR);

  CHECK(pos >= 0) << "While trying to set the position in " << path_
                  << " an error occurred: " << strerror(errno) << " ("
                  << errno << ").";
  return pos;
}
|
|
|
|
|
|
|
|
// Closes the file and marks the handle as closed.
//
// Any error reported by `close` other than `EINTR` is a fatal error (`CHECK`).
void InputFile::Close() noexcept {
  // `close` is called exactly once. On Linux the descriptor is released even
  // when the call returns EINTR, so calling `close` again would either fail
  // with EBADF or, worse, close a descriptor that another thread has just been
  // handed with the same number. An interrupted close is therefore treated as
  // a completed one.
  const int ret = close(fd_);

  CHECK(ret == 0 || errno == EINTR)
      << "While trying to close " << path_
      << " an error occurred: " << strerror(errno) << " (" << errno << ").";

  fd_ = -1;
}
|
|
|
|
|
|
|
|
// Closes the file if the handle still has one opened; does nothing otherwise.
OutputFile::~OutputFile() {
  if (!IsOpen()) return;
  Close();
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
OutputFile::OutputFile(OutputFile &&other) noexcept
|
2019-02-14 17:34:09 +08:00
|
|
|
: fd_(other.fd_),
|
|
|
|
written_since_last_sync_(other.written_since_last_sync_),
|
2019-09-12 20:49:45 +08:00
|
|
|
path_(std::move(other.path_)) {
|
2019-02-14 17:34:09 +08:00
|
|
|
other.fd_ = -1;
|
|
|
|
other.written_since_last_sync_ = 0;
|
|
|
|
}
|
2018-01-16 20:34:19 +08:00
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
// Move assignment: closes the file currently held by this handle (if any) and
// takes over the descriptor, the count of bytes written since the last sync
// and the path of `other`. `other` is left closed with a zeroed counter.
OutputFile &OutputFile::operator=(OutputFile &&other) noexcept {
  // Self-assignment has to be a no-op. Without this guard the handle would
  // close its own file and end up closed with a moved-from path.
  if (this == &other) return *this;

  if (IsOpen()) Close();

  fd_ = other.fd_;
  written_since_last_sync_ = other.written_since_last_sync_;
  path_ = std::move(other.path_);

  // The descriptor now belongs to this handle.
  other.fd_ = -1;
  other.written_since_last_sync_ = 0;

  return *this;
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
void OutputFile::Open(const std::filesystem::path &path, Mode mode) {
|
2019-02-14 17:34:09 +08:00
|
|
|
CHECK(!IsOpen())
|
|
|
|
<< "While trying to open " << path
|
|
|
|
<< " for writing the database used a handle that already has " << path_
|
|
|
|
<< " opened in it!";
|
|
|
|
|
|
|
|
path_ = path;
|
|
|
|
written_since_last_sync_ = 0;
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
int flags = O_WRONLY | O_CLOEXEC | O_CREAT;
|
|
|
|
if (mode == Mode::APPEND_TO_EXISTING) flags |= O_APPEND;
|
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
while (true) {
|
|
|
|
// The permissions are set to ((rw-r-----) & ~umask)
|
2019-09-12 20:49:45 +08:00
|
|
|
fd_ = open(path_.c_str(), flags, 0640);
|
2019-02-14 17:34:09 +08:00
|
|
|
if (fd_ == -1 && errno == EINTR) {
|
|
|
|
// The call was interrupted, try again...
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
// All other possible errors are fatal errors and are handled in the CHECK
|
|
|
|
// below.
|
|
|
|
break;
|
|
|
|
}
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
CHECK(fd_ != -1) << "While trying to open " << path_
|
|
|
|
<< " for writing an error occurred: " << strerror(errno)
|
|
|
|
<< " (" << errno << ").";
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
// Returns true when the handle holds a descriptor; -1 is the "closed" marker.
bool OutputFile::IsOpen() const {
  return fd_ != -1;
}
|
2018-01-15 18:16:19 +08:00
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
// Returns the path stored in this handle.
const std::filesystem::path &OutputFile::path() const {
  return path_;
}
|
2018-01-15 18:16:19 +08:00
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
void OutputFile::Write(const char *data, size_t size) {
|
2019-02-14 17:34:09 +08:00
|
|
|
while (size > 0) {
|
|
|
|
auto written = write(fd_, data, size);
|
|
|
|
if (written == -1 && errno == EINTR) {
|
|
|
|
continue;
|
|
|
|
}
|
2018-01-15 18:16:19 +08:00
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
CHECK(written > 0)
|
|
|
|
<< "While trying to write to " << path_
|
|
|
|
<< " an error occurred: " << strerror(errno) << " (" << errno
|
|
|
|
<< "). Possibly " << size
|
|
|
|
<< " bytes of data were lost from this call and possibly "
|
|
|
|
<< written_since_last_sync_ << " bytes were lost from previous calls.";
|
2018-01-15 18:16:19 +08:00
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
size -= written;
|
|
|
|
data += written;
|
|
|
|
written_since_last_sync_ += written;
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
void OutputFile::Write(const uint8_t *data, size_t size) {
|
2019-02-14 17:34:09 +08:00
|
|
|
Write(reinterpret_cast<const char *>(data), size);
|
|
|
|
}
|
2019-09-12 20:49:45 +08:00
|
|
|
void OutputFile::Write(const std::string_view &data) {
|
2019-02-14 17:34:09 +08:00
|
|
|
Write(data.data(), data.size());
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
size_t OutputFile::GetPosition() {
|
|
|
|
return SetPosition(Position::RELATIVE_TO_CURRENT, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Moves the position in the file with `lseek`.
//
// `position` selects what `offset` is relative to (`SEEK_SET`, `SEEK_CUR` or
// `SEEK_END`). Returns the value reported by `lseek`. A failing `lseek` is a
// fatal error (`CHECK`).
size_t OutputFile::SetPosition(Position position, ssize_t offset) {
  // Start from a value that `lseek` rejects. If `position` ever holds a value
  // that none of the cases below handle, the call fails with EINVAL and the
  // CHECK below fires, instead of seeking with an uninitialized `whence`.
  int whence = -1;
  switch (position) {
    case Position::SET:
      whence = SEEK_SET;
      break;
    case Position::RELATIVE_TO_CURRENT:
      whence = SEEK_CUR;
      break;
    case Position::RELATIVE_TO_END:
      whence = SEEK_END;
      break;
  }

  off_t pos = -1;
  do {
    pos = lseek(fd_, offset, whence);
  } while (pos == -1 && errno == EINTR);

  CHECK(pos >= 0) << "While trying to set the position in " << path_
                  << " an error occurred: " << strerror(errno) << " ("
                  << errno << ").";
  return pos;
}
|
|
|
|
|
|
|
|
void OutputFile::Sync() {
|
2019-02-14 17:34:09 +08:00
|
|
|
int ret = 0;
|
|
|
|
while (true) {
|
|
|
|
ret = fsync(fd_);
|
|
|
|
if (ret == -1 && errno == EINTR) {
|
|
|
|
// The call was interrupted, try again...
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
// All other possible errors are fatal errors and are handled in the CHECK
|
|
|
|
// below.
|
|
|
|
break;
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
// In this check we are extremely rigorous because any error except EINTR is
|
|
|
|
// treated as a fatal error that will crash the database. The errors that will
|
|
|
|
// mainly occur are EIO which indicates an I/O error on the physical device
|
|
|
|
// and ENOSPC (documented only in new kernels) which indicates that the
|
|
|
|
// physical device doesn't have any space left. If we don't succeed in
|
|
|
|
// syncing pending data to the physical device there is no mechanism to
|
|
|
|
// determine which parts of the `write` calls weren't synced. That is why
|
|
|
|
// we call this a fatal error and we don't continue further.
|
|
|
|
//
|
|
|
|
// A good description of issues with `fsync` can be seen here:
|
|
|
|
// https://stackoverflow.com/questions/42434872/writing-programs-to-cope-with-i-o-errors-causing-lost-writes-on-linux
|
|
|
|
//
|
|
|
|
// A discussion between PostgreSQL developers of what to do when `fsync`
|
|
|
|
// fails can be seen here:
|
|
|
|
// https://www.postgresql.org/message-id/flat/CAMsr%2BYE5Gs9iPqw2mQ6OHt1aC5Qk5EuBFCyG%2BvzHun1EqMxyQg%40mail.gmail.com#CAMsr+YE5Gs9iPqw2mQ6OHt1aC5Qk5EuBFCyG+vzHun1EqMxyQg@mail.gmail.com
|
|
|
|
//
|
|
|
|
// A brief of the `fsync` semantics can be seen here (part of the mailing list
|
|
|
|
// discussion linked above):
|
|
|
|
// https://www.postgresql.org/message-id/20180402185320.GM11627%40technoir
|
|
|
|
//
|
|
|
|
// The PostgreSQL developers decided to do the same thing (die) when such an
|
|
|
|
// error occurs:
|
|
|
|
// https://www.postgresql.org/message-id/20180427222842.in2e4mibx45zdth5@alap3.anarazel.de
|
|
|
|
CHECK(ret == 0) << "While trying to sync " << path_
|
|
|
|
<< " an error occurred: " << strerror(errno) << " (" << errno
|
|
|
|
<< "). Possibly " << written_since_last_sync_
|
|
|
|
<< " bytes from previous write calls were lost.";
|
|
|
|
|
|
|
|
// Reset the counter.
|
|
|
|
written_since_last_sync_ = 0;
|
|
|
|
}
|
|
|
|
|
2019-09-12 20:49:45 +08:00
|
|
|
void OutputFile::Close() noexcept {
|
2019-02-14 17:34:09 +08:00
|
|
|
int ret = 0;
|
|
|
|
while (true) {
|
|
|
|
ret = close(fd_);
|
|
|
|
if (ret == -1 && errno == EINTR) {
|
|
|
|
// The call was interrupted, try again...
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
// All other possible errors are fatal errors and are handled in the CHECK
|
|
|
|
// below.
|
|
|
|
break;
|
|
|
|
}
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
|
2019-02-14 17:34:09 +08:00
|
|
|
CHECK(ret == 0) << "While trying to close " << path_
|
|
|
|
<< " an error occurred: " << strerror(errno) << " (" << errno
|
|
|
|
<< "). Possibly " << written_since_last_sync_
|
|
|
|
<< " bytes from previous write calls were lost.";
|
|
|
|
|
|
|
|
fd_ = -1;
|
|
|
|
written_since_last_sync_ = 0;
|
2018-01-15 18:16:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace utils
|