mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-01-30 14:45:37 +02:00
The tools allow efficient and fast synchronization of large directory trees from a Windows workstation to a Linux target machine. cdc_rsync* support efficient copy of files by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager + cdc_fuse_fs support efficient streaming of a local directory to a remote virtual file system based on FUSE. It also employs CDC to identify and reuse unchanged data chunks.
249 lines
8.5 KiB
C++
249 lines
8.5 KiB
C++
// Copyright 2022 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "cdc_rsync/file_finder_and_sender.h"
|
|
|
|
#include "absl/strings/match.h"
|
|
#include "absl/strings/str_format.h"
|
|
#include "cdc_rsync/base/message_pump.h"
|
|
#include "common/log.h"
|
|
#include "common/path.h"
|
|
#include "common/path_filter.h"
|
|
#include "common/status.h"
|
|
|
|
namespace cdc_ft {
|
|
namespace {
|
|
|
|
bool EndsWithSpecialDir(const std::string& source) {
|
|
return source == "." || source == ".." || absl::EndsWith(source, "\\.") ||
|
|
absl::EndsWith(source, "\\..");
|
|
}
|
|
|
|
// Returns C:\ from C:\path\to\file or an empty string if there is no drive.
|
|
std::string GetDrivePrefixWithBackslash(const std::string& source) {
|
|
std::string prefix = path::GetDrivePrefix(source);
|
|
if (source[prefix.size()] == '\\') {
|
|
prefix += "\\";
|
|
}
|
|
return prefix;
|
|
}
|
|
|
|
// Basically returns |sources_dir| + |source|, but removes drive letters from
|
|
// |source| if present and |sources_dir| is not empty.
|
|
std::string GetFullSource(const std::string& source,
|
|
const std::string& sources_dir) {
|
|
if (sources_dir.empty()) {
|
|
return source;
|
|
}
|
|
|
|
// Combine |sources_dir_| and |source|, but remove the drive prefix, so
|
|
// that we don't get stuff like "source_dir\C:\path\to\file".
|
|
return path::Join(sources_dir,
|
|
source.substr(GetDrivePrefixWithBackslash(source).size()));
|
|
}
|
|
|
|
std::string GetBaseDir(const std::string& source,
|
|
const std::string& sources_dir, bool relative) {
|
|
if (!relative) {
|
|
// For non-relative mode, the base dir is the directory part, so that
|
|
// path\to\file is copied to remote_dir/file and files in path\to\ are
|
|
// copied to remote_dir.
|
|
if (path::EndsWithPathSeparator(source)) return source;
|
|
std::string dir = path::DirName(source);
|
|
if (!dir.empty()) path::EnsureEndsWithPathSeparator(&dir);
|
|
return dir;
|
|
}
|
|
|
|
// A "\.\" is a marker for where the relative path should start.
|
|
// The base dir is the part up to that marker, so that
|
|
// path\.\to\file is copied to remote_dir/to/file.
|
|
size_t pos = source.find("\\.\\");
|
|
if (pos != std::string::npos) {
|
|
return source.substr(0, pos + 3);
|
|
}
|
|
|
|
// If there is a sources dir, the base dir is the sources dir, so that
|
|
// sources_dir\path\to\file is copied to remote_dir/path/to/file.
|
|
if (!sources_dir.empty()) {
|
|
assert(source.find(sources_dir) == 0);
|
|
return sources_dir;
|
|
}
|
|
|
|
// If there is a drive prefix, the base dir is that part, so that
|
|
// C:\path\to\file is copied to remote_dir/path/to/file.
|
|
return GetDrivePrefixWithBackslash(source);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Stores the given collaborators and options. |path_filter|, |message_pump|
// and |progress| are kept as raw pointers and are dereferenced later while
// files are searched and sent. |sources_dir| is taken by value and moved into
// the member. |request_byte_threshold| is the request size at or above which
// HandleFoundFileOrDir() sends the accumulated batch.
FileFinderAndSender::FileFinderAndSender(PathFilter* path_filter,
                                         MessagePump* message_pump,
                                         ReportFindFilesProgress* progress,
                                         std::string sources_dir,
                                         bool recursive, bool relative,
                                         size_t request_byte_threshold)
    : path_filter_(path_filter),
      message_pump_(message_pump),
      progress_(progress),
      sources_dir_(std::move(sources_dir)),
      recursive_(recursive),
      relative_(relative),
      request_size_threshold_(request_byte_threshold) {
  // Support / instead of \ in the sources dir by fixing up its separators.
  path::FixPathSeparators(&sources_dir_);
}

FileFinderAndSender::~FileFinderAndSender() = default;
|
|
|
|
// Searches for the files and directories matching |source| and hands each
// hit to HandleFoundFileOrDir(). Updates |base_dir_| for this source.
//
// Returns the wrapped error if path::SearchFiles() fails. Finding nothing at
// all only logs a warning and still returns OK.
absl::Status FileFinderAndSender::FindAndSendFiles(std::string source) {
  // Support / instead of \ in sources.
  path::FixPathSeparators(&source);

  // "." and ".." are special: They should not refer to the directory itself,
  // but to the files inside that directory.
  if (EndsWithSpecialDir(source)) {
    path::EnsureEndsWithPathSeparator(&source);
  }

  // Prepend |sources_dir_| if there is one.
  std::string full_source = GetFullSource(source, sources_dir_);

  // This is the part of each path that gets removed before the path is sent
  // to the server.
  base_dir_ = GetBaseDir(full_source, sources_dir_, relative_);

  // Remember how many entries there were before the search, in order to find
  // out afterwards whether this source contributed anything.
  const size_t num_entries_before = files_.size() + dirs_.size();

  auto on_found = [this](std::string dir, std::string filename,
                         int64_t modified_time, uint64_t size,
                         bool is_directory) {
    return HandleFoundFileOrDir(std::move(dir), std::move(filename),
                                modified_time, size, is_directory);
  };

  absl::Status search_status =
      path::SearchFiles(full_source, recursive_, on_found);
  if (!search_status.ok()) {
    return WrapStatus(search_status,
                      "Failed to gather source files and directories for '%s'",
                      full_source);
  }

  const size_t num_entries_after = files_.size() + dirs_.size();
  if (num_entries_after == num_entries_before) {
    // This isn't fatal.
    LOG_WARNING("Neither files nor directories found that match source '%s'",
                full_source.c_str());
  }

  return absl::OkStatus();
}
|
|
|
|
// Sends any files and directories that are still batched in |request_|,
// followed by an empty batch that serves as EOF indicator.
//
// Returns the wrapped error if either send fails.
absl::Status FileFinderAndSender::Flush() {
  // Flush remaining files and directories.
  absl::Status status = SendFilesAndDirs();
  if (!status.ok()) {
    return WrapStatus(status, "SendFilesAndDirs() failed");
  }

  // Send an empty batch as EOF indicator. SendFilesAndDirs() treats a batch
  // as empty only if it has neither files nor directories, so verify both.
  // Note that the directory of |request_| is not reset and may still be set.
  assert(request_.files_size() == 0);
  assert(request_.dirs_size() == 0);
  status = message_pump_->SendMessage(PacketType::kAddFiles, request_);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to send EOF indicator");
  }

  return absl::OkStatus();
}
|
|
|
|
// Moves the list of files found so far into |*files|, replacing its previous
// contents. Afterwards, the internal list is empty.
void FileFinderAndSender::ReleaseFiles(std::vector<ClientFileInfo>* files) {
  *files = std::move(files_);

  // A moved-from vector is only guaranteed to be in a valid but unspecified
  // state. |files_| is read again by FindAndSendFiles(), so make it
  // explicitly empty instead of relying on the implementation.
  files_.clear();
}
|
|
|
|
// Moves the list of directories found so far into |*dirs|, replacing its
// previous contents. Afterwards, the internal list is empty.
void FileFinderAndSender::ReleaseDirs(std::vector<ClientDirInfo>* dirs) {
  *dirs = std::move(dirs_);

  // A moved-from vector is only guaranteed to be in a valid but unspecified
  // state. |dirs_| is read again by FindAndSendFiles(), so make it
  // explicitly empty instead of relying on the implementation.
  dirs_.clear();
}
|
|
|
|
// Handles a single file or directory reported by the file search.
//
// |dir| is the directory that contains the entry, |filename| is the name of
// the entry, |modified_time| and |size| are sent to the server for files and
// |is_directory| tells whether the entry is a directory.
//
// Entries rejected by the path filter are skipped. All other entries are
// recorded in |files_| or |dirs_| and added to the current request. The
// request is sent whenever the directory (relative to |base_dir_|) changes or
// the estimated request size reaches |request_size_threshold_|.
//
// Returns an error if |dir| is shorter than |base_dir_| or if sending fails.
absl::Status FileFinderAndSender::HandleFoundFileOrDir(std::string dir,
                                                       std::string filename,
                                                       int64_t modified_time,
                                                       uint64_t size,
                                                       bool is_directory) {
  // |dir| is expected to start with |base_dir_|. std::string::substr() throws
  // std::out_of_range if the offset exceeds the string length, so report an
  // error status instead of throwing from inside the search callback.
  if (dir.size() < base_dir_.size()) {
    return absl::InternalError(absl::StrFormat(
        "Directory '%s' is shorter than base directory '%s'", dir, base_dir_));
  }
  std::string relative_dir = dir.substr(base_dir_.size());

  // Is the path excluded? Check IsEmpty() first to save the path::Join()
  // if no filter is used (pretty common case).
  if (!path_filter_->IsEmpty() &&
      !path_filter_->IsMatch(path::Join(relative_dir, filename))) {
    return absl::OkStatus();
  }

  if (is_directory) {
    progress_->ReportDirFound();
  } else {
    progress_->ReportFileFound();
  }

  // A request only carries entries of a single directory.
  if (request_.directory() != relative_dir) {
    // Flush files in previous directory.
    absl::Status status = SendFilesAndDirs();
    if (!status.ok()) {
      return WrapStatus(status, "SendFilesAndDirs() failed");
    }

    // Set new directory.
    request_.set_directory(relative_dir);
    request_size_ = request_.directory().length();
  }

  if (is_directory) {
    dirs_.emplace_back(path::Join(dir, filename),
                       static_cast<uint32_t>(base_dir_.size()));
    request_.add_dirs(filename);
    request_size_ += filename.size();
  } else {
    files_.emplace_back(path::Join(dir, filename), size,
                        static_cast<uint32_t>(base_dir_.size()));

    AddFilesRequest::File* file = request_.add_files();
    file->set_filename(filename);
    file->set_modified_time(modified_time);
    file->set_size(size);
    // The serialized proto might have a slightly different length due to
    // packing, but this doesn't need to be exact.
    request_size_ += filename.size() + sizeof(modified_time) + sizeof(size);
  }

  // Send the batch once it has grown large enough.
  if (request_size_ >= request_size_threshold_) {
    absl::Status status = SendFilesAndDirs();
    if (!status.ok()) {
      return WrapStatus(status, "SendFilesAndDirs() failed");
    }
  }

  return absl::OkStatus();
}
|
|
|
|
// Sends the files and directories batched in |request_| to the server and
// resets the batch. Does nothing if the batch has neither files nor
// directories. The directory of |request_| is kept, so that further entries
// of the same directory can be added afterwards.
//
// Returns the wrapped error if sending fails; the batch is kept in that case.
absl::Status FileFinderAndSender::SendFilesAndDirs() {
  const bool batch_is_empty =
      request_.files_size() == 0 && request_.dirs_size() == 0;
  if (batch_is_empty) {
    return absl::OkStatus();
  }

  absl::Status send_status =
      message_pump_->SendMessage(PacketType::kAddFiles, request_);
  if (!send_status.ok()) {
    return WrapStatus(send_status, "Failed to send AddFilesRequest");
  }

  // Start a new batch for the same directory. Only the directory name counts
  // towards the request size at this point.
  request_.clear_dirs();
  request_.clear_files();
  request_size_ = request_.directory().length();
  return absl::OkStatus();
}
|
|
|
|
} // namespace cdc_ft
|