Releasing the former Stadia file transfer tools

The tools allow efficient and fast synchronization of large directory
trees from a Windows workstation to a Linux target machine.

cdc_rsync* support efficient copy of files by using content-defined
chunking (CDC) to identify chunks within files that can be reused.

asset_stream_manager + cdc_fuse_fs support efficient streaming of a
local directory to a remote virtual file system based on FUSE. It also
employs CDC to identify and reuse unchanged data chunks.
This commit is contained in:
Christian Schneider
2022-10-07 10:47:04 +02:00
commit 4326e972ac
364 changed files with 49410 additions and 0 deletions

92
cdc_rsync/base/BUILD Normal file
View File

@@ -0,0 +1,92 @@
# Bazel targets for the cdc_rsync base library: CDC signature/diff/patch logic,
# the message pump, socket interfaces and their tests.

# Targets in this package are visible to the root package and all of its
# subpackages.
package(default_visibility = [
"//:__subpackages__",
])
# Signature creation, diffing and patching on top of fastcdc and blake3.
cc_library(
name = "cdc_interface",
srcs = ["cdc_interface.cc"],
hdrs = ["cdc_interface.h"],
deps = [
":message_pump",
"//cdc_rsync/protos:messages_cc_proto",
"//common:buffer",
"//common:log",
"//common:path",
"//common:status",
"//common:threadpool",
"//fastcdc",
"@com_github_blake3//:blake3",
"@com_google_absl//absl/strings:str_format",
],
)
# Test for cdc_interface; reads its input files from testdata/cdc_interface.
cc_test(
name = "cdc_interface_test",
srcs = ["cdc_interface_test.cc"],
data = ["testdata/root.txt"] + glob(["testdata/cdc_interface/**"]),
deps = [
":cdc_interface",
":fake_socket",
"//common:status_test_macros",
"//common:test_main",
"@com_google_googletest//:gtest",
],
)
# Fake Socket implementation; used by the tests in this package.
cc_library(
name = "fake_socket",
srcs = ["fake_socket.cc"],
hdrs = ["fake_socket.h"],
deps = [
"//cdc_rsync/base:socket",
"@com_google_absl//absl/status",
],
)
# Sends and receives protobuf messages over a Socket.
cc_library(
name = "message_pump",
srcs = ["message_pump.cc"],
hdrs = ["message_pump.h"],
deps = [
":socket",
"//common:buffer",
"//common:log",
"//common:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings:str_format",
"@com_google_protobuf//:protobuf_lite",
],
)
# Test for message_pump, running against the fake socket.
cc_test(
name = "message_pump_test",
srcs = ["message_pump_test.cc"],
deps = [
":fake_socket",
":message_pump",
"//cdc_rsync/protos:messages_cc_proto",
"//common:status_test_macros",
"//common:test_main",
"@com_google_googletest//:gtest",
],
)
# Header-only target exposing server_exit_code.h.
cc_library(
name = "server_exit_code",
hdrs = ["server_exit_code.h"],
)
# Header-only target exposing the socket interface in socket.h.
cc_library(
name = "socket",
hdrs = ["socket.h"],
)
# All *_test.cc files of this package.
filegroup(
name = "all_test_sources",
srcs = glob(["*_test.cc"]),
)
# Everything below testdata/.
filegroup(
name = "all_test_data",
srcs = glob(["testdata/**"]),
)

View File

@@ -0,0 +1,670 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/base/cdc_interface.h"
#include <vector>
#include "absl/strings/str_format.h"
#include "blake3.h"
#include "cdc_rsync/base/message_pump.h"
#include "cdc_rsync/protos/messages.pb.h"
#include "common/buffer.h"
#include "common/path.h"
#include "common/status.h"
#include "common/util.h"
#include "fastcdc/fastcdc.h"
#if PLATFORM_LINUX
#include <fcntl.h>
#endif
namespace cdc_ft {
namespace {
// The average chunk size should be as low as possible, but not too low.
// Lower sizes mean better delta-encoding and hence less data uploads.
// However, chunking becomes slower for lower sizes. At 8 KB, a gamelet can
// still process close to 700 MB/sec, which matches hard drive speed.
// Signature data rate is another factor. The gamelet generates signature data
// at a rate of 700 MB/sec / kAvgChunkSize * sizeof(Chunk) = 1.7 MB/sec for 8 KB
// chunks. That means, the client needs at least 16 MBit download bandwidth to
// stream signatures or else this part becomes slower. 4 KB chunks would require
// a 32 MBit connection.
constexpr size_t kAvgChunkSize = 8 * 1024;
constexpr size_t kMinChunkSize = kAvgChunkSize / 2;
constexpr size_t kMaxChunkSize = kAvgChunkSize * 4;
// This number was found by experimentally optimizing chunking throughput.
constexpr size_t kFileIoBufferSize = kMaxChunkSize * 4;
// Limits the size of contiguous patch chunks where data is copied from the
// basis file. Necessary since the server copies chunks in one go and doesn't
// split them up (would be possible, but unnecessarily complicates code).
constexpr size_t kCombinedChunkSizeThreshold = 64 * 1024;
// Number of hashing tasks in flight at a given point of time.
constexpr size_t kMaxNumHashTasks = 64;
#pragma pack(push, 1)
// 16 byte hashes guarantee a sufficiently low chance of hash collisions. For
// 8 byte hashes, the chance of a collision is actually quite high for large
// files: about 0.0004% for a 100 GB file and 8 KB chunks.
// 128 bit chunk hash, stored as two 64 bit halves.
struct Hash {
  uint64_t low;
  uint64_t high;

  // Two hashes are equal iff both halves match.
  bool operator==(const Hash& other) const {
    if (low != other.low) {
      return false;
    }
    return high == other.high;
  }

  // Two hashes differ as soon as one of the halves differs.
  bool operator!=(const Hash& other) const {
    return low != other.low || high != other.high;
  }
};
#pragma pack(pop)
static_assert(sizeof(Hash) <= BLAKE3_OUT_LEN, "");
} // namespace
} // namespace cdc_ft
namespace std {
// Hash functor so that cdc_ft::Hash can serve as key in unordered containers.
// The key is already a hash value, so its lower 64 bits are used directly.
template <>
struct hash<cdc_ft::Hash> {
  size_t operator()(const cdc_ft::Hash& value) const {
    return static_cast<size_t>(value.low);
  }
};
}  // namespace std
namespace cdc_ft {
namespace {
// Send a batch of signatures every 8 MB of processed data (~90 packets per
// second at 700 MB/sec processing rate). The size of each signature batch is
// kMinNumChunksPerBatch * sizeof(Chunk), e.g. 20 KB for an avg chunk size of
// 8 KB.
constexpr int kMinSigBatchDataSize = 8 * 1024 * 1024;
constexpr int kMinNumChunksPerBatch = kMinSigBatchDataSize / kAvgChunkSize;
// Send patch commands in batches of at least that size for efficiency.
constexpr int kPatchRequestSizeThreshold = 65536;
// 16 bytes hash, 4 bytes size = 20 bytes.
// Signature entry of a single chunk: its hash and its size in bytes.
struct Chunk {
  Chunk(const Hash& chunk_hash, uint32_t chunk_size)
      : hash(chunk_hash), size(chunk_size) {}

  Hash hash;
  uint32_t size = 0;
};
// Hashes the |size| bytes at |data| with BLAKE3 and returns the first
// sizeof(Hash) bytes of the output. |data| must not be null.
Hash ComputeHash(const void* data, size_t size) {
  assert(data);

  blake3_hasher state;
  blake3_hasher_init(&state);
  blake3_hasher_update(&state, data, size);

  Hash digest;
  uint8_t* const digest_bytes = reinterpret_cast<uint8_t*>(&digest);
  blake3_hasher_finalize(&state, digest_bytes, sizeof(Hash));
  return digest;
}
// Task that computes hashes for a single chunk and adds the result to
// AddSignaturesResponse.
class HashTask : public Task {
public:
HashTask() {}
~HashTask() {}
HashTask(const HashTask& other) = delete;
HashTask& operator=(HashTask&) = delete;
// Sets the data to compute the hash of.
// Should be called before queuing the task.
void SetData(const void* data, size_t size) {
buffer_.reserve(size);
buffer_.resize(size);
memcpy(buffer_.data(), data, size);
}
// Appends the computed hash to |response|.
// Should be called once the task is finished.
void AppendHash(AddSignaturesResponse* response) const {
response->add_sizes(static_cast<uint32_t>(buffer_.size()));
std::string* hashes = response->mutable_hashes();
hashes->append(reinterpret_cast<const char*>(&hash_), sizeof(hash_));
}
void ThreadRun(IsCancelledPredicate is_cancelled) override {
hash_ = ComputeHash(buffer_.data(), buffer_.size());
}
private:
Buffer buffer_;
struct Hash hash_ = {0};
};
// Collects the chunk signatures sent by the server and records, for every
// chunk hash, the offset of that chunk in the server-side file.
class ServerChunkReceiver {
 public:
  explicit ServerChunkReceiver(MessagePump* message_pump)
      : message_pump_(message_pump) {
    assert(message_pump_);
  }

  // Receives one signature packet from the server and stores its chunks in the
  // map (chunk hash) -> (server-side file offset).
  // With |block| == false the call returns right away if nothing can be
  // received; with |block| == true it waits for the next packet.
  // |num_server_bytes_processed| receives the summed size of the chunks that
  // were added by this call (0 if none).
  absl::Status Receive(bool block, uint64_t* num_server_bytes_processed) {
    assert(num_server_bytes_processed);
    *num_server_bytes_processed = 0;

    // Nothing to do if the EOF marker was already seen, or if the caller does
    // not want to wait and no packet is pending.
    if (all_chunks_received_ || (!block && !message_pump_->CanReceive())) {
      return absl::OkStatus();
    }

    AddSignaturesResponse response;
    absl::Status receive_status =
        message_pump_->ReceiveMessage(PacketType::kAddSignatures, &response);
    if (!receive_status.ok()) {
      return WrapStatus(receive_status,
                        "Failed to receive AddSignaturesResponse");
    }

    // The packed hash string must hold exactly one Hash per chunk size.
    const int num_chunks = response.sizes_size();
    const std::string& packed_hashes = response.hashes();
    if (packed_hashes.size() != num_chunks * sizeof(Hash)) {
      return MakeStatus("Bad hashes size. Expected %u. Actual %u.",
                        num_chunks * sizeof(Hash), packed_hashes.size());
    }

    // A packet without chunks is the EOF marker.
    if (num_chunks == 0) {
      all_chunks_received_ = true;
      return absl::OkStatus();
    }

    // Record the offset of every chunk. A hash that is already known keeps
    // its first offset.
    const Hash* const hashes =
        reinterpret_cast<const Hash*>(packed_hashes.data());
    uint64_t bytes_added = 0;
    for (int idx = 0; idx < num_chunks; ++idx) {
      const uint32_t chunk_size = response.sizes(idx);
      chunk_offsets_.insert({hashes[idx], curr_offset_});
      curr_offset_ += chunk_size;
      bytes_added += chunk_size;
    }
    *num_server_bytes_processed = bytes_added;
    return absl::OkStatus();
  }

  // True once the EOF marker has been received.
  bool AllChunksReceived() const { return all_chunks_received_; }

  // Map (server chunk hash) -> (offset of that chunk in the server file).
  const std::unordered_map<Hash, uint64_t>& ChunkOffsets() const {
    return chunk_offsets_;
  }

 private:
  MessagePump* message_pump_;

  // Server chunk hashes mapped to their offset in the server file.
  std::unordered_map<Hash, uint64_t> chunk_offsets_;

  // Offset in the server file at which the next received chunk starts.
  uint64_t curr_offset_ = 0;

  // Set when the empty EOF packet has been received.
  bool all_chunks_received_ = false;
};
// Turns client chunks into patch commands and sends them to the server in
// batches. A chunk that exists on the server becomes a "copy from basis file"
// command; any other chunk is read from the client file and sent as raw data.
class PatchSender {
  // 1 byte for source, 8 bytes for offset and 4 bytes for size.
  static constexpr size_t kPatchMetadataSize =
      sizeof(uint8_t) + sizeof(uint64_t) + sizeof(uint32_t);

 public:
  // |file| is the client file that new chunk data is read from and
  // |message_pump| is where the patch commands are sent to.
  PatchSender(FILE* file, MessagePump* message_pump)
      : file_(file), message_pump_(message_pump) {}

  // Tries to send patch data for the next chunk in |client_chunks|. The class
  // keeps an internal counter for the current chunk index. Patch data is not
  // sent if the current client chunk is not found among the server chunks and
  // there are outstanding server chunks. In that case, the method returns
  // with an OK status and should be called later as soon as additional server
  // chunks have been received.
  // |num_client_bytes_processed| is set to the total size of the chunks added.
  absl::Status TryAddChunks(const std::vector<Chunk>& client_chunks,
                            const ServerChunkReceiver& server_chunk_receiver,
                            uint64_t* num_client_bytes_processed) {
    assert(num_client_bytes_processed);
    *num_client_bytes_processed = 0;

    while (curr_chunk_idx_ < client_chunks.size()) {
      const Chunk& chunk = client_chunks[curr_chunk_idx_];
      auto it = server_chunk_receiver.ChunkOffsets().find(chunk.hash);
      bool exists = it != server_chunk_receiver.ChunkOffsets().end();

      // If there are outstanding server chunks and the client hash is not
      // found, do not send the patch data yet. A future server chunk might
      // contain the data.
      if (!exists && !server_chunk_receiver.AllChunksReceived()) {
        return absl::OkStatus();
      }

      absl::Status status = exists ? AddExistingChunk(it->second, chunk.size)
                                   : AddNewChunk(chunk.size);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to add chunk");
      }

      ++curr_chunk_idx_;
      *num_client_bytes_processed += chunk.size;

      // Break loop if all server chunks are received. Otherwise, progress
      // reporting is blocked.
      if (server_chunk_receiver.AllChunksReceived()) {
        break;
      }
    }
    return absl::OkStatus();
  }

  // Sends the remaining patch commands and an EOF marker.
  absl::Status Flush() {
    if (request_size_ > 0) {
      absl::Status status =
          message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send final patch commands");
      }
      total_request_size_ += request_size_;
      // Reset the pending size together with the request, exactly like
      // OnChunkAdded() does. Otherwise a later Flush() would send the (now
      // empty) request a second time and count its size again.
      request_size_ = 0;
      request_.Clear();
    }

    // Send an empty patch commands request as EOF marker.
    absl::Status status =
        message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to send patch commands EOF marker");
    }
    return absl::OkStatus();
  }

  // Returns the (estimated) total size of all patch data sent.
  uint64_t GetTotalRequestSize() const { return total_request_size_; }

  // Index of the next client chunk.
  size_t CurrChunkIdx() const { return curr_chunk_idx_; }

 private:
  // Adds patch data for a client chunk that has a matching server chunk of
  // given |size| at given |offset| in the server file.
  absl::Status AddExistingChunk(uint64_t offset, uint32_t size) {
    int last_idx = request_.sources_size() - 1;
    if (last_idx >= 0 &&
        request_.sources(last_idx) ==
            AddPatchCommandsRequest::SOURCE_BASIS_FILE &&
        request_.offsets(last_idx) + request_.sizes(last_idx) == offset &&
        request_.sizes(last_idx) < kCombinedChunkSizeThreshold) {
      // Same source and contiguous data -> Append to last entry.
      request_.set_sizes(last_idx, request_.sizes(last_idx) + size);
    } else {
      // Different source or first chunk -> Create new entry.
      request_.add_sources(AddPatchCommandsRequest::SOURCE_BASIS_FILE);
      request_.add_offsets(offset);
      request_.add_sizes(size);
      request_size_ += kPatchMetadataSize;
    }
    return OnChunkAdded(size);
  }

  // Adds patch data for a client chunk of given |size| that has no matching
  // server chunk. The chunk bytes are read from |file_| at |file_offset_| and
  // appended to the request's data blob.
  absl::Status AddNewChunk(uint32_t size) {
    std::string* data = request_.mutable_data();
    int last_idx = request_.sources_size() - 1;
    if (last_idx >= 0 &&
        request_.sources(last_idx) == AddPatchCommandsRequest::SOURCE_DATA) {
      // Same source -> Append to last entry.
      request_.set_sizes(last_idx, request_.sizes(last_idx) + size);
    } else {
      // Different source or first chunk -> Create new entry.
      request_.add_sources(AddPatchCommandsRequest::SOURCE_DATA);
      request_.add_offsets(data->size());
      request_.add_sizes(size);
      request_size_ += kPatchMetadataSize;
    }

    // Read data from client file into |data|. Be sure to restore the previous
    // file offset as the chunker might still be processing the file.
    size_t prev_size = data->size();
    data->resize(prev_size + size);
    int64_t prev_offset = ftell64(file_);
    if (fseek64(file_, file_offset_, SEEK_SET) != 0 ||
        fread(&(*data)[prev_size], 1, size, file_) != size ||
        fseek64(file_, prev_offset, SEEK_SET) != 0) {
      return MakeStatus("Failed to read %u bytes at offset %u", size,
                        file_offset_);
    }
    request_size_ += size;
    return OnChunkAdded(size);
  }

  // Advances the client file offset by |size| and sends the pending request
  // once it has grown beyond kPatchRequestSizeThreshold.
  absl::Status OnChunkAdded(uint32_t size) {
    file_offset_ += size;

    // Send patch commands if there's enough data.
    if (request_size_ > kPatchRequestSizeThreshold) {
      absl::Status status =
          message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send patch commands");
      }
      total_request_size_ += request_size_;
      request_size_ = 0;
      request_.Clear();
    }
    return absl::OkStatus();
  }

  // Client file that new chunk data is read from.
  FILE* file_;
  MessagePump* message_pump_;
  // Patch commands collected since the last send.
  AddPatchCommandsRequest request_;
  // Estimated size of |request_| (metadata plus raw data).
  size_t request_size_ = 0;
  // Estimated size of all requests sent so far.
  size_t total_request_size_ = 0;
  // Offset in |file_| of the next client chunk.
  uint64_t file_offset_ = 0;
  // Index of the next client chunk to process.
  size_t curr_chunk_idx_ = 0;
};
} // namespace
// Creates an interface that sends and receives all of its messages through
// |message_pump|. The pointer is stored as-is and is not checked for null.
CdcInterface::CdcInterface(MessagePump* message_pump)
: message_pump_(message_pump) {}
// Splits the file at |filepath| into content-defined chunks, hashes every
// chunk on the background hash pool and sends the (size, hash) pairs through
// the message pump in batches of at least kMinNumChunksPerBatch chunks. An
// empty AddSignaturesResponse is sent last as EOF marker.
// Returns an error if the file cannot be opened, or if streaming the file or
// sending a message fails.
absl::Status CdcInterface::CreateAndSendSignature(const std::string& filepath) {
  absl::StatusOr<FILE*> file = path::OpenFile(filepath, "rb");
  if (!file.ok()) {
    return file.status();
  }
#if PLATFORM_LINUX
  // Tell the kernel we'll load the file sequentially (improves IO bandwidth).
  posix_fadvise(fileno(*file), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

  // Use a background thread for computing hashes on the server.
  // Allocate lazily since it is not needed on the client.
  // MUST NOT use more than 1 worker thread since the order of finished tasks
  // would then not necessarily match the pushing order. However, the order is
  // important for computing offsets.
  if (!hash_pool_) hash_pool_ = std::make_unique<Threadpool>(1);

  // |chunk_handler| is called for each CDC chunk. It pushes a hash task to the
  // pool. Tasks are "recycled" from |free_tasks_|, so that buffers don't have
  // to be reallocated constantly.
  size_t num_hash_tasks = 0;
  auto chunk_handler = [pool = hash_pool_.get(), &num_hash_tasks,
                        free_tasks = &free_tasks_](const void* data,
                                                   size_t size) {
    ++num_hash_tasks;
    if (free_tasks->empty()) {
      free_tasks->push_back(std::make_unique<HashTask>());
    }
    std::unique_ptr<Task> task = std::move(free_tasks->back());
    free_tasks->pop_back();
    static_cast<HashTask*>(task.get())->SetData(data, size);
    pool->QueueTask(std::move(task));
  };

  fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
  fastcdc::Chunker chunker(config, chunk_handler);

  // |read_handler| is called for each block of file data. It feeds the data to
  // the chunker, collects the hashes of finished tasks in |response| and sends
  // |response| whenever it holds enough chunks.
  AddSignaturesResponse response;
  auto read_handler = [&chunker, &response, pool = hash_pool_.get(),
                       &num_hash_tasks, free_tasks = &free_tasks_,
                       message_pump = message_pump_](const void* data,
                                                     size_t size) {
    chunker.Process(static_cast<const uint8_t*>(data), size);

    // Finish hashing tasks. Block if there are too many of them in flight.
    for (;;) {
      std::unique_ptr<Task> task = num_hash_tasks >= kMaxNumHashTasks
                                       ? pool->GetCompletedTask()
                                       : pool->TryGetCompletedTask();
      if (!task) break;
      num_hash_tasks--;
      static_cast<HashTask*>(task.get())->AppendHash(&response);
      free_tasks->push_back(std::move(task));
    }

    // Send data if we have enough chunks.
    if (response.sizes_size() >= kMinNumChunksPerBatch) {
      absl::Status status =
          message_pump->SendMessage(PacketType::kAddSignatures, response);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send signatures");
      }
      response.Clear();
    }
    return absl::OkStatus();
  };

  absl::Status status =
      path::StreamReadFileContents(*file, kFileIoBufferSize, read_handler);
  fclose(*file);
  if (!status.ok()) {
    // |hash_pool_| outlives this call and may still hold queued or completed
    // tasks of this file. Take them out of the pool and recycle them, so that
    // the next call does not append their stale hashes to its own signature.
    hash_pool_->Wait();
    while (std::unique_ptr<Task> stale_task =
               hash_pool_->TryGetCompletedTask()) {
      free_tasks_.push_back(std::move(stale_task));
    }
    return WrapStatus(status, "Failed to compute signatures");
  }
  chunker.Finalize();

  // Finish hashing tasks.
  hash_pool_->Wait();
  std::unique_ptr<Task> task = hash_pool_->TryGetCompletedTask();
  while (task) {
    static_cast<HashTask*>(task.get())->AppendHash(&response);
    free_tasks_.push_back(std::move(task));
    task = hash_pool_->TryGetCompletedTask();
  }

  // Send the remaining chunks, if any.
  if (response.sizes_size() > 0) {
    status = message_pump_->SendMessage(PacketType::kAddSignatures, response);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to send final signatures");
    }
    response.Clear();
  }

  // Send an empty response as EOF marker.
  status = message_pump_->SendMessage(PacketType::kAddSignatures, response);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to send signatures EOF marker");
  }
  return absl::OkStatus();
}
// Chunks and hashes the client |file| while receiving the server-side chunk
// signatures, and sends patch commands for every client chunk as soon as it is
// known whether the server already has that chunk. Progress is reported to
// |progress| as (client bytes, server bytes) processed per step.
// Returns an error if streaming the file, receiving signatures or sending
// patch data fails.
absl::Status CdcInterface::ReceiveSignatureAndCreateAndSendDiff(
    FILE* file, ReportCdcProgress* progress) {
  // |progress| is dereferenced unconditionally below.
  assert(progress);

  //
  // Compute signatures from client |file| and send patches while receiving
  // server signatures.
  //
  std::vector<Chunk> client_chunks;
  ServerChunkReceiver server_chunk_receiver(message_pump_);
  PatchSender patch_sender(file, message_pump_);

  auto chunk_handler = [&client_chunks](const void* data, size_t size) {
    client_chunks.emplace_back(ComputeHash(data, size),
                               static_cast<uint32_t>(size));
  };
  fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
  fastcdc::Chunker chunker(config, chunk_handler);

  auto read_handler = [&chunker, &client_chunks, &server_chunk_receiver,
                       progress,
                       &patch_sender](const void* data, size_t size) {
    // Process client chunks for the data read.
    chunker.Process(static_cast<const uint8_t*>(data), size);

    // A call without data is treated as the end of the file: emit the last
    // chunk and from now on wait for the server instead of polling.
    const bool all_client_chunks_read = data == nullptr;
    if (all_client_chunks_read) {
      chunker.Finalize();
    }

    do {
      // Receive any server chunks available.
      uint64_t num_server_bytes_processed = 0;
      absl::Status status = server_chunk_receiver.Receive(
          /*block=*/all_client_chunks_read, &num_server_bytes_processed);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to receive server chunks");
      }

      // Try to send patch data.
      uint64_t num_client_bytes_processed = 0;
      status = patch_sender.TryAddChunks(client_chunks, server_chunk_receiver,
                                         &num_client_bytes_processed);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send patch data");
      }

      progress->ReportSyncProgress(num_client_bytes_processed,
                                   num_server_bytes_processed);
      // Once the whole client file is chunked, keep going until all server
      // chunks have arrived and all client chunks have been handled.
    } while (all_client_chunks_read &&
             (!server_chunk_receiver.AllChunksReceived() ||
              patch_sender.CurrChunkIdx() < client_chunks.size()));
    return absl::OkStatus();
  };

  absl::Status status =
      path::StreamReadFileContents(file, kFileIoBufferSize, read_handler);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to stream file");
  }

  // Should have sent all client chunks by now.
  assert(patch_sender.CurrChunkIdx() == client_chunks.size());

  // Flush remaining patches.
  status = patch_sender.Flush();
  if (!status.ok()) {
    return WrapStatus(status, "Failed to flush patches");
  }
  return absl::OkStatus();
}
// Receives patch commands from the message pump until an empty request (EOF
// marker) arrives and applies them: every command either copies a range from
// the basis file at |basis_filepath| or writes raw data carried in the request
// to |patched_file|.
// |is_executable| is set from the first non-empty chunk written, or to false
// if no data is written.
// Returns an error if the basis file cannot be opened or read, if a request
// cannot be received or is malformed, or if writing to |patched_file| fails.
absl::Status CdcInterface::ReceiveDiffAndPatch(
    const std::string& basis_filepath, FILE* patched_file,
    bool* is_executable) {
  assert(patched_file);
  assert(is_executable);

  Buffer buffer;
  *is_executable = false;

  absl::StatusOr<FILE*> basis_file = path::OpenFile(basis_filepath, "rb");
  if (!basis_file.ok()) {
    return basis_file.status();
  }

  // Closes the basis file on every way out of this function.
  struct FileCloser {
    FILE* file;
    ~FileCloser() { fclose(file); }
  } basis_file_closer{*basis_file};

#if PLATFORM_LINUX
  // Tell the kernel we'll load the file sequentially (improves IO bandwidth).
  // It is not strictly true that the basis file is accessed sequentially, but
  // for larger parts of this file this should be the case.
  posix_fadvise(fileno(*basis_file), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

  bool first_chunk = true;
  for (;;) {
    AddPatchCommandsRequest request;
    absl::Status status =
        message_pump_->ReceiveMessage(PacketType::kAddPatchCommands, &request);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to receive AddPatchCommandsRequest");
    }

    // All arrays must be of the same size.
    int num_chunks = request.sources_size();
    if (num_chunks != request.offsets_size() ||
        num_chunks != request.sizes_size()) {
      return MakeStatus(
          "Corrupted patch command arrays: Expected sizes %i. Actual %i/%i.",
          num_chunks, request.offsets_size(), request.sizes_size());
    }

    if (num_chunks == 0) {
      // A zero-size request marks the end of patch commands.
      break;
    }

    for (int n = 0; n < num_chunks; ++n) {
      AddPatchCommandsRequest::Source source = request.sources(n);
      uint64_t chunk_offset = request.offsets(n);
      uint32_t chunk_size = request.sizes(n);

      const char* chunk_data = nullptr;
      if (source == AddPatchCommandsRequest::SOURCE_BASIS_FILE) {
        // Copy [chunk_offset, chunk_offset + chunk_size) from |basis_file|.
        buffer.resize(chunk_size);
        if (fseek64(*basis_file, chunk_offset, SEEK_SET) != 0 ||
            fread(buffer.data(), 1, chunk_size, *basis_file) != chunk_size) {
          return MakeStatus(
              "Failed to read %u bytes at offset %u from basis file",
              chunk_size, chunk_offset);
        }
        chunk_data = buffer.data();
      } else {
        // Write [chunk_offset, chunk_offset + chunk_size) from request data.
        assert(source == AddPatchCommandsRequest::SOURCE_DATA);
        const std::string& data = request.data();
        // Offset and size come from the network. Compare without adding them
        // up, since chunk_offset + chunk_size could wrap around and slip past
        // the bounds check.
        if (chunk_offset > data.size() ||
            chunk_size > data.size() - chunk_offset) {
          return MakeStatus(
              "Insufficient data in patch commands. Required %u. Actual %u.",
              chunk_offset + chunk_size, data.size());
        }
        chunk_data = data.data() + chunk_offset;
      }

      // The first bytes written decide whether the file is an executable.
      if (first_chunk && chunk_size > 0) {
        first_chunk = false;
        *is_executable = Util::IsExecutable(chunk_data, chunk_size);
      }
      if (fwrite(chunk_data, 1, chunk_size, patched_file) != chunk_size) {
        return MakeStatus("Failed to write %u bytes to patched file",
                          chunk_size);
      }
    }
  }
  return absl::OkStatus();
}
} // namespace cdc_ft

View File

@@ -0,0 +1,73 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CDC_RSYNC_BASE_CDC_INTERFACE_H_
#define CDC_RSYNC_BASE_CDC_INTERFACE_H_
#include <string>
#include "absl/status/status.h"
#include "common/threadpool.h"
namespace cdc_ft {
class MessagePump;
// Callback interface through which CdcInterface reports sync progress.
class ReportCdcProgress {
public:
virtual ~ReportCdcProgress() = default;
// Called repeatedly while a diff is created. The arguments are the number of
// client and server bytes processed since the previous call (increments, not
// running totals); either of them may be 0.
virtual void ReportSyncProgress(size_t num_client_bytes_processed,
size_t num_server_bytes_processed) = 0;
};
// Creates signatures, diffs and patches files. Abstraction layer for fastcdc
// chunking and blake3 hashing.
class CdcInterface {
public:
// All messages are sent to and received from |message_pump|. The pointer is
// only stored; this class declares no destructor that would delete it.
explicit CdcInterface(MessagePump* message_pump);
// Creates the signature of the file at |filepath| and sends it to the socket.
// The signature is sent in batches and terminated by an empty message.
// Typically called on the server.
absl::Status CreateAndSendSignature(const std::string& filepath);
// Receives the server-side signature of |file| from the socket, creates diff
// data using the signature and the file, and sends the diffs to the socket.
// |progress| is called with the bytes processed in each step.
// Typically called on the client.
absl::Status ReceiveSignatureAndCreateAndSendDiff(
FILE* file, ReportCdcProgress* progress);
// Receives diffs from the socket and patches the file at |basis_filepath|.
// The patched data is written to |patched_file|, which must be open in "wb"
// mode. Sets |is_executable| to true if the patched file is an executable
// (based on magic headers).
// Typically called on the server.
absl::Status ReceiveDiffAndPatch(const std::string& basis_filepath,
FILE* patched_file, bool* is_executable);
private:
// Used for all communication; set once in the constructor.
MessagePump* const message_pump_;
// Thread pool for computing chunk hashes.
// Created on first use by CreateAndSendSignature().
std::unique_ptr<Threadpool> hash_pool_;
// List of unused hash computation tasks. Tasks are reused by the hash pool
// in order to prevent buffer reallocation.
std::vector<std::unique_ptr<Task>> free_tasks_;
};
} // namespace cdc_ft
#endif // CDC_RSYNC_BASE_CDC_INTERFACE_H_

View File

@@ -0,0 +1,118 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/base/cdc_interface.h"
#include <cstdio>
#include <fstream>
#include "cdc_rsync/base/fake_socket.h"
#include "cdc_rsync/base/message_pump.h"
#include "common/log.h"
#include "common/path.h"
#include "common/status_test_macros.h"
#include "common/test_main.h"
#include "gtest/gtest.h"
namespace cdc_ft {
namespace {
class FakeCdcProgress : public ReportCdcProgress {
public:
void ReportSyncProgress(uint64_t num_client_bytes_processed,
uint64_t num_server_bytes_processed) override {
total_client_bytes_processed += num_client_bytes_processed;
total_server_bytes_processed += num_server_bytes_processed;
}
uint64_t total_client_bytes_processed = 0;
uint64_t total_server_bytes_processed = 0;
};
// Test fixture that runs a MessagePump on top of a FakeSocket, so that every
// message sent by the code under test can be received again in the same test.
class CdcInterfaceTest : public ::testing::Test {
public:
// Sets up console logging and starts the message pump.
void SetUp() override {
Log::Initialize(std::make_unique<ConsoleLog>(LogLevel::kInfo));
message_pump_.StartMessagePump();
}
// Shuts down the fake socket before stopping the message pump, then shuts
// down logging.
void TearDown() override {
socket_.ShutdownSendingEnd();
message_pump_.StopMessagePump();
Log::Shutdown();
}
protected:
FakeSocket socket_;
// Message pump that sends to and receives from |socket_|.
MessagePump message_pump_{&socket_, MessagePump::PacketReceivedDelegate()};
// Directory holding the input files of the tests.
std::string base_dir_ = GetTestDataDir("cdc_interface");
};
// Runs a full sync round trip through the fake socket: signature of the old
// file -> diff against the new file -> patched file, and verifies that the
// patched file equals the new file and that progress adds up to the file
// sizes.
TEST_F(CdcInterfaceTest, SyncTest) {
  CdcInterface cdc(&message_pump_);
  FakeCdcProgress progress;

  const std::string old_filepath = path::Join(base_dir_, "old_file.txt");
  const std::string new_filepath = path::Join(base_dir_, "new_file.txt");

  path::Stats old_stats;
  EXPECT_OK(path::GetStats(old_filepath, &old_stats));
  path::Stats new_stats;
  EXPECT_OK(path::GetStats(new_filepath, &new_stats));

  // Create signature of old file and send it to the fake socket (it'll just
  // send it to itself).
  EXPECT_OK(cdc.CreateAndSendSignature(old_filepath));

  // Receive the signature from the fake socket, generate the diff to the file
  // at |new_filepath| and send it to the socket again.
  absl::StatusOr<FILE*> new_file = path::OpenFile(new_filepath, "rb");
  // Must be fatal: |new_file| is dereferenced right below.
  ASSERT_TRUE(new_file.ok()) << new_file.status().ToString();
  EXPECT_OK(cdc.ReceiveSignatureAndCreateAndSendDiff(*new_file, &progress));
  fclose(*new_file);

  // Receive the diff from the fake socket and create a patched file.
  std::FILE* patched_file = std::tmpfile();
  ASSERT_TRUE(patched_file != nullptr);
  bool is_executable = false;
  EXPECT_OK(
      cdc.ReceiveDiffAndPatch(old_filepath, patched_file, &is_executable));
  EXPECT_FALSE(is_executable);

  // Read new file.
  std::ifstream new_file_stream(new_filepath.c_str(), std::ios::binary);
  std::vector<uint8_t> new_file_data(
      std::istreambuf_iterator<char>(new_file_stream), {});

  // Read patched file. A failing ftell() must not be turned into a huge
  // buffer size.
  EXPECT_EQ(fseek(patched_file, 0, SEEK_END), 0);
  const long patched_size = ftell(patched_file);
  EXPECT_GE(patched_size, 0L);
  std::vector<uint8_t> patched_file_data(
      patched_size > 0 ? static_cast<size_t>(patched_size) : 0);
  EXPECT_EQ(fseek(patched_file, 0, SEEK_SET), 0);
  EXPECT_EQ(fread(patched_file_data.data(), 1, patched_file_data.size(),
                  patched_file),
            patched_file_data.size());

  // New and patched file should be equal now.
  EXPECT_EQ(patched_file_data, new_file_data);
  fclose(patched_file);

  // Verify progress tracker.
  EXPECT_EQ(progress.total_server_bytes_processed, old_stats.size);
  EXPECT_EQ(progress.total_client_bytes_processed, new_stats.size);
}
} // namespace
} // namespace cdc_ft

View File

@@ -0,0 +1,70 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/base/fake_socket.h"
namespace cdc_ft {
// Construction and destruction need no custom logic and are defaulted.
FakeSocket::FakeSocket() = default;
FakeSocket::~FakeSocket() = default;
// Appends the |size| bytes at |buffer| to the internal data queue and wakes up
// waiting receivers. Blocks for as long as sending is suspended.
// Always returns OK.
absl::Status FakeSocket::Send(const void* buffer, size_t size) {
  {
    // Hold back the data while sending is suspended.
    std::unique_lock<std::mutex> suspend_guard(suspend_mutex_);
    while (sending_suspended_) {
      suspend_cv_.wait(suspend_guard);
    }
  }

  {
    std::lock_guard<std::mutex> data_guard(data_mutex_);
    data_.append(static_cast<const char*>(buffer), size);
  }

  // Notify after releasing the lock.
  data_cv_.notify_all();
  return absl::OkStatus();
}
// Copies up to |size| previously sent bytes into |buffer| and removes them
// from the internal data queue. |bytes_received| is set to the number of bytes
// copied.
// Unless |allow_partial_read| is true, blocks until |size| bytes are available
// or the socket is shut down. With |allow_partial_read| the call never waits
// and may copy fewer than |size| bytes, including none.
// Returns an Unavailable error if the socket has been shut down.
absl::Status FakeSocket::Receive(void* buffer, size_t size,
                                 bool allow_partial_read,
                                 size_t* bytes_received) {
  *bytes_received = 0;

  std::unique_lock<std::mutex> data_guard(data_mutex_);
  while (!allow_partial_read && data_.size() < size && !shutdown_) {
    data_cv_.wait(data_guard);
  }

  if (shutdown_) {
    return absl::UnavailableError("Pipe is shut down");
  }

  const size_t num_bytes = data_.size() < size ? data_.size() : size;
  memcpy(buffer, data_.data(), num_bytes);

  // Erasing from the front is horribly inefficient, but should be OK in a
  // fake.
  data_.erase(0, num_bytes);
  *bytes_received = num_bytes;
  return absl::OkStatus();
}
// Marks the socket as shut down and wakes up all receivers that are waiting
// for data.
void FakeSocket::ShutdownSendingEnd() {
  {
    std::lock_guard<std::mutex> data_guard(data_mutex_);
    shutdown_ = true;
  }
  // Notify after releasing the lock.
  data_cv_.notify_all();
}
// Suspends (true) or resumes (false) sending and wakes up all senders that
// are waiting in Send(), so that they re-check the flag.
void FakeSocket::SuspendSending(bool suspended) {
  {
    std::lock_guard<std::mutex> suspend_guard(suspend_mutex_);
    sending_suspended_ = suspended;
  }
  // Notify after releasing the lock.
  suspend_cv_.notify_all();
}
} // namespace cdc_ft

View File

@@ -0,0 +1,57 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CDC_RSYNC_BASE_FAKE_SOCKET_H_
#define CDC_RSYNC_BASE_FAKE_SOCKET_H_
#include <condition_variable>
#include <mutex>
#include "absl/status/status.h"
#include "cdc_rsync/base/socket.h"
namespace cdc_ft {
// Fake socket that receives the same data it sends.
// Bytes passed to Send() are appended to an in-memory buffer and handed back
// by Receive(), so a single instance serves as both ends of a connection.
class FakeSocket : public Socket {
public:
FakeSocket();
~FakeSocket();
// Socket:
absl::Status Send(const void* buffer, size_t size) override; // thread-safe
absl::Status Receive(void* buffer, size_t size, bool allow_partial_read,
size_t* bytes_received) override; // thread-safe
// Marks the socket as shut down and wakes up all blocked Receive() calls.
// After this call, Receive() fails with an Unavailable error.
void ShutdownSendingEnd();
// If set to true, blocks on Send() until it is set to false again.
void SuspendSending(bool suspended);
private:
// Guards |data_| and |shutdown_|. |data_cv_| is notified whenever data is
// appended or the socket is shut down.
std::mutex data_mutex_;
std::condition_variable data_cv_;
// Bytes that were sent, but not yet received.
std::string data_;
bool shutdown_ = false;
// Guarded by |suspend_mutex_|. |suspend_cv_| is notified whenever it changes.
bool sending_suspended_ = false;
std::mutex suspend_mutex_;
std::condition_variable suspend_cv_;
};
} // namespace cdc_ft
#endif // CDC_RSYNC_BASE_FAKE_SOCKET_H_

View File

@@ -0,0 +1,473 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/base/message_pump.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "cdc_rsync/base/socket.h"
#include "common/buffer.h"
#include "common/log.h"
#include "common/status.h"
#include "google/protobuf/message_lite.h"
namespace cdc_ft {
namespace {
// Max total size of messages in the packet queues.
// If exceeded, Send/Receive methods start blocking.
// constexpr: this is a fixed limit that is read from several threads, so it
// must not be a mutable global.
constexpr uint64_t kInOutBufferSize = 1024 * 1024 * 8;
// Header is 1 byte type, 3 bytes size.
constexpr size_t kHeaderSize = 4;
// Size is compressed to 3 bytes.
constexpr uint32_t kMaxPacketSize = 256 * 256 * 256 - 1;
// Prepares |serialized_packet| for a payload of |size| bytes: clears it,
// reserves |kHeaderSize| + |size| bytes and appends the packet header.
// The payload itself is NOT appended. Fails if |size| exceeds
// |kMaxPacketSize|; |serialized_packet| is left untouched in that case.
absl::Status CreateSerializedPacket(PacketType type, size_t size,
                                    Buffer* serialized_packet) {
  // The wire format (1 byte type, 3 bytes size) relies on these invariants.
  static_assert(kHeaderSize == 4, "");
  static_assert(kMaxPacketSize < 256 * 256 * 256, "");
  static_assert(static_cast<size_t>(PacketType::kCount) <= 256, "");

  if (size > kMaxPacketSize) {
    return MakeStatus("Max packet size exceeded: %u", size);
  }

  serialized_packet->clear();
  serialized_packet->reserve(kHeaderSize + size);

  // Byte 0 is the type; bytes 1-3 are the size, least significant byte first.
  const uint8_t type_byte = static_cast<uint8_t>(type);
  const uint8_t size_byte_0 = static_cast<uint8_t>(size & 0xFF);
  const uint8_t size_byte_1 = static_cast<uint8_t>((size >> 8) & 0xFF);
  const uint8_t size_byte_2 = static_cast<uint8_t>((size >> 16) & 0xFF);
  uint8_t header[] = {type_byte, size_byte_0, size_byte_1, size_byte_2};
  serialized_packet->append(header, sizeof(header));
  return absl::OkStatus();
}
// Lists every PacketType enumerator exactly once. Expanded below to generate
// one switch case per enumerator.
#define CDC_FT_FOR_EACH_PACKET_TYPE(X) \
  X(kRawData)                          \
  X(kTest)                             \
  X(kSetOptions)                       \
  X(kToggleCompression)                \
  X(kAddFiles)                         \
  X(kSendFileStats)                    \
  X(kAddFileIndices)                   \
  X(kSendMissingFileData)              \
  X(kAddSignatures)                    \
  X(kAddPatchCommands)                 \
  X(kAddDeletedFiles)                  \
  X(kShutdown)                         \
  X(kCount)
#define CDC_FT_PACKET_TYPE_CASE(name) \
  case PacketType::name:              \
    return #name;

// Returns the enumerator name of |type| (e.g. "kRawData"), or "<unknown>" if
// |type| does not correspond to any enumerator.
const char* PacketTypeName(PacketType type) {
  const bool beyond_last = type > PacketType::kCount;
  if (beyond_last) {
    return "<unknown>";
  }
  switch (type) { CDC_FT_FOR_EACH_PACKET_TYPE(CDC_FT_PACKET_TYPE_CASE) }
  return "<unknown>";
}
#undef CDC_FT_PACKET_TYPE_CASE
#undef CDC_FT_FOR_EACH_PACKET_TYPE
} // namespace
// Creates a message pump on top of |socket| (not owned, must not be null).
// |packet_received| is stored and invoked for each received packet. The
// calling thread is remembered as the creation thread.
MessagePump::MessagePump(Socket* socket, PacketReceivedDelegate packet_received)
    : socket_(socket),
      // |packet_received| is taken by value, so move it into the member
      // instead of copying the std::function (and its captured state) again.
      packet_received_(std::move(packet_received)),
      creation_thread_id_(std::this_thread::get_id()) {
  assert(socket_ != nullptr);
}
// Stops the pump. StopMessagePump() is a no-op if it was already stopped.
MessagePump::~MessagePump() { StopMessagePump(); }
void MessagePump::StartMessagePump() {
assert(creation_thread_id_ == std::this_thread::get_id());
message_sender_thread_ = std::thread([this]() { ThreadSenderMain(); });
message_receiver_thread_ = std::thread([this]() { ThreadReceiverMain(); });
}
// Flushes the outgoing queue, signals shutdown to both worker threads and
// joins them. No-op if shutdown was already signaled.
// Must be called on the thread that created the pump.
void MessagePump::StopMessagePump() {
  assert(creation_thread_id_ == std::this_thread::get_id());

  if (!shutdown_) {
    // Give queued packets the chance to go out before tearing down.
    FlushOutgoingQueue();

    {
      // Hold both queue mutexes while setting the flag, so that conditions
      // awaited on either mutex get re-evaluated.
      absl::MutexLock outgoing_lock(&outgoing_mutex_);
      absl::MutexLock incoming_lock(&incoming_mutex_);
      shutdown_ = true;
    }

    auto join_if_running = [](std::thread& worker) {
      if (worker.joinable()) {
        worker.join();
      }
    };
    join_if_running(message_sender_thread_);
    join_if_running(message_receiver_thread_);
  }
}
// Wraps |size| bytes from |data| into a kRawData packet and queues it for
// sending. Fails if the packet header cannot be created or the packet cannot
// be queued.
absl::Status MessagePump::SendRawData(const void* data, size_t size) {
  Buffer packet;
  absl::Status header_status =
      CreateSerializedPacket(PacketType::kRawData, size, &packet);
  if (header_status.ok()) {
    // The header is in place; the payload follows directly after it.
    packet.append(static_cast<const uint8_t*>(data), size);
    return QueuePacket(std::move(packet));
  }
  return header_status;
}
// Serializes |message| into a packet of the given |type| and queues it for
// sending. Fails if the header cannot be created, if serialization fails or
// if the packet cannot be queued.
absl::Status MessagePump::SendMessage(
    PacketType type, const google::protobuf::MessageLite& message) {
  const size_t payload_size = message.ByteSizeLong();

  Buffer packet;
  absl::Status header_status =
      CreateSerializedPacket(type, payload_size, &packet);
  if (!header_status.ok()) {
    return header_status;
  }

  // Grow the packet to its final size and serialize the message in place,
  // right behind the header. Empty messages have nothing to serialize.
  packet.resize(kHeaderSize + payload_size);
  if (payload_size > 0) {
    const bool serialized = message.SerializeToArray(
        packet.data() + kHeaderSize, static_cast<int>(payload_size));
    if (!serialized) {
      return MakeStatus("Failed to serialize message to array");
    }
  }
  return QueuePacket(std::move(packet));
}
// Appends an already serialized packet to the outgoing queue. Blocks while
// the queue holds |kInOutBufferSize| bytes or more. Fails if the sender or
// the receiver thread has reported an error.
absl::Status MessagePump::QueuePacket(Buffer&& serialized_packet) {
  absl::MutexLock outgoing_lock(&outgoing_mutex_);

  // Wait a little if the max queue size is exceeded. A pump thread error ends
  // the wait as well.
  auto has_room_or_failed =
      [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) {
        const bool has_room = outgoing_packets_byte_size_ < kInOutBufferSize;
        return has_room || send_error_ || receive_error_;
      };
  outgoing_mutex_.Await(absl::Condition(&has_room_or_failed));

  // There could be a race where send_error_ is set to true after this, but
  // that's OK.
  const bool pump_failed = send_error_ || receive_error_;
  if (pump_failed) {
    absl::MutexLock status_lock(&status_mutex_);
    return WrapStatus(status_,
                      "Failed to send packet. Message pump thread is down");
  }

  // Account for the packet first, then hand it over to the outgoing queue.
  outgoing_packets_byte_size_ += serialized_packet.size();
  outgoing_packets_.push(std::move(serialized_packet));
  return absl::OkStatus();
}
// Delivers one serialized packet: to |output_handler_| if one is set,
// otherwise to the socket. Silently drops the packet if the receiver thread
// has reported an error.
absl::Status MessagePump::ThreadDoSendPacket(Buffer&& serialized_packet) {
  // Just eat the packet if there was a receive error as the other side is
  // probably down and won't read packets anymore.
  if (receive_error_) {
    return absl::OkStatus();
  }

  const auto packet_size = serialized_packet.size();

  if (output_handler_) {
    // Output is redirected; the socket is bypassed.
    absl::Status handler_status =
        output_handler_(serialized_packet.data(), packet_size);
    return WrapStatus(handler_status, "Output handler failed");
  }

  absl::Status send_status =
      socket_->Send(serialized_packet.data(), packet_size);
  if (send_status.ok()) {
    LOG_VERBOSE("Sent packet of size %u (total buffer: %u)", packet_size,
                outgoing_packets_byte_size_.load());
    return absl::OkStatus();
  }
  return WrapStatus(send_status, "Failed to send packet of size %u",
                    packet_size);
}
// Dequeues the next incoming packet and moves its payload into |data|.
// Fails if no packet can be dequeued or if the packet is not of type
// kRawData.
absl::Status MessagePump::ReceiveRawData(Buffer* data) {
  Packet received;
  absl::Status dequeue_status = DequeuePacket(&received);
  if (!dequeue_status.ok()) {
    return WrapStatus(dequeue_status, "Failed to dequeue packet");
  }

  const bool is_raw_data = received.type == PacketType::kRawData;
  if (!is_raw_data) {
    return MakeStatus("Unexpected packet type %s. Expected kRawData.",
                      PacketTypeName(received.type));
  }

  *data = std::move(received.data);
  return absl::OkStatus();
}
// Dequeues the next incoming packet and parses its payload into |message|.
// Fails if no packet can be dequeued, if the packet type differs from |type|
// or if the payload cannot be parsed.
absl::Status MessagePump::ReceiveMessage(
    PacketType type, google::protobuf::MessageLite* message) {
  Packet received;
  absl::Status dequeue_status = DequeuePacket(&received);
  if (!dequeue_status.ok()) {
    return WrapStatus(dequeue_status, "Failed to dequeue packet");
  }

  if (received.type != type) {
    return MakeStatus("Unexpected packet type %s. Expected %s.",
                      PacketTypeName(received.type), PacketTypeName(type));
  }

  const bool parsed = message->ParseFromArray(
      received.data.data(), static_cast<int>(received.data.size()));
  if (parsed) {
    return absl::OkStatus();
  }
  return MakeStatus("Failed to parse packet of type %s and size %u",
                    PacketTypeName(received.type), received.data.size());
}
// Moves the oldest incoming packet into |packet|. Blocks until a packet is
// queued or a pump thread has reported an error.
absl::Status MessagePump::DequeuePacket(Packet* packet) {
  absl::MutexLock incoming_lock(&incoming_mutex_);

  // Wait for a packet to be available, or for a pump thread to fail.
  auto packet_or_failure =
      [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(incoming_mutex_) {
        const bool has_packet = !incoming_packets_.empty();
        return has_packet || send_error_ || receive_error_;
      };
  incoming_mutex_.Await(absl::Condition(&packet_or_failure));

  // A receive error is only reported once |incoming_packets_| is drained and
  // all valid packets have been returned. This way, the error shows up for
  // the packet that failed to be received. Send errors are reported at once.
  const bool receive_failed_and_drained =
      receive_error_ && incoming_packets_.empty();
  if (send_error_ || receive_failed_and_drained) {
    absl::MutexLock status_lock(&status_mutex_);
    return WrapStatus(status_, "Message pump thread is down");
  }

  // Take the packet out of the queue and give back its share of the budget.
  *packet = std::move(incoming_packets_.front());
  incoming_packets_.pop();
  incoming_packets_byte_size_ -= kHeaderSize + packet->data.size();
  return absl::OkStatus();
}
// Receives one complete packet (header + payload) into |packet|.
// Fails if the header or the payload cannot be received, or if the header
// carries an invalid packet type.
absl::Status MessagePump::ThreadDoReceivePacket(Packet* packet) {
  static_assert(kHeaderSize == 4, "");

  // Read type and size in one go for performance reasons.
  uint8_t header[kHeaderSize];
  absl::Status status = ThreadDoReceive(header, kHeaderSize);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to receive packet of size %u",
                      kHeaderSize);
  }

  // Byte 0 holds the packet type.
  uint8_t type_byte = header[0];
  if (type_byte >= static_cast<uint8_t>(PacketType::kCount)) {
    return MakeStatus("Invalid packet type: %u", type_byte);
  }
  packet->type = static_cast<PacketType>(type_byte);

  // Bytes 1-3 hold the payload size, least significant byte first.
  uint32_t payload_size = 0;
  for (size_t index = kHeaderSize - 1; index >= 1; --index) {
    payload_size = (payload_size << 8) | static_cast<uint32_t>(header[index]);
  }
  if (payload_size > kMaxPacketSize) {
    return MakeStatus("Max packet size exceeded: %u", payload_size);
  }

  packet->data.resize(payload_size);
  status = ThreadDoReceive(packet->data.data(), payload_size);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to read packet data of size %u",
                      payload_size);
  }

  LOG_VERBOSE("Received packet of size %u (total buffer: %u)", payload_size,
              incoming_packets_byte_size_.load());
  return absl::OkStatus();
}
// Receives exactly |size| bytes into |buffer|, either from |input_reader_|
// (if set) or from the socket. A |size| of 0 succeeds without reading.
// If |input_reader_| reports EOF, it is reset. If it hit EOF without
// delivering any bytes, the data is read from the socket instead.
absl::Status MessagePump::ThreadDoReceive(void* buffer, size_t size) {
if (size == 0) {
return absl::OkStatus();
}
if (input_reader_) {
size_t bytes_read = 0;
bool eof = false;
absl::Status status = input_reader_->Read(buffer, size, &bytes_read, &eof);
// The reader is dropped on EOF, even if Read() reported an error as well.
if (eof) {
input_reader_.reset();
}
if (!status.ok()) {
return status;
}
// |input_reader_| should read |size| bytes unless |eof| is hit.
assert(bytes_read == size || eof);
// Since this method never reads across packet boundaries and since packets
// should not be partially received through |input_reader_|, it is an error
// if there's a partial read on EOF.
if (eof && (bytes_read > 0 && bytes_read < size)) {
return MakeStatus("EOF after partial read of %u / %u bytes", bytes_read,
size);
}
// Special case, might happen if |input_reader_| was an unzip stream and the
// last read stopped right before zlib's EOF marker. Fall through to reading
// uncompressed data in that case.
if (bytes_read == size) {
return absl::OkStatus();
}
assert(eof && bytes_read == 0);
}
// Full (non-partial) read from the socket; the byte count is not needed.
size_t unused;
return socket_->Receive(buffer, size, /*allow_partial_read=*/false, &unused);
}
// Blocks until the outgoing byte count has dropped to zero or a pump thread
// has reported an error.
void MessagePump::FlushOutgoingQueue() {
  absl::MutexLock outgoing_lock(&outgoing_mutex_);
  auto drained_or_failed =
      [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) {
        const bool drained = outgoing_packets_byte_size_ == 0;
        return drained || send_error_ || receive_error_;
      };
  outgoing_mutex_.Await(absl::Condition(&drained_or_failed));
}
// Installs |input_reader| as the source for received data. Must be called on
// the receiver thread with a non-null reader. Logs a warning and keeps the
// existing reader if one is already installed.
void MessagePump::RedirectInput(std::unique_ptr<InputReader> input_reader) {
  assert(std::this_thread::get_id() == message_receiver_thread_.get_id());
  assert(input_reader);

  if (!input_reader_) {
    input_reader_ = std::move(input_reader);
    return;
  }
  LOG_WARNING("Input reader already set");
}
// Replaces the output handler with |output_handler|. An empty handler sends
// output to the socket again (see ThreadDoSendPacket()).
void MessagePump::RedirectOutput(OutputHandler output_handler) {
// Flush before switching, so that packets queued so far go to the old output.
FlushOutgoingQueue();
output_handler_ = std::move(output_handler);
}
// Returns the number of packets currently in the outgoing queue. A packet
// that the sender thread has already popped for sending is not counted.
size_t MessagePump::GetNumOutgoingPackagesForTesting() {
absl::MutexLock outgoing_lock(&outgoing_mutex_);
return outgoing_packets_.size();
}
// Returns |kInOutBufferSize|, the queue byte limit at which Send/Receive
// methods start blocking.
size_t MessagePump::GetMaxInOutBufferSizeForTesting() {
return kInOutBufferSize;
}
// Returns |kMaxPacketSize|, the largest payload size that fits into the
// 3-byte size field of the packet header.
size_t MessagePump::GetMaxPacketSizeForTesting() { return kMaxPacketSize; }
// Main loop of the sender thread. Repeatedly takes the oldest packet from the
// outgoing queue and sends it. Exits on shutdown or on the first send error.
// On error, the failure is stored in |status_| and |send_error_| is set.
void MessagePump::ThreadSenderMain() {
while (!send_error_) {
Buffer serialized_packet;
size_t size;
{
// Wait for a packet to be available.
absl::MutexLock outgoing_lock(&outgoing_mutex_);
auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) {
return outgoing_packets_.size() > 0 || shutdown_;
};
outgoing_mutex_.Await(absl::Condition(&cond));
// Shutdown wins over pending packets; anything still queued is not sent.
if (shutdown_) {
break;
}
// Grab packet from outgoing queue.
serialized_packet = std::move(outgoing_packets_.front());
size = serialized_packet.size();
outgoing_packets_.pop();
}
// Send data. This blocks until all data is submitted.
// The outgoing mutex is not held here, so other threads can keep queueing.
absl::Status status = ThreadDoSendPacket(std::move(serialized_packet));
if (!status.ok()) {
{
absl::MutexLock status_lock(&status_mutex_);
status_ = WrapStatus(status, "Failed to send packet");
}
// Set the flag while holding both queue mutexes, so that conditions awaited
// on either mutex get re-evaluated.
absl::MutexLock outgoing_lock(&outgoing_mutex_);
absl::MutexLock incoming_lock(&incoming_mutex_);
send_error_ = true;
break;
}
// Decrease AFTER sending, this is important for FlushOutgoingQueue().
absl::MutexLock outgoing_lock(&outgoing_mutex_);
outgoing_packets_byte_size_ -= size;
}
}
// Main loop of the receiver thread. Repeatedly receives a packet, notifies
// |packet_received_| and appends the packet to the incoming queue. Exits on
// shutdown or on the first receive error. On error, the failure is stored in
// |status_| and |receive_error_| is set.
void MessagePump::ThreadReceiverMain() {
while (!receive_error_) {
// Wait until the incoming queue is below its byte limit.
{
absl::MutexLock incoming_lock(&incoming_mutex_);
auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(incoming_mutex_) {
return incoming_packets_byte_size_ < kInOutBufferSize || shutdown_;
};
incoming_mutex_.Await(absl::Condition(&cond));
if (shutdown_) {
break;
}
}
// Receive packet. This blocks until data is available.
Packet packet;
absl::Status status = ThreadDoReceivePacket(&packet);
if (!status.ok()) {
{
absl::MutexLock status_lock(&status_mutex_);
status_ = WrapStatus(status, "Failed to receive packet");
}
// Set the flag while holding both queue mutexes, so that conditions awaited
// on either mutex get re-evaluated.
absl::MutexLock outgoing_lock(&outgoing_mutex_);
absl::MutexLock incoming_lock(&incoming_mutex_);
receive_error_ = true;
break;
}
// Notify the delegate before the packet is queued, i.e. before consumers
// can dequeue it.
if (packet_received_) {
packet_received_(packet.type);
}
// Queue the packet for receiving.
absl::MutexLock incoming_lock(&incoming_mutex_);
incoming_packets_byte_size_ += kHeaderSize + packet.data.size();
incoming_packets_.push(std::move(packet));
}
}
} // namespace cdc_ft

View File

@@ -0,0 +1,275 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CDC_RSYNC_BASE_MESSAGE_PUMP_H_
#define CDC_RSYNC_BASE_MESSAGE_PUMP_H_
#include <queue>
#include <thread>
#include "absl/base/thread_annotations.h"
#include "absl/status/status.h"
#include "absl/synchronization/mutex.h"
#include "common/buffer.h"
namespace google {
namespace protobuf {
class MessageLite;
}
} // namespace google
namespace cdc_ft {
class Socket;
// Type tag of a packet. When sending a kXXXRequest from client to server or a
// kXXXResponse from server to client, use packet type kXXX. See
// messages.proto for the corresponding message definitions.
enum class PacketType {
// Not a proto, just raw bytes.
kRawData = 0,
// Used for testing.
kTest,
// Send options to server.
kSetOptions,
// Toggle compression on/off.
kToggleCompression,
//
// Send all files from client to server.
//
// Send file paths including timestamps and sizes, and directories to server.
// An empty request indicates that all data has been sent.
kAddFiles,
// Send stats about missing, excessive, changed and matching files to client.
kSendFileStats,
//
// Send all missing files from server to client.
//
// Send indices of missing files to client.
// An empty request indicates that all data has been sent.
// Also used for sending indices of changed files.
kAddFileIndices,
// Start sending missing file data to the server. After each
// SendMissingFileDataRequest, the client sends file data as raw packets and
// an empty packet to indicate eof.
kSendMissingFileData,
//
// Rsync data exchange.
//
// Send signatures to client.
// An empty response indicates that all data has been sent.
kAddSignatures,
// Send patch commands to server.
// An empty request indicates that all data has been sent.
kAddPatchCommands,
//
// Deletion of extraneous files.
//
kAddDeletedFiles,
//
// Shutdown.
//
// Ask the server to shut down. Also used for shutdown ack.
kShutdown,
// Must be last. Not a valid type for packets on the wire.
kCount
};
// Sends and receives typed packets (raw data or protobuf messages) over a
// Socket. Outgoing and incoming packets are queued and processed by one
// sender and one receiver worker thread.
class MessagePump {
public:
using PacketReceivedDelegate = std::function<void(PacketType)>;
// |socket| is the underlying socket that data is sent to and received from,
// unless redirected with one of the Redirect* methods. |packet_received| is
// a callback that is called from the receiver thread as soon as a packet is
// received. RedirectInput() should be called from this delegate. Useful for
// things like decompression.
MessagePump(Socket* socket, PacketReceivedDelegate packet_received);
virtual ~MessagePump();
// Starts worker threads to send/receive messages. Should be called after the
// socket is connected. Must not be already started.
// NOT thread-safe. Should be called from the creation thread.
void StartMessagePump();
// Stops worker threads to send/receive messages. No-op if already stopped or
// not started. Cannot be restarted.
// NOT thread-safe. Should be called from the creation thread.
void StopMessagePump() ABSL_LOCKS_EXCLUDED(outgoing_mutex_, incoming_mutex_);
// Queues data for sending. May block if too much data is queued.
// Thread-safe.
absl::Status SendRawData(const void* data, size_t size);
absl::Status SendMessage(PacketType type,
const google::protobuf::MessageLite& message);
// Receives a packet. Blocks if no packet is currently available.
// Thread-safe.
absl::Status ReceiveRawData(Buffer* data);
absl::Status ReceiveMessage(PacketType type,
google::protobuf::MessageLite* message);
// Returns true if the Receive* functions have data available. Note that
// receiving messages from multiple threads might be racy, i.e. if
// CanReceive() returns true and Receive* is called afterwards, the method
// might block if another thread has grabbed the packet in the meantime.
bool CanReceive() const { return incoming_packets_byte_size_ > 0; }
// Blocks until all outgoing messages were sent. Does not prevent that other
// threads queue new packets while the method is blocking, so the caller
// should make sure that that's not the case for consistent behavior.
// Thread-safe.
void FlushOutgoingQueue() ABSL_LOCKS_EXCLUDED(outgoing_mutex_);
class InputReader {
public:
virtual ~InputReader() {}
// Reads as much data as possible to |out_buffer|, but no more than
// |out_size| bytes. Sets |bytes_read| to the number of bytes read.
// |eof| is set to true if no more input data is available. The flag
// indicates that the parent MessagePump should reset the input reader
// and read data from the socket again.
virtual absl::Status Read(void* out_buffer, size_t out_size,
size_t* bytes_read, bool* eof) = 0;
};
// Starts receiving input from |input_reader| instead of from the socket.
// |input_reader| is called on a background thread. It must be a valid
// pointer. The input reader stays in place until it returns |eof| == true.
// After that, the input reader is reset and data is received from the socket
// again.
// This method must be called from the receiver thread, usually during the
// execution of the PacketReceivedDelegate passed in the constructor.
// Otherwise, the receiver thread might be blocked on a recv() call and the
// first data received would still be read from the socket.
void RedirectInput(std::unique_ptr<InputReader> input_reader);
// If set to a non-empty function, starts sending output to |output_handler|
// instead of to the socket. If set to an empty function, starts sending to
// the socket again. |output_handler| is called on a background thread.
// The outgoing packet queue is flushed prior to changing the output handler.
// The caller must make sure that no background threads are sending new
// messages while this method is running.
using OutputHandler =
std::function<absl::Status(const void* data, size_t size)>;
void RedirectOutput(OutputHandler output_handler);
// Returns the number of packets queued for sending.
size_t GetNumOutgoingPackagesForTesting()
ABSL_LOCKS_EXCLUDED(outgoing_mutex_);
// Returns the max total size of messages in the packet queues.
size_t GetMaxInOutBufferSizeForTesting();
// Returns the max payload size of a single raw or proto message (excluding
// the packet header).
size_t GetMaxPacketSizeForTesting();
protected:
// A received packet: its type and its payload (without the header).
struct Packet {
PacketType type = PacketType::kCount;
Buffer data;
// Instances should be moved, not copied.
Packet() = default;
Packet(Packet&& other) { *this = std::move(other); }
Packet(const Packet&) = delete;
Packet& operator=(const Packet&) = delete;
Packet& operator=(Packet&& other) {
type = other.type;
data = std::move(other.data);
return *this;
}
};
private:
// Outgoing packets are already serialized to save mem copies.
absl::Status QueuePacket(Buffer&& serialized_packet)
ABSL_LOCKS_EXCLUDED(outgoing_mutex_, status_mutex_);
absl::Status DequeuePacket(Packet* packet)
ABSL_LOCKS_EXCLUDED(incoming_mutex_, status_mutex_);
// Underlying socket, not owned.
Socket* socket_;
// Delegate called if a packet was received.
// Called immediately from the receiver thread.
PacketReceivedDelegate packet_received_;
// Message pump threads main method for sending and receiving data.
void ThreadSenderMain() ABSL_LOCKS_EXCLUDED(outgoing_mutex_, status_mutex_);
void ThreadReceiverMain() ABSL_LOCKS_EXCLUDED(incoming_mutex_, status_mutex_);
// Actually send/receive packets.
absl::Status ThreadDoSendPacket(Buffer&& serialized_packet);
absl::Status ThreadDoReceivePacket(Packet* packet);
absl::Status ThreadDoReceive(void* buffer, size_t size);
std::thread message_sender_thread_;
std::thread message_receiver_thread_;
// If set, input is not received from the socket, but from |input_reader_|.
std::unique_ptr<InputReader> input_reader_;
// If set, output is not sent to the socket, but to |output_handler_|.
OutputHandler output_handler_;
//
// Synchronization of message pump threads and main thread.
//
// Guards to protect access to queued packets.
// When both are needed, |outgoing_mutex_| is acquired first.
absl::Mutex outgoing_mutex_;
absl::Mutex incoming_mutex_ ABSL_ACQUIRED_AFTER(outgoing_mutex_);
// Queued packets.
std::queue<Buffer> outgoing_packets_ ABSL_GUARDED_BY(outgoing_mutex_);
std::queue<Packet> incoming_packets_ ABSL_GUARDED_BY(incoming_mutex_);
// Total size of queued packets. Used to limit max queue size.
std::atomic_uint64_t outgoing_packets_byte_size_{0};
std::atomic_uint64_t incoming_packets_byte_size_{0};
// If true, the respective thread saw an error and shut down.
std::atomic_bool send_error_{false};
std::atomic_bool receive_error_{false};
// Shutdown signal to sender and receiver threads.
std::atomic_bool shutdown_{false};
// Error that caused a pump thread to shut down.
absl::Mutex status_mutex_;
absl::Status status_ ABSL_GUARDED_BY(status_mutex_);
// Thread that constructed this object; used to assert correct usage of
// StartMessagePump() and StopMessagePump().
std::thread::id creation_thread_id_;
};
} // namespace cdc_ft
#endif // CDC_RSYNC_BASE_MESSAGE_PUMP_H_

View File

@@ -0,0 +1,272 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/base/message_pump.h"
#include "cdc_rsync/base/fake_socket.h"
#include "cdc_rsync/protos/messages.pb.h"
#include "common/log.h"
#include "common/status.h"
#include "common/status_test_macros.h"
#include "gtest/gtest.h"
namespace cdc_ft {
namespace {
// Test fixture that runs a MessagePump on top of a FakeSocket. Since the fake
// loops sent data back to its receiving end, each test acts as both sender
// and receiver.
class MessagePumpTest : public ::testing::Test {
public:
void SetUp() override {
Log::Initialize(std::make_unique<ConsoleLog>(LogLevel::kInfo));
message_pump_.StartMessagePump();
}
void TearDown() override {
// Shut down the socket before stopping the pump, so that a Receive() call
// that is still blocked gets woken up.
fake_socket_.ShutdownSendingEnd();
message_pump_.StopMessagePump();
Log::Shutdown();
}
protected:
// Called on the receiver thread.
void ThreadPackageReceived(PacketType type) {
// Only kToggleCompression packets have an effect: they install
// |fake_compressed_input_reader_|, which tests must set beforehand.
if (type == PacketType::kToggleCompression) {
message_pump_.RedirectInput(std::move(fake_compressed_input_reader_));
}
}
FakeSocket fake_socket_;
MessagePump message_pump_{
&fake_socket_, [this](PacketType type) { ThreadPackageReceived(type); }};
// Reader handed to the pump when a kToggleCompression packet is received.
std::unique_ptr<MessagePump::InputReader> fake_compressed_input_reader_;
};
// Raw bytes sent through the pump come back unchanged.
TEST_F(MessagePumpTest, SendReceiveRawData) {
  // Everything written to the FakeSocket is looped back to its receiving end.
  const Buffer payload = {'r', 'a', 'w'};
  EXPECT_OK(message_pump_.SendRawData(payload.data(), payload.size()));

  Buffer echoed_payload;
  EXPECT_OK(message_pump_.ReceiveRawData(&echoed_payload));
  EXPECT_EQ(payload, echoed_payload);
}
// A proto message sent through the pump is received with the same content.
TEST_F(MessagePumpTest, SendReceiveMessage) {
  TestRequest sent;
  sent.set_message("message");
  EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, sent));

  TestRequest received;
  EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &received));
  EXPECT_EQ(sent.message(), received.message());
}
// Interleaved raw and proto packets are received in the order they were sent.
TEST_F(MessagePumpTest, SendReceiveMultiple) {
  const Buffer first_payload = {'r', 'a', 'w', '1'};
  const Buffer second_payload = {'r', 'a', 'w', '2'};
  TestRequest sent_request;
  sent_request.set_message("message");

  // Send: raw, proto, raw.
  EXPECT_OK(
      message_pump_.SendRawData(first_payload.data(), first_payload.size()));
  EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, sent_request));
  EXPECT_OK(
      message_pump_.SendRawData(second_payload.data(), second_payload.size()));

  // Receive in the same order.
  Buffer first_received;
  EXPECT_OK(message_pump_.ReceiveRawData(&first_received));
  TestRequest received_request;
  EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &received_request));
  Buffer second_received;
  EXPECT_OK(message_pump_.ReceiveRawData(&second_received));

  EXPECT_EQ(first_payload, first_received);
  EXPECT_EQ(sent_request.message(), received_request.message());
  EXPECT_EQ(second_payload, second_received);
}
// Receiving a proto message fails if the queued packet holds raw data.
TEST_F(MessagePumpTest, ReceiveMessageInstreadOfRaw) {
  const Buffer payload = {'r', 'a', 'w'};
  EXPECT_OK(message_pump_.SendRawData(payload.data(), payload.size()));

  TestRequest unexpected_request;
  EXPECT_NOT_OK(
      message_pump_.ReceiveMessage(PacketType::kTest, &unexpected_request));
}
// Receiving raw data fails if the queued packet holds a proto message.
TEST_F(MessagePumpTest, ReceiveRawInsteadOfMessage) {
  TestRequest sent_request;
  EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, sent_request));

  Buffer unexpected_payload;
  EXPECT_NOT_OK(message_pump_.ReceiveRawData(&unexpected_payload));
}
// Receiving fails if the expected packet type differs from the one sent.
TEST_F(MessagePumpTest, ReceiveMessageWrongType) {
  TestRequest sent_request;
  EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, sent_request));

  ShutdownRequest mismatched_request;
  EXPECT_NOT_OK(
      message_pump_.ReceiveMessage(PacketType::kShutdown, &mismatched_request));
}
// Sending fails for a message whose payload exceeds the max packet size.
TEST_F(MessagePumpTest, MessageMaxSizeExceeded) {
  const size_t oversized_length =
      message_pump_.GetMaxPacketSizeForTesting() + 1;

  TestRequest oversized_request;
  oversized_request.set_message(std::string(oversized_length, 'x'));
  EXPECT_NOT_OK(
      message_pump_.SendMessage(PacketType::kTest, oversized_request));
}
// FlushOutgoingQueue() blocks until all queued packets have been sent.
TEST_F(MessagePumpTest, FlushOutgoingQueue) {
  TestRequest request;
  request.set_message(std::string(1024 * 4, 'x'));
  constexpr size_t kNumMessages = 1000;

  // Note: Must stay below max queue size or else SendMessage starts blocking.
  ASSERT_LT((request.message().size() + 4) * kNumMessages,
            message_pump_.GetMaxInOutBufferSizeForTesting());

  // Queue up a bunch of large messages. Sending is suspended, so they pile up
  // in the outgoing queue.
  fake_socket_.SuspendSending(true);
  for (size_t n = 0; n < kNumMessages; ++n) {
    EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request));
  }
  // Compare against unsigned literals: the accessor returns size_t, and
  // comparing it with a signed 0 is a signed/unsigned comparison.
  EXPECT_GT(message_pump_.GetNumOutgoingPackagesForTesting(), 0u);

  // Flush the queue.
  fake_socket_.SuspendSending(false);
  message_pump_.FlushOutgoingQueue();

  // Check if the queue is empty.
  EXPECT_EQ(message_pump_.GetNumOutgoingPackagesForTesting(), 0u);
}
// Input reader fake that reads from |socket| and marks the data it has seen.
class FakeCompressedInputReader : public MessagePump::InputReader {
 public:
  explicit FakeCompressedInputReader(Socket* socket) : socket_(socket) {}

  // Doesn't actually do compression, just replaces the word "compressed" by
  // "COMPRESSED" as a sign that this handler was executed. In the real rsync
  // algorithm, this is used to decompress data.
  // Sets |eof| to true if the received data contains "set_eof". |eof| is not
  // touched if receiving from the socket fails.
  absl::Status Read(void* out_buffer, size_t out_size, size_t* bytes_read,
                    bool* eof) override {
    absl::Status status = socket_->Receive(
        out_buffer, out_size, /*allow_partial_read=*/false, bytes_read);
    if (!status.ok()) {
      return WrapStatus(status, "socket_->Receive() failed");
    }
    assert(*bytes_read == out_size);

    // |out_buffer| holds raw packet bytes and is NOT null-terminated, so it
    // must not be passed to strstr(), which would read past the end of the
    // buffer. Search a length-bounded copy of the received bytes instead.
    char* char_buffer = static_cast<char*>(out_buffer);
    const std::string received(char_buffer, *bytes_read);
    const std::string::size_type pos = received.find("compressed");
    if (pos != std::string::npos) {
      memcpy(char_buffer + pos, "COMPRESSED", strlen("COMPRESSED"));
    }
    *eof = received.find("set_eof") != std::string::npos;
    return absl::OkStatus();
  }

 private:
  // Socket to read from, not owned.
  Socket* socket_;
};
// Verifies that input is routed through the input reader after a
// kToggleCompression packet was received, and that the pump reads from the
// socket again once the reader reported EOF.
TEST_F(MessagePumpTest, RedirectInput) {
fake_compressed_input_reader_ =
std::make_unique<FakeCompressedInputReader>(&fake_socket_);
TestRequest test_request;
ToggleCompressionRequest compression_request;
test_request.set_message("uncompressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Once this message is received, |fake_compressed_input_reader_| is set by
// ThreadPackageReceived().
EXPECT_OK(message_pump_.SendMessage(PacketType::kToggleCompression,
compression_request));
// Send a "compressed" message (should be converted to upper case).
test_request.set_message("compressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Trigger reset of the input reader.
test_request.set_message("set_eof");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// The next message should be "uncompressed" (lower case) again.
test_request.set_message("uncompressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Receive all five packets in send order and check how each one arrived.
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "uncompressed");
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kToggleCompression,
&compression_request));
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "COMPRESSED");
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "set_eof");
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "uncompressed");
}
// Verifies that output is routed through the output handler while one is
// set, and goes to the socket directly again after it was cleared.
TEST_F(MessagePumpTest, RedirectOutput) {
// Doesn't actually do compression, just replaces the word "compressed" by
// "COMPRESSED" as a sign that this handler was executed. In the real rsync
// algorithm, this handler would pipe the data through zstd to compress it.
auto fake_compressed_output_handler = [this](const void* data, size_t size) {
std::string char_buffer(static_cast<const char*>(data), size);
std::string::size_type pos = char_buffer.find("compressed");
if (pos != std::string::npos) {
char_buffer.replace(pos, strlen("COMPRESSED"), "COMPRESSED");
}
// The replacement has the same length, so |size| is still correct.
return fake_socket_.Send(char_buffer.data(), size);
};
TestRequest test_request;
ToggleCompressionRequest compression_request;
test_request.set_message("uncompressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Set output handler.
message_pump_.RedirectOutput(fake_compressed_output_handler);
// Send a "compressed" message (should be converted to upper case).
test_request.set_message("compressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Clear output handler again.
message_pump_.RedirectOutput(MessagePump::OutputHandler());
// The next message should be "uncompressed" (lower case) again.
test_request.set_message("uncompressed");
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request));
// Receive the three packets in send order and check how each one arrived.
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "uncompressed");
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "COMPRESSED");
EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request));
EXPECT_EQ(test_request.message(), "uncompressed");
}
} // namespace
} // namespace cdc_ft

View File

@@ -0,0 +1,63 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_
#define CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_
namespace cdc_ft {
// Delimiter the server puts before and after every error message it prints.
// ssh.exe forwards both stderr and stdout to the client's stdout, so the client
// cannot tell the two streams apart; instead it treats everything between two
// markers as an error message. The value 0x1e is the ASCII record separator
// control character.
constexpr char kServerErrorMarker = '\x1e';
// Exit codes used by the server.
//
// The codes occupy a dedicated range that does not overlap with unrelated exit
// codes such as the ones produced by bash or ssh:
//   - 126: error from bash when the binary can't be started (permission
//          denied).
//   - 127: error from bash when the binary isn't found.
//   - 255: ssh.exe error code.
// Note that codes must be <= 255.
enum ServerExitCode {
  // First code of the range. KEEP UPDATED!
  kServerExitCodeMin = 50,

  // Generic error on startup, before the out-of-date check, e.g. bad args.
  kServerExitCodeGenericStartup = kServerExitCodeMin,

  // A gamelet component is outdated and needs to be re-uploaded.
  kServerExitCodeOutOfDate = 51,

  //
  // Every exit code below must be strictly bigger than
  // kServerExitCodeOutOfDate. Such codes are guaranteed to be past the
  // out-of-date check.
  //

  // Unspecified error.
  kServerExitCodeGeneric = 52,

  // Binding to the forward port failed, probably because there's another
  // instance of cdc_rsync running.
  kServerExitCodeAddressInUse = 53,

  // Last code of the range. KEEP UPDATED!
  kServerExitCodeMax = kServerExitCodeAddressInUse,
};
} // namespace cdc_ft
#endif // CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_

45
cdc_rsync/base/socket.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CDC_RSYNC_BASE_SOCKET_H_
#define CDC_RSYNC_BASE_SOCKET_H_
#include "absl/status/status.h"
namespace cdc_ft {
// Abstract interface for sending and receiving raw bytes. Send() and Receive()
// are pure virtual, so this class can only be used through a derived class
// that implements both.
class Socket {
public:
Socket() = default;
// Virtual so that derived classes are destroyed correctly through a Socket
// pointer or reference.
virtual ~Socket() = default;
// Send data to the socket.
// |buffer| points to the data and |size| is its length in bytes.
// NOTE(review): presumably returns a non-OK status on failure; the concrete
// error codes depend on the implementation - confirm there.
virtual absl::Status Send(const void* buffer, size_t size) = 0;
// Receives data from the socket. Blocks until data is available or the
// sending end of the socket gets shut down by the sender.
// If |allow_partial_read| is false, blocks until |size| bytes are available.
// If |allow_partial_read| is true, may return with success if less than
// |size| (but more than 0) bytes were received.
// The number of bytes written to |buffer| is returned in |bytes_received|.
// NOTE(review): the status returned when the sender shuts down before the
// requested data arrives is not specified here - check the implementations.
virtual absl::Status Receive(void* buffer, size_t size,
bool allow_partial_read,
size_t* bytes_received) = 0;
};
} // namespace cdc_ft
#endif // CDC_RSYNC_BASE_SOCKET_H_

View File

@@ -0,0 +1 @@
Data for rsync testing. This is the new, modified file on the workstation.

View File

@@ -0,0 +1 @@
Data for rsync testing. This is the old version on the gamelet.

0
cdc_rsync/base/testdata/root.txt vendored Normal file
View File