Mirror of https://github.com/nestriness/cdc-file-transfer.git
Synced 2026-01-30 14:25:36 +02:00
These tools enable fast, efficient synchronization of large directory trees from a Windows workstation to a Linux target machine. The cdc_rsync* tools support efficient file copies by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager and cdc_fuse_fs support efficient streaming of a local directory to a remote FUSE-based virtual file system, likewise employing CDC to identify and reuse unchanged data chunks.
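For illustration, a typical sync invocation looks roughly like the one below (flags follow the upstream README's rsync-like conventions for verbose/recursive; treat the exact syntax as indicative rather than authoritative):

  cdc_rsync C:\path\to\assets\* user@linux-host:~/assets -vr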
671 lines
23 KiB
C++
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cdc_rsync/base/cdc_interface.h"

#include <cassert>
#include <cstdio>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>

#include "absl/strings/str_format.h"
#include "blake3.h"
#include "cdc_rsync/base/message_pump.h"
#include "cdc_rsync/protos/messages.pb.h"
#include "common/buffer.h"
#include "common/path.h"
#include "common/status.h"
#include "common/util.h"
#include "fastcdc/fastcdc.h"

#if PLATFORM_LINUX
#include <fcntl.h>
#endif

namespace cdc_ft {
namespace {

// The average chunk size should be as low as possible, but not too low.
// Lower sizes mean better delta-encoding and hence less data to upload.
// However, chunking becomes slower for lower sizes. At 8 KB, a gamelet can
// still process close to 700 MB/sec, which matches hard drive speed.
// Signature data rate is another factor. The gamelet generates signature data
// at a rate of 700 MB/sec / kAvgChunkSize * sizeof(Chunk) = 1.7 MB/sec for
// 8 KB chunks. That means the client needs at least 16 MBit download
// bandwidth to stream signatures, or else this part becomes slower. 4 KB
// chunks would require a 32 MBit connection.
constexpr size_t kAvgChunkSize = 8 * 1024;
constexpr size_t kMinChunkSize = kAvgChunkSize / 2;
constexpr size_t kMaxChunkSize = kAvgChunkSize * 4;
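
// Back-of-the-envelope check of the rates quoted above, using the 20-byte
// Chunk record defined below:
//   700 MB/sec / 8 KB per chunk  ~= 89,600 chunks/sec
//   89,600 chunks/sec * 20 bytes ~= 1.7 MB/sec ~= 14 MBit/sec
// so the stated 16 MBit downlink keeps signature streaming off the critical
// path, with a little headroom.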

// This number was found by experimentally optimizing chunking throughput.
constexpr size_t kFileIoBufferSize = kMaxChunkSize * 4;

// Limits the size of contiguous patch chunks where data is copied from the
// basis file. Necessary since the server copies chunks in one go and doesn't
// split them up (that would be possible, but would unnecessarily complicate
// the code).
constexpr size_t kCombinedChunkSizeThreshold = 64 * 1024;

// Number of hashing tasks in flight at a given point in time.
constexpr size_t kMaxNumHashTasks = 64;

#pragma pack(push, 1)
// 16-byte hashes guarantee a sufficiently low chance of hash collisions. For
// 8-byte hashes, the chance of a collision is actually quite high for large
// files: 0.0004% for a 100 GB file with 8 KB chunks.
struct Hash {
  uint64_t low;
  uint64_t high;

  bool operator==(const Hash& other) const {
    return low == other.low && high == other.high;
  }
  bool operator!=(const Hash& other) const { return !(*this == other); }
};
#pragma pack(pop)

static_assert(sizeof(Hash) <= BLAKE3_OUT_LEN,
              "Hash must fit into a BLAKE3 digest");
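
// Birthday-bound sketch behind the collision figures above (illustrative):
// a 100 GB file at 8 KB average chunk size yields n ~= 1.3e7 chunks. With
// 8-byte hashes, p ~= n^2 / 2^65 ~= 5e-6, i.e. about the 0.0004% quoted
// above. With the 16-byte Hash used here, the same estimate drops to ~1e-25,
// negligible for any realistic file size.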

} // namespace
} // namespace cdc_ft

namespace std {
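
// The Hash bytes are a slice of a BLAKE3 digest and thus uniformly
// distributed, so the low 64 bits alone already make a well-mixed bucket
// hash for std::unordered_map.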

template <>
struct hash<cdc_ft::Hash> {
  size_t operator()(const cdc_ft::Hash& hash) const { return hash.low; }
};

} // namespace std

namespace cdc_ft {
namespace {

// Send a batch of signatures every 8 MB of processed data (~90 packets per
// second at 700 MB/sec processing rate). The size of each signature batch is
// kMinNumChunksPerBatch * sizeof(Chunk), e.g. 20 KB for an avg chunk size of
// 8 KB.
constexpr int kMinSigBatchDataSize = 8 * 1024 * 1024;
constexpr int kMinNumChunksPerBatch = kMinSigBatchDataSize / kAvgChunkSize;

// Send patch commands in batches of at least that size for efficiency.
constexpr int kPatchRequestSizeThreshold = 65536;

// 16 bytes hash, 4 bytes size = 20 bytes.
struct Chunk {
  Hash hash;
  uint32_t size = 0;
  Chunk(const Hash& hash, uint32_t size) : hash(hash), size(size) {}
};
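
// Note that sizeof(Chunk) matches the signature encoding used below: an
// AddSignaturesResponse carries a parallel |sizes| array plus a |hashes|
// string of packed 16-byte hashes, i.e. roughly 20 bytes of signature data
// per chunk.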

Hash ComputeHash(const void* data, size_t size) {
  assert(data);
  Hash hash;
  blake3_hasher hasher;
  blake3_hasher_init(&hasher);
  blake3_hasher_update(&hasher, data, size);
  blake3_hasher_finalize(&hasher, reinterpret_cast<uint8_t*>(&hash),
                         sizeof(hash));
  return hash;
}

// Task that computes the hash of a single chunk and adds the result to an
// AddSignaturesResponse.
class HashTask : public Task {
 public:
  HashTask() {}
  ~HashTask() {}

  HashTask(const HashTask& other) = delete;
  HashTask& operator=(const HashTask&) = delete;

  // Sets the data to compute the hash of.
  // Should be called before queuing the task.
  void SetData(const void* data, size_t size) {
    buffer_.reserve(size);
    buffer_.resize(size);
    memcpy(buffer_.data(), data, size);
  }

  // Appends the computed hash to |response|.
  // Should be called once the task is finished.
  void AppendHash(AddSignaturesResponse* response) const {
    response->add_sizes(static_cast<uint32_t>(buffer_.size()));
    std::string* hashes = response->mutable_hashes();
    hashes->append(reinterpret_cast<const char*>(&hash_), sizeof(hash_));
  }

  void ThreadRun(IsCancelledPredicate is_cancelled) override {
    hash_ = ComputeHash(buffer_.data(), buffer_.size());
  }

 private:
  Buffer buffer_;
  struct Hash hash_ = {0};
};
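
// Typical lifecycle, as used by CreateAndSendSignature() below: SetData() is
// called on the chunking thread, the task is queued on a single-threaded
// Threadpool (so completion order matches push order), and AppendHash() is
// called after completion; finished tasks are recycled to reuse their buffers.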

class ServerChunkReceiver {
 public:
  explicit ServerChunkReceiver(MessagePump* message_pump)
      : message_pump_(message_pump) {
    assert(message_pump_);
  }

  // Receives server signature packets and places the data into a map
  // (chunk hash) -> (server-side file offset).
  // If |block| is false, returns immediately if no data is available.
  // If |block| is true, blocks until some data is available.
  // |num_server_bytes_processed| is set to the total size of the chunks
  // received.
  absl::Status Receive(bool block, uint64_t* num_server_bytes_processed) {
    assert(num_server_bytes_processed);
    *num_server_bytes_processed = 0;

    // Have all server chunks been received already?
    if (all_chunks_received_) {
      return absl::OkStatus();
    }

    // If no data is available, early out (unless blocking is requested).
    if (!block && !message_pump_->CanReceive()) {
      return absl::OkStatus();
    }

    // Receive signatures.
    AddSignaturesResponse response;
    absl::Status status =
        message_pump_->ReceiveMessage(PacketType::kAddSignatures, &response);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to receive AddSignaturesResponse");
    }

    // Validate the size of the packed hashes, just in case.
    const int num_chunks = response.sizes_size();
    if (response.hashes().size() != num_chunks * sizeof(Hash)) {
      return MakeStatus("Bad hashes size. Expected %u. Actual %u.",
                        num_chunks * sizeof(Hash), response.hashes().size());
    }

    // An empty packet marks the end of the server chunks.
    if (num_chunks == 0) {
      all_chunks_received_ = true;
      return absl::OkStatus();
    }

    // Copy the data over to |chunk_offsets_|.
    const Hash* hashes =
        reinterpret_cast<const Hash*>(response.hashes().data());
    for (int n = 0; n < num_chunks; ++n) {
      uint32_t size = response.sizes(n);
      chunk_offsets_.insert({hashes[n], curr_offset_});
      curr_offset_ += size;
      *num_server_bytes_processed += size;
    }

    return absl::OkStatus();
  }

  // True if all server chunks have been received.
  bool AllChunksReceived() const { return all_chunks_received_; }

  // Returns a map (server chunk hash) -> (offset of that chunk in the server
  // file).
  const std::unordered_map<Hash, uint64_t>& ChunkOffsets() const {
    return chunk_offsets_;
  }

 private:
  MessagePump* message_pump_;

  // Maps server chunk hashes to file offsets in the server file.
  std::unordered_map<Hash, uint64_t> chunk_offsets_;

  // Current server file offset.
  uint64_t curr_offset_ = 0;

  // Whether all server chunks have been received.
  bool all_chunks_received_ = false;
};
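
// Counterpart note: these packets are produced by CreateAndSendSignature()
// below, which runs on the remote side; an empty response (zero chunks) is
// the EOF marker, mirroring the empty-request EOF of the patch stream.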

class PatchSender {
  // 1 byte for the source, 8 bytes for the offset, and 4 bytes for the size.
  static constexpr size_t kPatchMetadataSize =
      sizeof(uint8_t) + sizeof(uint64_t) + sizeof(uint32_t);

 public:
  PatchSender(FILE* file, MessagePump* message_pump)
      : file_(file), message_pump_(message_pump) {}

  // Tries to send patch data for the next chunk in |client_chunks|. The class
  // keeps an internal counter for the current chunk index. Patch data is not
  // sent if the current client chunk is not found among the server chunks and
  // there are outstanding server chunks. In that case, the method returns
  // with an OK status and should be called again once additional server
  // chunks have been received.
  // |num_client_bytes_processed| is set to the total size of the chunks added.
  absl::Status TryAddChunks(const std::vector<Chunk>& client_chunks,
                            const ServerChunkReceiver& server_chunk_receiver,
                            uint64_t* num_client_bytes_processed) {
    assert(num_client_bytes_processed);
    *num_client_bytes_processed = 0;

    while (curr_chunk_idx_ < client_chunks.size()) {
      const Chunk& chunk = client_chunks[curr_chunk_idx_];
      auto it = server_chunk_receiver.ChunkOffsets().find(chunk.hash);
      bool exists = it != server_chunk_receiver.ChunkOffsets().end();

      // If there are outstanding server chunks and the client hash is not
      // found, do not send the patch data yet. A future server chunk might
      // contain the data.
      if (!exists && !server_chunk_receiver.AllChunksReceived()) {
        return absl::OkStatus();
      }

      absl::Status status = exists ? AddExistingChunk(it->second, chunk.size)
                                   : AddNewChunk(chunk.size);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to add chunk");
      }

      ++curr_chunk_idx_;
      *num_client_bytes_processed += chunk.size;

      // Break the loop if all server chunks have been received; otherwise,
      // progress reporting is blocked.
      if (server_chunk_receiver.AllChunksReceived()) {
        break;
      }
    }

    return absl::OkStatus();
  }

  // Sends the remaining patch commands and an EOF marker.
  absl::Status Flush() {
    if (request_size_ > 0) {
      absl::Status status =
          message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send final patch commands");
      }
      total_request_size_ += request_size_;
      request_.Clear();
    }

    // Send an empty patch commands request as EOF marker.
    absl::Status status =
        message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to send patch commands EOF marker");
    }

    return absl::OkStatus();
  }

  // Returns the (estimated) total size of all patch data sent.
  uint64_t GetTotalRequestSize() const { return total_request_size_; }

  // Index of the next client chunk.
  size_t CurrChunkIdx() const { return curr_chunk_idx_; }

 private:
  // Adds patch data for a client chunk that has a matching server chunk of
  // the given |size| at the given |offset| in the server file.
  absl::Status AddExistingChunk(uint64_t offset, uint32_t size) {
    int last_idx = request_.sources_size() - 1;
    if (last_idx >= 0 &&
        request_.sources(last_idx) ==
            AddPatchCommandsRequest::SOURCE_BASIS_FILE &&
        request_.offsets(last_idx) + request_.sizes(last_idx) == offset &&
        request_.sizes(last_idx) < kCombinedChunkSizeThreshold) {
      // Same source and contiguous data -> Append to last entry.
      request_.set_sizes(last_idx, request_.sizes(last_idx) + size);
    } else {
      // Different source or first chunk -> Create new entry.
      request_.add_sources(AddPatchCommandsRequest::SOURCE_BASIS_FILE);
      request_.add_offsets(offset);
      request_.add_sizes(size);
      request_size_ += kPatchMetadataSize;
    }

    return OnChunkAdded(size);
  }
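
  // Adds patch data for a client chunk that has no matching server chunk.
  // The chunk's raw bytes are read from the client file and sent inline as
  // SOURCE_DATA.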
  absl::Status AddNewChunk(uint32_t size) {
    std::string* data = request_.mutable_data();
    int last_idx = request_.sources_size() - 1;
    if (last_idx >= 0 &&
        request_.sources(last_idx) == AddPatchCommandsRequest::SOURCE_DATA) {
      // Same source -> Append to last entry.
      request_.set_sizes(last_idx, request_.sizes(last_idx) + size);
    } else {
      // Different source or first chunk -> Create new entry.
      request_.add_sources(AddPatchCommandsRequest::SOURCE_DATA);
      request_.add_offsets(data->size());
      request_.add_sizes(size);
      request_size_ += kPatchMetadataSize;
    }

    // Read data from the client file into |data|. Be sure to restore the
    // previous file offset as the chunker might still be processing the file.
    size_t prev_size = data->size();
    data->resize(prev_size + size);
    int64_t prev_offset = ftell64(file_);
    if (fseek64(file_, file_offset_, SEEK_SET) != 0 ||
        fread(&(*data)[prev_size], 1, size, file_) != size ||
        fseek64(file_, prev_offset, SEEK_SET) != 0) {
      return MakeStatus("Failed to read %u bytes at offset %u", size,
                        file_offset_);
    }
    request_size_ += size;

    return OnChunkAdded(size);
  }

  absl::Status OnChunkAdded(uint32_t size) {
    file_offset_ += size;

    // Send patch commands if there's enough data.
    if (request_size_ > kPatchRequestSizeThreshold) {
      absl::Status status =
          message_pump_->SendMessage(PacketType::kAddPatchCommands, request_);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send patch commands");
      }
      total_request_size_ += request_size_;
      request_size_ = 0;
      request_.Clear();
    }

    return absl::OkStatus();
  }

  FILE* file_;
  MessagePump* message_pump_;

  AddPatchCommandsRequest request_;
  size_t request_size_ = 0;
  size_t total_request_size_ = 0;
  uint64_t file_offset_ = 0;
  size_t curr_chunk_idx_ = 0;
};
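
// Example of the merge logic above: three client chunks mapping to contiguous
// basis-file offsets 0, 8K, and 16K (8K each) are folded into a single
// SOURCE_BASIS_FILE command (offset=0, size=24K); merging stops once an entry
// has reached kCombinedChunkSizeThreshold, so the server never has to copy an
// overly large span in one go.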

} // namespace

CdcInterface::CdcInterface(MessagePump* message_pump)
    : message_pump_(message_pump) {}

absl::Status CdcInterface::CreateAndSendSignature(const std::string& filepath) {
  absl::StatusOr<FILE*> file = path::OpenFile(filepath, "rb");
  if (!file.ok()) {
    return file.status();
  }
#if PLATFORM_LINUX
  // Tell the kernel we'll load the file sequentially (improves IO bandwidth).
  posix_fadvise(fileno(*file), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

  // Use a background thread for computing hashes on the server.
  // Allocate lazily since it is not needed on the client.
  // MUST NOT use more than 1 worker thread since the order of finished tasks
  // would then not necessarily match the pushing order. However, the order is
  // important for computing offsets.
  if (!hash_pool_) hash_pool_ = std::make_unique<Threadpool>(1);

  // |chunk_handler| is called for each CDC chunk. It pushes a hash task to the
  // pool. Tasks are "recycled" from |free_tasks_|, so that buffers don't have
  // to be reallocated constantly.
  size_t num_hash_tasks = 0;
  auto chunk_handler = [pool = hash_pool_.get(), &num_hash_tasks,
                        free_tasks = &free_tasks_](const void* data,
                                                   size_t size) {
    ++num_hash_tasks;
    if (free_tasks->empty()) {
      free_tasks->push_back(std::make_unique<HashTask>());
    }
    std::unique_ptr<Task> task = std::move(free_tasks->back());
    free_tasks->pop_back();
    static_cast<HashTask*>(task.get())->SetData(data, size);
    pool->QueueTask(std::move(task));
  };

  fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
  fastcdc::Chunker chunker(config, chunk_handler);

  AddSignaturesResponse response;
  auto read_handler = [&chunker, &response, pool = hash_pool_.get(),
                       &num_hash_tasks, free_tasks = &free_tasks_,
                       message_pump = message_pump_](const void* data,
                                                     size_t size) {
    chunker.Process(static_cast<const uint8_t*>(data), size);

    // Finish hashing tasks. Block if there are too many of them in flight.
    for (;;) {
      std::unique_ptr<Task> task = num_hash_tasks >= kMaxNumHashTasks
                                       ? pool->GetCompletedTask()
                                       : pool->TryGetCompletedTask();
      if (!task) break;
      num_hash_tasks--;
      static_cast<HashTask*>(task.get())->AppendHash(&response);
      free_tasks->push_back(std::move(task));
    }

    // Send data if we have enough chunks.
    if (response.sizes_size() >= kMinNumChunksPerBatch) {
      absl::Status status =
          message_pump->SendMessage(PacketType::kAddSignatures, response);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send signatures");
      }
      response.Clear();
    }

    return absl::OkStatus();
  };

  absl::Status status =
      path::StreamReadFileContents(*file, kFileIoBufferSize, read_handler);
  fclose(*file);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to compute signatures");
  }
  chunker.Finalize();

  // Finish the remaining hashing tasks.
  hash_pool_->Wait();
  std::unique_ptr<Task> task = hash_pool_->TryGetCompletedTask();
  while (task) {
    static_cast<HashTask*>(task.get())->AppendHash(&response);
    free_tasks_.push_back(std::move(task));
    task = hash_pool_->TryGetCompletedTask();
  }

  // Send the remaining chunks, if any.
  if (response.sizes_size() > 0) {
    status = message_pump_->SendMessage(PacketType::kAddSignatures, response);
    if (!status.ok()) {
      return WrapStatus(status, "Failed to send final signatures");
    }
    response.Clear();
  }

  // Send an empty response as EOF marker.
  status = message_pump_->SendMessage(PacketType::kAddSignatures, response);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to send signatures EOF marker");
  }

  return absl::OkStatus();
}
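
// Recap of the signature stream produced above: batches are flushed once at
// least kMinNumChunksPerBatch chunks have accumulated, each packet pairing a
// |sizes| array with packed 16-byte hashes, and a final empty response marks
// EOF (consumed by ServerChunkReceiver on the client side).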

absl::Status CdcInterface::ReceiveSignatureAndCreateAndSendDiff(
    FILE* file, ReportCdcProgress* progress) {
  //
  // Compute signatures from the client |file| and send patches while
  // receiving server signatures.
  //
  std::vector<Chunk> client_chunks;
  ServerChunkReceiver server_chunk_receiver(message_pump_);
  PatchSender patch_sender(file, message_pump_);

  auto chunk_handler = [&client_chunks](const void* data, size_t size) {
    client_chunks.emplace_back(ComputeHash(data, size),
                               static_cast<uint32_t>(size));
  };

  fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
  fastcdc::Chunker chunker(config, chunk_handler);

  uint64_t file_size = 0;
  auto read_handler = [&chunker, &client_chunks, &server_chunk_receiver,
                       &file_size, progress,
                       &patch_sender](const void* data, size_t size) {
    // Process client chunks for the data read.
    chunker.Process(static_cast<const uint8_t*>(data), size);
    file_size += size;

    const bool all_client_chunks_read = data == nullptr;
    if (all_client_chunks_read) {
      chunker.Finalize();
    }

    do {
      // Receive any server chunks available.
      uint64_t num_server_bytes_processed = 0;
      absl::Status status = server_chunk_receiver.Receive(
          /*block=*/all_client_chunks_read, &num_server_bytes_processed);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to receive server chunks");
      }

      // Try to send patch data.
      uint64_t num_client_bytes_processed = 0;
      status = patch_sender.TryAddChunks(client_chunks, server_chunk_receiver,
                                         &num_client_bytes_processed);
      if (!status.ok()) {
        return WrapStatus(status, "Failed to send patch data");
      }

      progress->ReportSyncProgress(num_client_bytes_processed,
                                   num_server_bytes_processed);
    } while (all_client_chunks_read &&
             (!server_chunk_receiver.AllChunksReceived() ||
              patch_sender.CurrChunkIdx() < client_chunks.size()));

    return absl::OkStatus();
  };

  absl::Status status =
      path::StreamReadFileContents(file, kFileIoBufferSize, read_handler);
  if (!status.ok()) {
    return WrapStatus(status, "Failed to stream file");
  }

  // All client chunks should have been sent by now.
  assert(patch_sender.CurrChunkIdx() == client_chunks.size());

  // Flush the remaining patches.
  status = patch_sender.Flush();
  if (!status.ok()) {
    return WrapStatus(status, "Failed to flush patches");
  }

  return absl::OkStatus();
}
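
// This is the client side of the sync: chunking the local file, receiving
// server signatures, and emitting patch commands all overlap, so only chunks
// missing on the server are sent as raw data, while matching chunks are sent
// as cheap (offset, size) references into the basis file.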

absl::Status CdcInterface::ReceiveDiffAndPatch(
    const std::string& basis_filepath, FILE* patched_file,
    bool* is_executable) {
  Buffer buffer;
  *is_executable = false;

  absl::StatusOr<FILE*> basis_file = path::OpenFile(basis_filepath, "rb");
  if (!basis_file.ok()) {
    return basis_file.status();
  }
#if PLATFORM_LINUX
  // Tell the kernel we'll load the file sequentially (improves IO bandwidth).
  // The basis file is not accessed strictly sequentially, but this should be
  // the case for large parts of it.
  posix_fadvise(fileno(*basis_file), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

  bool first_chunk = true;
  for (;;) {
    AddPatchCommandsRequest request;
    absl::Status status =
        message_pump_->ReceiveMessage(PacketType::kAddPatchCommands, &request);
    if (!status.ok()) {
      fclose(*basis_file);
      return WrapStatus(status, "Failed to receive AddPatchCommandsRequest");
    }

    // All arrays must be of the same size.
    int num_chunks = request.sources_size();
    if (num_chunks != request.offsets_size() ||
        num_chunks != request.sizes_size()) {
      fclose(*basis_file);
      return MakeStatus(
          "Corrupted patch command arrays: Expected sizes %i. Actual %i/%i.",
          num_chunks, request.offsets_size(), request.sizes_size());
    }

    if (num_chunks == 0) {
      // A zero-size request marks the end of the patch commands.
      break;
    }

    for (int n = 0; n < num_chunks; ++n) {
      AddPatchCommandsRequest::Source source = request.sources(n);
      uint64_t chunk_offset = request.offsets(n);
      uint32_t chunk_size = request.sizes(n);

      const char* chunk_data = nullptr;
      if (source == AddPatchCommandsRequest::SOURCE_BASIS_FILE) {
        // Copy [chunk_offset, chunk_offset + chunk_size) from |basis_file|.
        buffer.resize(chunk_size);
        if (fseek64(*basis_file, chunk_offset, SEEK_SET) != 0 ||
            fread(buffer.data(), 1, chunk_size, *basis_file) != chunk_size) {
          fclose(*basis_file);
          return MakeStatus(
              "Failed to read %u bytes at offset %u from basis file",
              chunk_size, chunk_offset);
        }
        chunk_data = buffer.data();
      } else {
        // Write [chunk_offset, chunk_offset + chunk_size) from request data.
        assert(source == AddPatchCommandsRequest::SOURCE_DATA);
        if (request.data().size() < chunk_offset + chunk_size) {
          fclose(*basis_file);
          return MakeStatus(
              "Insufficient data in patch commands. Required %u. Actual %u.",
              chunk_offset + chunk_size, request.data().size());
        }
        chunk_data = &request.data()[chunk_offset];
      }

      if (first_chunk && chunk_size > 0) {
        first_chunk = false;
        *is_executable = Util::IsExecutable(chunk_data, chunk_size);
      }
      if (fwrite(chunk_data, 1, chunk_size, patched_file) != chunk_size) {
        fclose(*basis_file);
        return MakeStatus("Failed to write %u bytes to patched file",
                          chunk_size);
      }
    }
  }
  fclose(*basis_file);

  return absl::OkStatus();
}
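
// End-to-end flow of a single file sync, as implemented above:
//   1. Remote side: CreateAndSendSignature() streams CDC chunk signatures of
//      the old (basis) file to the client.
//   2. Client: ReceiveSignatureAndCreateAndSendDiff() chunks the new file,
//      matches hashes, and streams patch commands (basis references or raw
//      data).
//   3. Remote side: ReceiveDiffAndPatch() replays the commands against the
//      basis file to produce the patched file.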

} // namespace cdc_ft