mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-01-30 14:45:37 +02:00
Releasing the former Stadia file transfer tools
The tools allow efficient and fast synchronization of large directory trees from a Windows workstation to a Linux target machine. The cdc_rsync* tools support efficient copying of files by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager + cdc_fuse_fs support efficient streaming of a local directory to a remote virtual file system based on FUSE. They also employ CDC to identify and reuse unchanged data chunks.
This commit is contained in:
253
manifest/file_chunk_map.cc
Normal file
253
manifest/file_chunk_map.cc
Normal file
@@ -0,0 +1,253 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/file_chunk_map.h"
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "manifest/stats_printer.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// Constructs an empty map. A StatsPrinter is created only when |enable_stats|
// is true; otherwise |stats_| is left untouched.
FileChunkMap::FileChunkMap(bool enable_stats) {
  if (!enable_stats) return;

  stats_ = std::make_unique<StatsPrinter>();
}
|
||||
|
||||
// Defaulted destructor, defined in this translation unit where
// "manifest/stats_printer.h" is included.
// NOTE(review): presumably out-of-line because the header only forward
// declares StatsPrinter -- confirm against file_chunk_map.h.
FileChunkMap::~FileChunkMap() = default;
|
||||
|
||||
void FileChunkMap::Init(std::string path, uint64_t file_size,
|
||||
std::vector<FileChunk>* chunks) {
|
||||
FileUpdate update(FileUpdateType::kInit, std::move(path));
|
||||
update.file_size = file_size;
|
||||
if (chunks) update.chunks = std::move(*chunks);
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::AppendCopy(std::string path,
|
||||
const RepeatedChunkRefProto& list,
|
||||
uint64_t list_offset) {
|
||||
FileUpdate update(FileUpdateType::kAppend, std::move(path));
|
||||
update.chunks.reserve(list.size());
|
||||
for (const ChunkRefProto& ch : list)
|
||||
update.chunks.emplace_back(ch.chunk_id(), ch.offset() + list_offset);
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::AppendMove(std::string path, RepeatedChunkRefProto* list,
|
||||
uint64_t list_offset) {
|
||||
FileUpdate update(FileUpdateType::kAppend, std::move(path));
|
||||
update.chunks.reserve(list->size());
|
||||
for (ChunkRefProto& ch : *list) {
|
||||
update.chunks.emplace_back(std::move(*ch.mutable_chunk_id()),
|
||||
ch.offset() + list_offset);
|
||||
}
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::Remove(std::string path) {
|
||||
FileUpdate update(FileUpdateType::kRemove, std::move(path));
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::Clear() {
|
||||
FileUpdate update(FileUpdateType::kClear, std::string());
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::FlushUpdates() {
|
||||
if (file_updates_.empty()) return;
|
||||
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
for (FileUpdate& update : file_updates_) {
|
||||
switch (update.type) {
|
||||
case FileUpdateType::kInit: {
|
||||
File& file = path_to_file_[update.path];
|
||||
file.size = update.file_size;
|
||||
assert(total_chunks_ >= file.chunks.size());
|
||||
total_chunks_ -= file.chunks.size();
|
||||
total_chunks_ += update.chunks.size();
|
||||
file.chunks = std::move(update.chunks);
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kAppend: {
|
||||
File& file = path_to_file_[update.path];
|
||||
total_chunks_ += update.chunks.size();
|
||||
if (file.chunks.empty()) {
|
||||
file.chunks = std::move(update.chunks);
|
||||
} else {
|
||||
file.chunks.reserve(file.chunks.size() + update.chunks.size());
|
||||
std::move(std::begin(update.chunks), std::end(update.chunks),
|
||||
std::back_inserter(file.chunks));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kRemove: {
|
||||
const auto iter = path_to_file_.find(update.path);
|
||||
if (iter == path_to_file_.end()) break;
|
||||
assert(total_chunks_ >= iter->second.chunks.size());
|
||||
total_chunks_ -= iter->second.chunks.size();
|
||||
path_to_file_.erase(iter);
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kClear: {
|
||||
path_to_file_.clear();
|
||||
total_chunks_ = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file_updates_.clear();
|
||||
|
||||
UpdateIdToChunkMap();
|
||||
}
|
||||
|
||||
bool FileChunkMap::Lookup(const ContentIdProto& content_id, std::string* path,
|
||||
uint64_t* offset, uint32_t* size) {
|
||||
assert(path && offset && size);
|
||||
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
return FindChunk(content_id, path, offset, size, nullptr);
|
||||
}
|
||||
|
||||
void FileChunkMap::RecordStreamedChunk(const ContentIdProto& content_id,
|
||||
size_t thread_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
if (!stats_) return;
|
||||
|
||||
if (streamed_chunks_to_thread_.find(content_id) !=
|
||||
streamed_chunks_to_thread_.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
if (FindChunk(content_id, &path, nullptr, &size, &index))
|
||||
stats_->RecordStreamedChunk(path, index, size, thread_id);
|
||||
streamed_chunks_to_thread_[content_id] = thread_id;
|
||||
}
|
||||
|
||||
void FileChunkMap::RecordCachedChunk(const ContentIdProto& content_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
if (!stats_) return;
|
||||
|
||||
if (cached_chunks_.find(content_id) != cached_chunks_.end()) return;
|
||||
|
||||
// Restarting FUSE might report cached chunks that have been originally
|
||||
// streamed. Ignore those.
|
||||
if (streamed_chunks_to_thread_.find(content_id) !=
|
||||
streamed_chunks_to_thread_.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
if (FindChunk(content_id, &path, nullptr, &size, &index))
|
||||
stats_->RecordCachedChunk(path, index, size);
|
||||
cached_chunks_.insert(content_id);
|
||||
}
|
||||
|
||||
// Prints the collected stats while holding |mutex_|. No-op if stats are
// disabled.
void FileChunkMap::PrintStats() {
  absl::MutexLock lock(&mutex_);

  if (stats_) {
    stats_->Print();
  }
}
|
||||
|
||||
bool FileChunkMap::HasStats() const {
|
||||
absl::ReaderMutexLock lock(&mutex_);
|
||||
return stats_ != nullptr;
|
||||
}
|
||||
|
||||
void FileChunkMap::UpdateIdToChunkMap() {
|
||||
assert((mutex_.AssertHeld(), true));
|
||||
|
||||
// Put all chunks into the map.
|
||||
id_to_chunk_.clear();
|
||||
id_to_chunk_.reserve(total_chunks_);
|
||||
for (const auto& [path, file] : path_to_file_) {
|
||||
for (uint32_t n = 0; n < static_cast<uint32_t>(file.chunks.size()); ++n)
|
||||
id_to_chunk_[ContentIdRef(file.chunks[n].content_id)] = {&path, n};
|
||||
}
|
||||
|
||||
// Might be "<" if multiple files contain the same chunk.
|
||||
assert(id_to_chunk_.size() <= total_chunks_);
|
||||
|
||||
// Rebuild stats if present.
|
||||
if (stats_) {
|
||||
stats_->Clear();
|
||||
for (const auto& [path, file] : path_to_file_)
|
||||
stats_->InitFile(path, file.chunks.size());
|
||||
|
||||
// Fill in the streamed chunks.
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
for (const auto& [id, thread_id] : streamed_chunks_to_thread_) {
|
||||
if (FindChunk(id, &path, nullptr, &size, &index))
|
||||
stats_->RecordStreamedChunk(path, index, size, thread_id);
|
||||
}
|
||||
|
||||
// Fill in the cached chunks.
|
||||
for (const ContentIdProto& id : cached_chunks_) {
|
||||
if (FindChunk(id, &path, nullptr, &size, &index))
|
||||
stats_->RecordCachedChunk(path, index, size);
|
||||
}
|
||||
|
||||
// Make sure the above RecordStreamedChunk() calls don't count towards
|
||||
// bandwidth stats.
|
||||
stats_->ResetBandwidthStats();
|
||||
}
|
||||
}
|
||||
|
||||
bool FileChunkMap::FindChunk(const ContentIdProto& content_id,
|
||||
std::string* path, uint64_t* offset,
|
||||
uint32_t* size, size_t* index) {
|
||||
assert((mutex_.AssertHeld(), true));
|
||||
|
||||
// Find the |id_to_chunk_| entry by |content_id|. It might not exist if
|
||||
// changes to the manifest have not propagated to gamelets yet.
|
||||
IdToChunkMap::iterator i2c_iter = id_to_chunk_.find(ContentIdRef(content_id));
|
||||
if (i2c_iter == id_to_chunk_.end()) return false;
|
||||
|
||||
// Find the chunk location by path. This lookup should not fail because
|
||||
// |path_to_file_| and |id_to_chunk_| should always be in sync here.
|
||||
const ChunkLocation& loc = i2c_iter->second;
|
||||
PathToFileMap::iterator p2f_iter = path_to_file_.find(*loc.path);
|
||||
assert(p2f_iter != path_to_file_.end());
|
||||
|
||||
// Compute path, chunk offset and chunk size.
|
||||
const File& file = p2f_iter->second;
|
||||
assert(loc.index < file.chunks.size());
|
||||
uint64_t this_offset = file.chunks[loc.index].offset;
|
||||
uint64_t next_offset = loc.index + 1 == file.chunks.size()
|
||||
? file.size
|
||||
: file.chunks[loc.index + 1].offset;
|
||||
if (path) *path = *loc.path;
|
||||
if (offset) *offset = this_offset;
|
||||
if (size) *size = static_cast<uint32_t>(next_offset - this_offset);
|
||||
if (index) *index = loc.index;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
Reference in New Issue
Block a user