Files
netris-cdc-file-transfer/manifest/fake_manifest_builder.cc
Christian Schneider 4326e972ac Releasing the former Stadia file transfer tools
The tools allow efficient and fast synchronization of large directory
trees from a Windows workstation to a Linux target machine.

cdc_rsync* support efficient copy of files by using content-defined
chunking (CDC) to identify chunks within files that can be reused.

asset_stream_manager + cdc_fuse_fs support efficient streaming of a
local directory to a remote virtual file system based on FUSE. It also
employs CDC to identify and reuse unchanged data chunks.
2022-11-03 10:39:10 +01:00

183 lines
6.1 KiB
C++

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "manifest/fake_manifest_builder.h"
#include <vector>
#include "absl/strings/str_format.h"
#include "common/path.h"
#include "data_store/mem_data_store.h"
#include "fastcdc/fastcdc.h"
namespace cdc_ft {
namespace {
constexpr size_t kAvgChunkSize = 1024 * 256;
constexpr size_t kMinChunkSize = kAvgChunkSize / 2;
constexpr size_t kMaxChunkSize = kAvgChunkSize * 4;
// Builds a data blob for faking a large file that contains
// <line number> - <60 random letters>
std::vector<char> BuildLargeFileData(int num_lines) {
std::vector<char> data;
char filler[60] = {0};
for (int n = 0; n < num_lines; ++n) {
for (size_t k = 0; k < sizeof(filler); ++k) {
filler[k] = (rand() % 26) + 'a';
}
std::string n_str = std::to_string(n);
data.insert(data.end(), n_str.c_str(), n_str.c_str() + n_str.size());
data.push_back('-');
data.insert(data.end(), filler, filler + sizeof(filler));
data.push_back('\n');
}
return data;
}
void UpdateFileContent(AssetProto* asset, MemDataStore* const store,
const std::vector<char>& data) {
uint64_t offset = 0;
auto chunk_handler = [asset, store, &offset](const void* data, size_t size) {
const char* char_data = reinterpret_cast<const char*>(data);
std::vector<char> data_vec;
data_vec.insert(data_vec.end(), char_data, char_data + size);
ChunkRefProto* chunk_ref = asset->add_file_chunks();
*chunk_ref->mutable_chunk_id() = store->AddData(data_vec);
chunk_ref->set_offset(offset);
offset += size;
};
fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
fastcdc::Chunker chunker(config, chunk_handler);
chunker.Process(reinterpret_cast<const uint8_t*>(data.data()), data.size());
chunker.Finalize();
}
AssetProto* FindAsset(AssetProto* dir_asset, const char* name) {
assert(dir_asset);
assert(dir_asset->type() == AssetProto::DIRECTORY);
for (AssetProto& asset : *dir_asset->mutable_dir_assets()) {
if (asset.name() == name) {
return &asset;
}
}
return nullptr;
}
} // namespace
FakeManifestBuilder::FakeManifestBuilder(MemDataStore* store) : store_(store) {
manifest_.mutable_root_dir()->set_type(AssetProto::DIRECTORY);
manifest_.mutable_root_dir()->set_permissions(kRootDirPerms);
}
FakeManifestBuilder::~FakeManifestBuilder() = default;
void FakeManifestBuilder::AddFile(AssetProto* dir_asset, const char* name,
int64_t mtime_sec, uint32_t permissions,
const std::vector<char>& data) {
assert(dir_asset);
AssetProto* asset = dir_asset->add_dir_assets();
asset->set_name(name);
asset->set_type(AssetProto::FILE);
asset->set_file_size(data.size());
asset->set_mtime_seconds(mtime_sec);
asset->set_permissions(permissions);
UpdateFileContent(asset, store_, data);
}
AssetProto* FakeManifestBuilder::AddDirectory(AssetProto* dir_asset,
const char* name,
int64_t mtime_sec,
uint32_t permissions) {
assert(dir_asset);
AssetProto* asset = dir_asset->add_dir_assets();
asset->set_name(name);
asset->set_type(AssetProto::DIRECTORY);
asset->set_mtime_seconds(mtime_sec);
asset->set_permissions(permissions);
return asset;
}
ContentIdProto FakeManifestBuilder::BuildTestData() {
const uint32_t kFileMode =
path::MODE_IRUSR | path::MODE_IWUSR | path::MODE_IRGRP | path::MODE_IROTH;
const uint32_t kDirMode = path::MODE_IRGRP | path::MODE_IXGRP |
path::MODE_IROTH | path::MODE_IXOTH |
path::MODE_IRWXU;
const int64_t kModTime = 1614843754;
// root
// |- file1.txt
// |- fio_test
// |- large_file1.txt
// |- ...
// |- large_file9.txt
// |- a
// |- file2.txt
// |- b
// |- file3.txt
AssetProto* fio_test_dir =
AddDirectory(Root(), "fio_test", kModTime, kDirMode);
// 500k lines generate a ~33 MB file.
std::vector<char> data = BuildLargeFileData(500000);
for (int n = 1; n < 9; ++n) {
std::string filename = absl::StrFormat("large_file%i.txt", n);
AddFile(fio_test_dir, filename.c_str(), kModTime, kFileMode, data);
}
AddFile(Root(), "file1.txt", kModTime, kFileMode, {'1', '3', '3', '7', '\n'});
AssetProto* a_dir = AddDirectory(Root(), "a", kModTime, kDirMode);
AddFile(a_dir, "file2.txt", kModTime, kFileMode,
{'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!', '\n'});
AssetProto* b_dir = AddDirectory(a_dir, "b", kModTime, kDirMode);
AddFile(
b_dir, "file3.txt", kModTime, kFileMode,
{0127, 0105, 0122, 0040, 0104, 0101, 0123, 0040, 0114, 0111, 0105, 0123,
0124, 0040, 0111, 0123, 0124, 0040, 0104, 0117, 0117, 0106, 0012});
return store_->AddProto(manifest_);
}
const ManifestProto* FakeManifestBuilder::Manifest() const {
return &manifest_;
}
AssetProto* FakeManifestBuilder::Root() { return manifest_.mutable_root_dir(); }
void FakeManifestBuilder::ModifyFile(AssetProto* dir_asset, const char* name,
int64_t mtime_sec, uint32_t permissions,
const std::vector<char>& data) {
assert(dir_asset);
AssetProto* asset = FindAsset(dir_asset, name);
assert(asset && asset->type() == AssetProto::FILE);
asset->set_file_size(data.size());
asset->set_mtime_seconds(mtime_sec);
asset->set_permissions(permissions);
asset->clear_file_chunks();
asset->clear_file_indirect_chunks();
UpdateFileContent(asset, store_, data);
}
} // namespace cdc_ft