Mirror of https://github.com/nestriness/cdc-file-transfer.git (synced 2026-01-30 10:35:37 +02:00)
Releasing the former Stadia file transfer tools
These tools enable fast, efficient synchronization of large directory trees from a Windows workstation to a Linux target machine. cdc_rsync* supports efficient file copies by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager + cdc_fuse_fs support efficient streaming of a local directory to a remote FUSE-based virtual file system; they likewise use CDC to identify and reuse unchanged data chunks.
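To illustrate the idea behind CDC, the sketch below shows a gear-style rolling-hash chunker in C++. It is an illustrative sketch only, not the fastcdc library or cdc_rsync code added by this commit; the names (cdc_sketch, Gear, Chunk) and the chunk-size constants are assumptions made for the example. Chunk boundaries are cut where a hash of the most recent bytes matches a bit pattern, so an insertion or deletion early in a file shifts only nearby boundaries, and all later chunks keep their content IDs and can be reused.

// Illustrative sketch only -- not the fastcdc implementation from this commit.
#include <cstddef>
#include <cstdint>
#include <functional>

namespace cdc_sketch {

constexpr uint64_t kBoundaryMask = (1u << 13) - 1;  // ~8 KiB average chunks.
constexpr size_t kMinChunkSize = 2 * 1024;
constexpr size_t kMaxChunkSize = 64 * 1024;

// Deterministic per-byte random value for the gear hash.
inline uint64_t Gear(uint8_t b) {
  uint64_t x = b + 1;
  x *= 0x9E3779B97F4A7C15ull;  // Splitmix64-style mixing.
  x ^= x >> 31;
  return x * 0xBF58476D1CE4E5B9ull;
}

// Calls |on_chunk(start, length)| once per content-defined chunk of |data|.
// The hash depends only on the most recent bytes (old bytes shift out after
// 64 steps), which is what makes the boundaries content-defined.
inline void Chunk(const uint8_t* data, size_t size,
                  const std::function<void(const uint8_t*, size_t)>& on_chunk) {
  size_t chunk_start = 0;
  uint64_t hash = 0;
  for (size_t i = 0; i < size; ++i) {
    hash = (hash << 1) + Gear(data[i]);
    const size_t len = i + 1 - chunk_start;
    if ((len >= kMinChunkSize && (hash & kBoundaryMask) == 0) ||
        len >= kMaxChunkSize) {
      on_chunk(data + chunk_start, len);
      chunk_start = i + 1;
      hash = 0;
    }
  }
  if (chunk_start < size) on_chunk(data + chunk_start, size - chunk_start);
}

}  // namespace cdc_sketch

In the actual tools, each chunk is identified by a BLAKE3-based content ID (see manifest/content_id.cc below), and only chunks whose IDs are missing on the remote side need to be transferred.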
220
manifest/BUILD
Normal file
@@ -0,0 +1,220 @@
package(default_visibility = ["//:__subpackages__"])

cc_library(
    name = "content_id",
    srcs = ["content_id.cc"],
    hdrs = ["content_id.h"],
    deps = [
        ":manifest_proto_defs",
        "@com_github_blake3//:blake3",
        "@com_google_absl//absl/strings",
    ],
)

cc_test(
    name = "content_id_test",
    srcs = ["content_id_test.cc"],
    deps = [
        ":content_id",
        "@com_google_googletest//:gtest",
        "@com_google_googletest//:gtest_main",
    ],
)

cc_library(
    name = "manifest_proto_defs",
    hdrs = ["manifest_proto_defs.h"],
    deps = ["//proto:manifest_cc_proto"],
)

cc_library(
    name = "fake_manifest_builder",
    srcs = ["fake_manifest_builder.cc"],
    hdrs = ["fake_manifest_builder.h"],
    deps = [
        ":manifest_proto_defs",
        "//common:path",
        "//data_store:mem_data_store",
        "//fastcdc",
    ],
)

cc_test(
    name = "fake_manifest_builder_test",
    srcs = ["fake_manifest_builder_test.cc"],
    deps = [
        ":fake_manifest_builder",
        "//common:status_test_macros",
        "@com_google_googletest//:gtest",
        "@com_google_googletest//:gtest_main",
    ],
)

cc_library(
    name = "manifest_builder",
    srcs = [
        "asset_builder.cc",
        "manifest_builder.cc",
    ],
    hdrs = [
        "asset_builder.h",
        "manifest_builder.h",
    ],
    deps = [
        ":content_id",
        ":manifest_proto_defs",
        "//common:log",
        "//common:path",
        "//common:status",
        "//common:status_macros",
        "//common:util",
        "//data_store",
        "@com_google_absl//absl/status:statusor",
    ],
)

cc_test(
    name = "manifest_builder_test",
    srcs = ["manifest_builder_test.cc"],
    deps = [
        ":manifest_builder",
        ":manifest_iterator",
        ":manifest_printer",
        "//common:status_test_macros",
        "//data_store:mem_data_store",
        "@com_google_googletest//:gtest",
        "@com_google_googletest//:gtest_main",
    ],
)

cc_library(
    name = "manifest_iterator",
    srcs = ["manifest_iterator.cc"],
    hdrs = ["manifest_iterator.h"],
    deps = [
        "//common:log",
        "//common:path",
        "//data_store",
    ],
)

cc_library(
    name = "manifest_printer",
    srcs = ["manifest_printer.cc"],
    hdrs = ["manifest_printer.h"],
    deps = [
        ":content_id",
        ":manifest_proto_defs",
        "@com_google_protobuf//:protobuf",
    ],
)

cc_library(
    name = "manifest_updater",
    srcs = ["manifest_updater.cc"],
    hdrs = ["manifest_updater.h"],
    deps = [
        ":file_chunk_map",
        ":manifest_builder",
        ":manifest_iterator",
        ":manifest_proto_defs",
        ":stats_printer",
        "//common:log",
        "//common:path",
        "//common:stopwatch",
        "//common:threadpool",
        "//common:util",
        "//data_store",
        "//fastcdc",
        "@com_google_absl//absl/status",
    ],
)

cc_library(
    name = "stats_printer",
    srcs = ["stats_printer.cc"],
    hdrs = ["stats_printer.h"],
    copts = select({
        "//tools:windows": ["/wd4324"],  # "structure was padded" from flat_hash_map
        "//conditions:default": [],
    }),
    deps = [
        "//common:path",
        "//common:stopwatch",
        "//fastcdc",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
    ],
)

cc_library(
    name = "file_chunk_map",
    srcs = ["file_chunk_map.cc"],
    hdrs = ["file_chunk_map.h"],
    copts = select({
        "//tools:windows": ["/wd4324"],  # "structure was padded" from flat_hash_map
        "//conditions:default": [],
    }),
    deps = [
        ":manifest_proto_defs",
        ":stats_printer",
        "//manifest:content_id",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
    ],
)

cc_test(
    name = "file_chunk_map_test",
    srcs = ["file_chunk_map_test.cc"],
    deps = [
        ":file_chunk_map",
        "//common:test_main",
        "@com_google_googletest//:gtest",
    ],
)

cc_library(
    name = "manifest_test_base",
    srcs = ["manifest_test_base.cc"],
    hdrs = ["manifest_test_base.h"],
    deps = [
        ":manifest_iterator",
        ":manifest_printer",
        ":manifest_updater",
        "//common:path",
        "//common:status_test_macros",
        "//data_store:mem_data_store",
        "@com_google_googletest//:gtest",
    ],
)

# This test only succeeds on Windows if the timezone is set to the local host's
# timezone, but Bazel by default sets the test timezone to UTC.
#
# Run this test as follows to preserve the host's timezone:
#   bazel test --action_env=TZ=Local
cc_test(
    name = "manifest_updater_test",
    srcs = ["manifest_updater_test.cc"],
    data = [":all_test_data"],
    deps = [
        ":manifest_test_base",
        ":manifest_updater",
        "//common:test_main",
        "//data_store:mem_data_store",
        "@com_google_googletest//:gtest",
    ],
)

filegroup(
    name = "all_test_sources",
    srcs = glob(["*_test.cc"]),
)

filegroup(
    name = "all_test_data",
    srcs = glob(["testdata/**"]),
)
115
manifest/asset_builder.cc
Normal file
@@ -0,0 +1,115 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "manifest/asset_builder.h"

#include "absl/strings/str_cat.h"
#include "common/path.h"

namespace cdc_ft {

AssetBuilder::AssetBuilder() = default;

AssetBuilder::AssetBuilder(AssetProto* proto, const std::string& rel_path)
    : proto_(proto), rel_path_(path::ToUnix(rel_path)) {}

AssetBuilder::~AssetBuilder() = default;

std::string AssetBuilder::RelativeFilePath() const {
  if (!proto_) return std::string();
  return path::JoinUnix(rel_path_, proto_->name());
}

void AssetBuilder::AppendChunk(const ContentIdProto& content_id, size_t len) {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::FILE);
  // TODO: Handle indirect chunks.
  assert(proto_->file_indirect_chunks_size() == 0);
  ChunkRefProto* chunk_ref = proto_->add_file_chunks();
  chunk_ref->set_offset(proto_->file_size());
  chunk_ref->mutable_chunk_id()->CopyFrom(content_id);
  proto_->set_file_size(proto_->file_size() + len);
}

void AssetBuilder::TruncateChunks() {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::FILE);
  proto_->mutable_file_chunks()->Clear();
  proto_->mutable_file_indirect_chunks()->Clear();
  proto_->set_file_size(0);
}

void AssetBuilder::SetChunks(const RepeatedChunkRefProto& chunks,
                             uint64_t file_size) {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::FILE);
  proto_->mutable_file_chunks()->Clear();
  proto_->mutable_file_chunks()->CopyFrom(chunks);
  proto_->mutable_file_indirect_chunks()->Clear();
  proto_->set_file_size(file_size);
}

void AssetBuilder::SwapChunks(RepeatedChunkRefProto* chunks,
                              uint64_t file_size) {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::FILE);
  proto_->mutable_file_chunks()->Swap(chunks);
  proto_->mutable_file_indirect_chunks()->Clear();
  proto_->set_file_size(file_size);
}

void AssetBuilder::SetFileSize(uint64_t file_size) {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::FILE);
  proto_->set_file_size(file_size);
}

AssetBuilder AssetBuilder::AppendAsset(const std::string& name,
                                       AssetProto::Type type) {
  assert(proto_ != nullptr);
  assert(proto_->type() == AssetProto::DIRECTORY);
  AssetProto* child = proto_->add_dir_assets();
  child->set_type(type);
  child->set_name(name);
  return AssetBuilder(child, RelativeFilePath());
}

bool AssetBuilder::InProgress() const {
  if (!proto_) return false;
  return proto_->in_progress();
}

void AssetBuilder::SetInProgress(bool in_progress) {
  assert(proto_ != nullptr);
  proto_->set_in_progress(in_progress);
}

void AssetBuilder::SetProto(AssetProto* proto, const std::string& rel_path) {
  Clear();
  proto_ = proto;
  absl::StrAppend(&rel_path_, path::ToUnix(rel_path));
}

void AssetBuilder::Clear() {
  proto_ = nullptr;
  rel_path_.resize(0);
}

AssetBuilder& AssetBuilder::operator=(const AssetBuilder& other) {
  proto_ = other.proto_;
  rel_path_ = other.rel_path_;
  return *this;
}

}  // namespace cdc_ft
151
manifest/asset_builder.h
Normal file
@@ -0,0 +1,151 @@
/*
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MANIFEST_ASSET_BUILDER_H_
#define MANIFEST_ASSET_BUILDER_H_

#include "manifest/manifest_proto_defs.h"

namespace cdc_ft {

class AssetBuilder {
 public:
  AssetBuilder();

  // Creates a new AssetBuilder referencing the given |proto| and relative path
  // |rel_path|. Ownership of |proto| remains with the caller, and the proto
  // must remain valid while the AssetBuilder is in use.
  AssetBuilder(AssetProto* proto, const std::string& rel_path);
  ~AssetBuilder();

  // The assignment operator ignores the constant member |empty_|.
  AssetBuilder& operator=(const AssetBuilder& other);

  // Returns the modification timestamp of this asset.
  uint64_t MtimeSeconds() const { return proto_ ? proto_->mtime_seconds() : 0; }

  // Sets the modification timestamp of this asset to |mtime|.
  void SetMtimeSeconds(uint64_t mtime) {
    if (proto_) proto_->set_mtime_seconds(mtime);
  }

  // Returns the permission bits of this asset (RWX for user, group, world, in
  // that order).
  uint32_t Permissions() const { return proto_ ? proto_->permissions() : 0; }

  // Sets the permission bits of this asset to |perms|.
  void SetPermissions(uint32_t perms) {
    if (proto_) proto_->set_permissions(perms);
  }

  // Returns the file name of this asset.
  const std::string& Name() const { return proto_ ? proto_->name() : empty_; }

  // Returns the asset type.
  AssetProto::Type Type() const {
    return proto_ ? proto_->type() : AssetProto::UNKNOWN;
  }

  // Returns the Unix path of the directory containing this asset relative to
  // the manifest root directory, as specified during construction or
  // SetProto().
  const std::string& RelativePath() const { return rel_path_; }

  // Returns the path and file name of this asset relative to the manifest root
  // directory.
  std::string RelativeFilePath() const;

  // Returns this asset's in_progress status.
  bool InProgress() const;

  // Sets the asset's in_progress status.
  void SetInProgress(bool in_progress);

  // For FILE assets, appends the chunk with the given |content_id| and |len| to
  // the list of chunks. The chunk's offset is determined automatically from the
  // current file size.
  //
  // Asserts that the asset is actually of type FILE and that the file does not
  // have any associated indirect chunk lists.
  void AppendChunk(const ContentIdProto& content_id, size_t len);

  // For FILE assets, removes all chunks from this file and resets the file size
  // to zero.
  //
  // Asserts that the asset is actually of type FILE.
  void TruncateChunks();

  // Sets this file's chunks from the ones given in the provided |chunks| list
  // and the total size to |file_size|. Copies the proto contents and clears all
  // indirect chunk lists.
  //
  // Asserts that the asset is actually of type FILE.
  void SetChunks(const RepeatedChunkRefProto& chunks, uint64_t file_size);

  // Swaps this file's chunks with the ones given in the provided |chunks| list
  // and sets the total size to |file_size|. This avoids copying the data.
  // Clears all indirect chunk lists.
  //
  // Asserts that the asset is actually of type FILE.
  void SwapChunks(RepeatedChunkRefProto* chunks, uint64_t file_size);

  // Sets this file's size.
  //
  // Asserts that the asset is actually of type FILE.
  void SetFileSize(uint64_t file_size);

  // For DIRECTORY assets, adds a new direct asset to the end of the list. Does
  // *not* verify whether an asset with that name already exists.
  //
  // Asserts that the asset is actually of type DIRECTORY.
  AssetBuilder AppendAsset(const std::string& name, AssetProto::Type type);

  // Returns the symlink target for symlinks.
  const std::string& SymlinkTarget() const {
    return proto_ ? proto_->symlink_target() : empty_;
  }

  // Sets the target for symlinks.
  void SetSymlinkTarget(const std::string& target) {
    if (proto_) proto_->set_symlink_target(target);
  }

  // Returns a pointer to the proto that this AssetBuilder references.
  const AssetProto* Proto() const { return proto_; }
  AssetProto* Proto() { return proto_; }

  // Sets the |proto| and relative path |rel_path| this AssetBuilder refers to.
  // Ownership of |proto| remains with the caller, and the proto must remain
  // valid while the AssetBuilder is in use.
  void SetProto(AssetProto* proto, const std::string& rel_path);

 private:
  // Resets this AssetBuilder.
  void Clear();

  // Empty string to return as reference when no proto is set.
  const std::string empty_;

  // The proto this AssetBuilder refers to.
  AssetProto* proto_ = nullptr;

  // The path leading to this asset relative to the manifest root.
  std::string rel_path_;
};

}  // namespace cdc_ft

#endif  // MANIFEST_ASSET_BUILDER_H_
99
manifest/content_id.cc
Normal file
@@ -0,0 +1,99 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
#include "blake3.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
// Converts |n| in the range 0..15 to its lower-case hex representation.
|
||||
// Returns -1 if |n| is not in the range 0..15.
|
||||
char IntToHex(uint8_t n) {
|
||||
if (n <= 9) return '0' + n;
|
||||
if (n <= 15) return 'a' + n - 10;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Converts the lower-case hex character |c| to its integer representation.
|
||||
// Returns -1 if |c| is not a valid lower-case hex character.
|
||||
int HexToInt(char c) {
|
||||
if (c >= '0' && c <= '9') return c - '0';
|
||||
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
|
||||
return -1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
ContentIdProto ContentId::FromDataString(const std::string& data) {
|
||||
return FromArray(data.c_str(), data.size());
|
||||
}
|
||||
|
||||
// static
|
||||
ContentIdProto ContentId::FromDataString(absl::string_view data) {
|
||||
return FromArray(data.data(), data.size());
|
||||
}
|
||||
|
||||
// static
|
||||
ContentIdProto ContentId::FromArray(const void* data, size_t len) {
|
||||
blake3_hasher state;
|
||||
uint8_t out[kHashSize];
|
||||
blake3_hasher_init(&state);
|
||||
blake3_hasher_update(&state, data, len);
|
||||
blake3_hasher_finalize(&state, out, kHashSize);
|
||||
ContentIdProto content_id;
|
||||
content_id.set_blake3_sum_160(out, kHashSize);
|
||||
return content_id;
|
||||
}
|
||||
|
||||
// static
|
||||
std::string ContentId::ToHexString(const ContentIdProto& content_id) {
|
||||
absl::string_view blake3_sum(content_id.blake3_sum_160());
|
||||
std::string ret;
|
||||
ret.reserve(blake3_sum.size() << 1);
|
||||
for (size_t i = 0; i < blake3_sum.size(); ++i) {
|
||||
ret.push_back(IntToHex(static_cast<uint8_t>(blake3_sum[i]) >> 4));
|
||||
ret.push_back(IntToHex(static_cast<uint8_t>(blake3_sum[i]) & 0xf));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// static
|
||||
bool ContentId::FromHexString(const std::string& str,
|
||||
ContentIdProto* content_id) {
|
||||
if (str.size() != kHashSize * 2) return false;
|
||||
|
||||
std::string* hash = content_id->mutable_blake3_sum_160();
|
||||
hash->clear();
|
||||
hash->reserve(kHashSize);
|
||||
for (int n = 0; n < str.size(); n += 2) {
|
||||
int high = HexToInt(str[n]);
|
||||
int low = HexToInt(str[n + 1]);
|
||||
if (high == -1 || low == -1) {
|
||||
hash->clear();
|
||||
return false;
|
||||
}
|
||||
hash->push_back((high << 4) + low);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// static
|
||||
uint8_t ContentId::GetByte(const ContentIdProto& content_id, size_t pos) {
|
||||
if (pos >= content_id.blake3_sum_160().size()) return 0;
|
||||
return content_id.blake3_sum_160()[pos];
|
||||
}
|
||||
} // namespace cdc_ft
|
||||
90
manifest/content_id.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_CONTENT_ID_H_
|
||||
#define MANIFEST_CONTENT_ID_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// This helper class provides some utility functions to work with ContentIdProto
|
||||
// messages.
|
||||
class ContentId {
|
||||
public:
|
||||
// Hashes are 160 bit long.
|
||||
static constexpr size_t kHashSize = 20;
|
||||
|
||||
// Returns content ID for the |data| passed in as a string.
|
||||
static ContentIdProto FromDataString(const std::string& data);
|
||||
|
||||
// Returns the content ID for the |data| passed in as a string_view.
|
||||
static ContentIdProto FromDataString(absl::string_view data);
|
||||
|
||||
// Returns the content ID for the |data| passed in as a pointer.
|
||||
static ContentIdProto FromArray(const void* data, size_t len);
|
||||
|
||||
// Converts the given content ID into a hex string. The string will consist of
|
||||
// the hex digits of the hash ('0'...'9', 'a'...'f'), so a 160 bit hash
|
||||
// results in a string of length kHashSize * 2.
|
||||
static std::string ToHexString(const ContentIdProto& content_id);
|
||||
|
||||
// Converts the given hex string into a content ID. The string is assumed to
|
||||
// consist of the hex digits of the hash ('0'...'9', 'a'...'f'), so a 160 bit
|
||||
// hash would have length kHashSize * 2. Returns false if |str| is malformed.
|
||||
static bool FromHexString(const std::string& str, ContentIdProto* content_id);
|
||||
|
||||
// Returns the |pos| byte of |content_id|.
|
||||
// Returns 0 if |content_id| is not set or |pos| is invalid.
|
||||
static uint8_t GetByte(const ContentIdProto& content_id, size_t pos);
|
||||
};
|
||||
|
||||
namespace proto {
|
||||
|
||||
inline bool operator==(const ContentId& a, const ContentId& b) {
|
||||
return a.blake3_sum_160() == b.blake3_sum_160();
|
||||
}
|
||||
|
||||
inline bool operator!=(const ContentId& a, const ContentId& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
inline bool operator<(const ContentId& a, const ContentId& b) {
|
||||
return a.blake3_sum_160() < b.blake3_sum_160();
|
||||
}
|
||||
|
||||
} // namespace proto
|
||||
} // namespace cdc_ft
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<cdc_ft::ContentIdProto> {
|
||||
size_t operator()(const cdc_ft::ContentIdProto& id) const {
|
||||
// Pick the first 8 bytes of the hash (assuming 64 bit binary).
|
||||
if (id.blake3_sum_160().size() < sizeof(size_t)) {
|
||||
return 0;
|
||||
}
|
||||
return *reinterpret_cast<const size_t*>(id.blake3_sum_160().data());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
#endif // MANIFEST_CONTENT_ID_H_
|
||||
79
manifest/content_id_test.cc
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
namespace {
|
||||
|
||||
using StringList = std::vector<absl::string_view>;
|
||||
|
||||
static constexpr char kData[] = "Hey Google, tell me a joke.";
|
||||
static constexpr size_t kHashSize = 20;
|
||||
static constexpr char kHash[kHashSize + 1] =
|
||||
"\x12\xe8\x41\x41\x39\x93\x13\x82\x34\xd0\xfe\xcb\x4e\xcf\x6a\x4c\xfd\x74"
|
||||
"\x55\x27";
|
||||
static constexpr char kHashHex[] = "12e841413993138234d0fecb4ecf6a4cfd745527";
|
||||
|
||||
TEST(ContentIdTest, StringToContentId) {
|
||||
ContentIdProto content_id = ContentId::FromDataString(std::string(kData));
|
||||
EXPECT_EQ(content_id.blake3_sum_160().size(), kHashSize);
|
||||
EXPECT_EQ(content_id.blake3_sum_160(), absl::string_view(kHash, kHashSize));
|
||||
}
|
||||
|
||||
TEST(ContentIdTest, StringViewToContentId) {
|
||||
ContentIdProto content_id =
|
||||
ContentId::FromDataString(absl::string_view(kData));
|
||||
EXPECT_EQ(content_id.blake3_sum_160().size(), kHashSize);
|
||||
EXPECT_EQ(content_id.blake3_sum_160(), absl::string_view(kHash, kHashSize));
|
||||
}
|
||||
|
||||
TEST(ContentIdTest, PtrToContentId) {
|
||||
absl::string_view data(kData);
|
||||
ContentIdProto content_id = ContentId::FromArray(data.data(), data.size());
|
||||
EXPECT_EQ(content_id.blake3_sum_160().size(), kHashSize);
|
||||
EXPECT_EQ(content_id.blake3_sum_160(), absl::string_view(kHash, kHashSize));
|
||||
}
|
||||
|
||||
TEST(ContentIdTest, ToHexString) {
|
||||
ContentIdProto content_id =
|
||||
ContentId::FromDataString(absl::string_view(kData));
|
||||
std::string hash_str = ContentId::ToHexString(content_id);
|
||||
EXPECT_EQ(hash_str.size(), 2 * kHashSize);
|
||||
EXPECT_EQ(hash_str, kHashHex);
|
||||
}
|
||||
|
||||
TEST(ContentIdTest, FromHexString) {
|
||||
ContentIdProto content_id;
|
||||
EXPECT_TRUE(ContentId::FromHexString(kHashHex, &content_id));
|
||||
EXPECT_EQ(content_id.blake3_sum_160(), kHash);
|
||||
}
|
||||
|
||||
TEST(ContentIdTest, GetByte) {
|
||||
ContentIdProto content_id;
|
||||
EXPECT_EQ(ContentId::GetByte(content_id, 0), 0);
|
||||
EXPECT_EQ(ContentId::GetByte(content_id, 1000), 0);
|
||||
|
||||
EXPECT_TRUE(ContentId::FromHexString(kHashHex, &content_id));
|
||||
EXPECT_EQ(ContentId::GetByte(content_id, 0), static_cast<uint8_t>(kHash[0]));
|
||||
EXPECT_EQ(ContentId::GetByte(content_id, 1), static_cast<uint8_t>(kHash[1]));
|
||||
EXPECT_EQ(ContentId::GetByte(content_id, 20), 0);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace cdc_ft
|
||||
182
manifest/fake_manifest_builder.cc
Normal file
@@ -0,0 +1,182 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/fake_manifest_builder.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/path.h"
|
||||
#include "data_store/mem_data_store.h"
|
||||
#include "fastcdc/fastcdc.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
constexpr size_t kAvgChunkSize = 1024 * 256;
|
||||
constexpr size_t kMinChunkSize = kAvgChunkSize / 2;
|
||||
constexpr size_t kMaxChunkSize = kAvgChunkSize * 4;
|
||||
|
||||
// Builds a data blob for faking a large file that contains
|
||||
// <line number> - <60 random letters>
|
||||
std::vector<char> BuildLargeFileData(int num_lines) {
|
||||
std::vector<char> data;
|
||||
char filler[60] = {0};
|
||||
for (int n = 0; n < num_lines; ++n) {
|
||||
for (size_t k = 0; k < sizeof(filler); ++k) {
|
||||
filler[k] = (rand() % 26) + 'a';
|
||||
}
|
||||
std::string n_str = std::to_string(n);
|
||||
data.insert(data.end(), n_str.c_str(), n_str.c_str() + n_str.size());
|
||||
data.push_back('-');
|
||||
data.insert(data.end(), filler, filler + sizeof(filler));
|
||||
data.push_back('\n');
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
void UpdateFileContent(AssetProto* asset, MemDataStore* const store,
|
||||
const std::vector<char>& data) {
|
||||
uint64_t offset = 0;
|
||||
auto chunk_handler = [asset, store, &offset](const void* data, size_t size) {
|
||||
const char* char_data = reinterpret_cast<const char*>(data);
|
||||
std::vector<char> data_vec;
|
||||
data_vec.insert(data_vec.end(), char_data, char_data + size);
|
||||
ChunkRefProto* chunk_ref = asset->add_file_chunks();
|
||||
*chunk_ref->mutable_chunk_id() = store->AddData(data_vec);
|
||||
chunk_ref->set_offset(offset);
|
||||
offset += size;
|
||||
};
|
||||
|
||||
fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize);
|
||||
fastcdc::Chunker chunker(config, chunk_handler);
|
||||
|
||||
chunker.Process(reinterpret_cast<const uint8_t*>(data.data()), data.size());
|
||||
chunker.Finalize();
|
||||
}
|
||||
|
||||
AssetProto* FindAsset(AssetProto* dir_asset, const char* name) {
|
||||
assert(dir_asset);
|
||||
assert(dir_asset->type() == AssetProto::DIRECTORY);
|
||||
for (AssetProto& asset : *dir_asset->mutable_dir_assets()) {
|
||||
if (asset.name() == name) {
|
||||
return &asset;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
FakeManifestBuilder::FakeManifestBuilder(MemDataStore* store) : store_(store) {
|
||||
manifest_.mutable_root_dir()->set_type(AssetProto::DIRECTORY);
|
||||
manifest_.mutable_root_dir()->set_permissions(kRootDirPerms);
|
||||
}
|
||||
|
||||
FakeManifestBuilder::~FakeManifestBuilder() = default;
|
||||
|
||||
void FakeManifestBuilder::AddFile(AssetProto* dir_asset, const char* name,
|
||||
int64_t mtime_sec, uint32_t permissions,
|
||||
const std::vector<char>& data) {
|
||||
assert(dir_asset);
|
||||
AssetProto* asset = dir_asset->add_dir_assets();
|
||||
asset->set_name(name);
|
||||
asset->set_type(AssetProto::FILE);
|
||||
asset->set_file_size(data.size());
|
||||
asset->set_mtime_seconds(mtime_sec);
|
||||
asset->set_permissions(permissions);
|
||||
|
||||
UpdateFileContent(asset, store_, data);
|
||||
}
|
||||
|
||||
AssetProto* FakeManifestBuilder::AddDirectory(AssetProto* dir_asset,
|
||||
const char* name,
|
||||
int64_t mtime_sec,
|
||||
uint32_t permissions) {
|
||||
assert(dir_asset);
|
||||
AssetProto* asset = dir_asset->add_dir_assets();
|
||||
asset->set_name(name);
|
||||
asset->set_type(AssetProto::DIRECTORY);
|
||||
asset->set_mtime_seconds(mtime_sec);
|
||||
asset->set_permissions(permissions);
|
||||
return asset;
|
||||
}
|
||||
|
||||
ContentIdProto FakeManifestBuilder::BuildTestData() {
|
||||
const uint32_t kFileMode =
|
||||
path::MODE_IRUSR | path::MODE_IWUSR | path::MODE_IRGRP | path::MODE_IROTH;
|
||||
const uint32_t kDirMode = path::MODE_IRGRP | path::MODE_IXGRP |
|
||||
path::MODE_IROTH | path::MODE_IXOTH |
|
||||
path::MODE_IRWXU;
|
||||
const int64_t kModTime = 1614843754;
|
||||
|
||||
// root
|
||||
// |- file1.txt
|
||||
// |- fio_test
|
||||
// |- large_file1.txt
|
||||
// |- ...
|
||||
// |- large_file9.txt
|
||||
// |- a
|
||||
// |- file2.txt
|
||||
// |- b
|
||||
// |- file3.txt
|
||||
|
||||
AssetProto* fio_test_dir =
|
||||
AddDirectory(Root(), "fio_test", kModTime, kDirMode);
|
||||
|
||||
// 500k lines generate a ~33 MB file.
|
||||
std::vector<char> data = BuildLargeFileData(500000);
|
||||
for (int n = 1; n < 9; ++n) {
|
||||
std::string filename = absl::StrFormat("large_file%i.txt", n);
|
||||
AddFile(fio_test_dir, filename.c_str(), kModTime, kFileMode, data);
|
||||
}
|
||||
|
||||
AddFile(Root(), "file1.txt", kModTime, kFileMode, {'1', '3', '3', '7', '\n'});
|
||||
|
||||
AssetProto* a_dir = AddDirectory(Root(), "a", kModTime, kDirMode);
|
||||
|
||||
AddFile(a_dir, "file2.txt", kModTime, kFileMode,
|
||||
{'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!', '\n'});
|
||||
|
||||
AssetProto* b_dir = AddDirectory(a_dir, "b", kModTime, kDirMode);
|
||||
|
||||
AddFile(
|
||||
b_dir, "file3.txt", kModTime, kFileMode,
|
||||
{0127, 0105, 0122, 0040, 0104, 0101, 0123, 0040, 0114, 0111, 0105, 0123,
|
||||
0124, 0040, 0111, 0123, 0124, 0040, 0104, 0117, 0117, 0106, 0012});
|
||||
|
||||
return store_->AddProto(manifest_);
|
||||
}
|
||||
|
||||
const ManifestProto* FakeManifestBuilder::Manifest() const {
|
||||
return &manifest_;
|
||||
}
|
||||
|
||||
AssetProto* FakeManifestBuilder::Root() { return manifest_.mutable_root_dir(); }
|
||||
|
||||
void FakeManifestBuilder::ModifyFile(AssetProto* dir_asset, const char* name,
|
||||
int64_t mtime_sec, uint32_t permissions,
|
||||
const std::vector<char>& data) {
|
||||
assert(dir_asset);
|
||||
AssetProto* asset = FindAsset(dir_asset, name);
|
||||
assert(asset && asset->type() == AssetProto::FILE);
|
||||
asset->set_file_size(data.size());
|
||||
asset->set_mtime_seconds(mtime_sec);
|
||||
asset->set_permissions(permissions);
|
||||
asset->clear_file_chunks();
|
||||
asset->clear_file_indirect_chunks();
|
||||
|
||||
UpdateFileContent(asset, store_, data);
|
||||
}
|
||||
} // namespace cdc_ft
|
||||
73
manifest/fake_manifest_builder.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_FAKE_MANIFEST_BUILDER_H_
|
||||
#define MANIFEST_FAKE_MANIFEST_BUILDER_H_
|
||||
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
class MemDataStore;
|
||||
|
||||
// In-memory manifest builder. Useful for testing.
|
||||
class FakeManifestBuilder {
|
||||
public:
|
||||
// Permissions assigned to the root directory.
|
||||
static constexpr uint32_t kRootDirPerms = 0755u;
|
||||
|
||||
explicit FakeManifestBuilder(MemDataStore* store);
|
||||
~FakeManifestBuilder();
|
||||
|
||||
// Adds a new file with the given |name| to the directory |dir_asset| and sets
|
||||
// the modified time to |mtime_sec| and permissions to |permissions|. Also
|
||||
// generates data chunks from |data| using fastcdc (hardcoded chunk sizes).
|
||||
// Use builder.AddFile(builder.Root(), ...) to add a file to the root
|
||||
// directory.
|
||||
void AddFile(AssetProto* dir_asset, const char* name, int64_t mtime_sec,
|
||||
uint32_t permissions, const std::vector<char>& data);
|
||||
|
||||
// Adds a new directory of the given |name| to the directory |dir_asset| and
|
||||
// sets the modified time to |mtime_sec| and permissions to |permissions|.
|
||||
// Returns a pointer to the new directory that can be used to further add
|
||||
// files or subdirectories.
|
||||
// Use builder.AddDirectory(builder.Root(), ...) to add a directory to the
|
||||
// root directory.
|
||||
AssetProto* AddDirectory(AssetProto* dir_asset, const char* name,
|
||||
int64_t mtime_sec, uint32_t permissions);
|
||||
|
||||
// Builds a fake directory structure with files and subdirectories suitable
|
||||
// for prototyping/testing.
|
||||
ContentIdProto BuildTestData();
|
||||
|
||||
// Returns the built manifest.
|
||||
const ManifestProto* Manifest() const;
|
||||
|
||||
// Shortcut to &Manifest()->root_dir().
|
||||
AssetProto* Root();
|
||||
|
||||
// Updates the file |name| with new |permissions|, |mtime_sec|, and |data|.
|
||||
void ModifyFile(AssetProto* dir_asset, const char* name, int64_t mtime_sec,
|
||||
uint32_t permissions, const std::vector<char>& data);
|
||||
|
||||
private:
|
||||
MemDataStore* const store_;
|
||||
ManifestProto manifest_;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_FAKE_MANIFEST_BUILDER_H_
|
||||
120
manifest/fake_manifest_builder_test.cc
Normal file
@@ -0,0 +1,120 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/fake_manifest_builder.h"
|
||||
|
||||
#include "common/status_test_macros.h"
|
||||
#include "data_store/mem_data_store.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
TEST(FakeManifestBuilderTest, RootDir) {
|
||||
MemDataStore store;
|
||||
FakeManifestBuilder builder(&store);
|
||||
|
||||
const AssetProto& root = builder.Manifest()->root_dir();
|
||||
EXPECT_EQ(&root, builder.Root());
|
||||
EXPECT_EQ(root.type(), AssetProto::DIRECTORY);
|
||||
EXPECT_TRUE(root.name().empty());
|
||||
ASSERT_EQ(root.dir_assets().size(), 0);
|
||||
}
|
||||
|
||||
TEST(FakeManifestBuilderTest, AddFile) {
|
||||
MemDataStore store;
|
||||
FakeManifestBuilder builder(&store);
|
||||
|
||||
std::vector<char> expected_data = {1, 3, 3, 7};
|
||||
builder.AddFile(builder.Root(), "file", 12345, 0750, expected_data);
|
||||
const AssetProto& root = builder.Manifest()->root_dir();
|
||||
|
||||
ASSERT_EQ(root.dir_assets().size(), 1);
|
||||
const AssetProto& file = root.dir_assets(0);
|
||||
EXPECT_EQ(file.name(), "file");
|
||||
EXPECT_EQ(file.type(), AssetProto::FILE);
|
||||
EXPECT_EQ(file.mtime_seconds(), 12345);
|
||||
EXPECT_EQ(file.permissions(), 0750);
|
||||
|
||||
ASSERT_EQ(file.file_chunks_size(), 1);
|
||||
const ChunkRefProto& chunk = file.file_chunks(0);
|
||||
EXPECT_EQ(chunk.offset(), 0);
|
||||
|
||||
// Try to read a byte more to see if it's properly clamped.
|
||||
std::vector<char> data;
|
||||
data.resize(expected_data.size() + 1);
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
store.Get(chunk.chunk_id(), data.data(), 0, data.size());
|
||||
|
||||
ASSERT_OK(bytes_read);
|
||||
EXPECT_EQ(*bytes_read, expected_data.size());
|
||||
data.resize(expected_data.size());
|
||||
}
|
||||
|
||||
TEST(FakeManifestBuilderTest, AddDirectory) {
|
||||
MemDataStore store;
|
||||
FakeManifestBuilder builder(&store);
|
||||
|
||||
AssetProto* dir = builder.AddDirectory(builder.Root(), "dir", 12345, 0750);
|
||||
builder.AddFile(dir, "file", 23456, 0321, {});
|
||||
const AssetProto& root = builder.Manifest()->root_dir();
|
||||
|
||||
ASSERT_EQ(root.dir_assets().size(), 1);
|
||||
|
||||
EXPECT_EQ(&root.dir_assets(0), dir);
|
||||
EXPECT_EQ(dir->name(), "dir");
|
||||
EXPECT_EQ(dir->type(), AssetProto::DIRECTORY);
|
||||
EXPECT_EQ(dir->mtime_seconds(), 12345);
|
||||
EXPECT_EQ(dir->permissions(), 0750);
|
||||
|
||||
ASSERT_EQ(dir->dir_assets_size(), 1);
|
||||
const AssetProto& file = dir->dir_assets(0);
|
||||
EXPECT_EQ(file.name(), "file");
|
||||
}
|
||||
|
||||
TEST(FakeManifestBuilderTest, ModifyFile) {
|
||||
MemDataStore store;
|
||||
FakeManifestBuilder builder(&store);
|
||||
|
||||
std::vector<char> expected_data = {1, 3, 3, 7, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1};
|
||||
builder.AddFile(builder.Root(), "file", 12345, 0750, expected_data);
|
||||
expected_data = {2, 4, 4, 3};
|
||||
builder.ModifyFile(builder.Root(), "file", 14843, 0666, expected_data);
|
||||
|
||||
const AssetProto& root = builder.Manifest()->root_dir();
|
||||
|
||||
ASSERT_EQ(root.dir_assets().size(), 1);
|
||||
const AssetProto& file = root.dir_assets(0);
|
||||
EXPECT_EQ(file.name(), "file");
|
||||
EXPECT_EQ(file.type(), AssetProto::FILE);
|
||||
EXPECT_EQ(file.mtime_seconds(), 14843);
|
||||
EXPECT_EQ(file.permissions(), 0666);
|
||||
|
||||
ASSERT_EQ(file.file_chunks_size(), 1);
|
||||
const ChunkRefProto& chunk = file.file_chunks(0);
|
||||
EXPECT_EQ(chunk.offset(), 0);
|
||||
|
||||
// Try to read a byte more to see if it's properly clamped.
|
||||
std::vector<char> data;
|
||||
data.resize(expected_data.size() + 1);
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
store.Get(chunk.chunk_id(), data.data(), 0, data.size());
|
||||
|
||||
ASSERT_OK(bytes_read);
|
||||
EXPECT_EQ(*bytes_read, expected_data.size());
|
||||
data.resize(expected_data.size());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
253
manifest/file_chunk_map.cc
Normal file
@@ -0,0 +1,253 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/file_chunk_map.h"
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "manifest/stats_printer.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
FileChunkMap::FileChunkMap(bool enable_stats) {
|
||||
if (enable_stats) stats_ = std::make_unique<StatsPrinter>();
|
||||
}
|
||||
|
||||
FileChunkMap::~FileChunkMap() = default;
|
||||
|
||||
void FileChunkMap::Init(std::string path, uint64_t file_size,
|
||||
std::vector<FileChunk>* chunks) {
|
||||
FileUpdate update(FileUpdateType::kInit, std::move(path));
|
||||
update.file_size = file_size;
|
||||
if (chunks) update.chunks = std::move(*chunks);
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::AppendCopy(std::string path,
|
||||
const RepeatedChunkRefProto& list,
|
||||
uint64_t list_offset) {
|
||||
FileUpdate update(FileUpdateType::kAppend, std::move(path));
|
||||
update.chunks.reserve(list.size());
|
||||
for (const ChunkRefProto& ch : list)
|
||||
update.chunks.emplace_back(ch.chunk_id(), ch.offset() + list_offset);
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::AppendMove(std::string path, RepeatedChunkRefProto* list,
|
||||
uint64_t list_offset) {
|
||||
FileUpdate update(FileUpdateType::kAppend, std::move(path));
|
||||
update.chunks.reserve(list->size());
|
||||
for (ChunkRefProto& ch : *list) {
|
||||
update.chunks.emplace_back(std::move(*ch.mutable_chunk_id()),
|
||||
ch.offset() + list_offset);
|
||||
}
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::Remove(std::string path) {
|
||||
FileUpdate update(FileUpdateType::kRemove, std::move(path));
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::Clear() {
|
||||
FileUpdate update(FileUpdateType::kClear, std::string());
|
||||
file_updates_.push_back(std::move(update));
|
||||
}
|
||||
|
||||
void FileChunkMap::FlushUpdates() {
|
||||
if (file_updates_.empty()) return;
|
||||
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
for (FileUpdate& update : file_updates_) {
|
||||
switch (update.type) {
|
||||
case FileUpdateType::kInit: {
|
||||
File& file = path_to_file_[update.path];
|
||||
file.size = update.file_size;
|
||||
assert(total_chunks_ >= file.chunks.size());
|
||||
total_chunks_ -= file.chunks.size();
|
||||
total_chunks_ += update.chunks.size();
|
||||
file.chunks = std::move(update.chunks);
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kAppend: {
|
||||
File& file = path_to_file_[update.path];
|
||||
total_chunks_ += update.chunks.size();
|
||||
if (file.chunks.empty()) {
|
||||
file.chunks = std::move(update.chunks);
|
||||
} else {
|
||||
file.chunks.reserve(file.chunks.size() + update.chunks.size());
|
||||
std::move(std::begin(update.chunks), std::end(update.chunks),
|
||||
std::back_inserter(file.chunks));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kRemove: {
|
||||
const auto iter = path_to_file_.find(update.path);
|
||||
if (iter == path_to_file_.end()) break;
|
||||
assert(total_chunks_ >= iter->second.chunks.size());
|
||||
total_chunks_ -= iter->second.chunks.size();
|
||||
path_to_file_.erase(iter);
|
||||
break;
|
||||
}
|
||||
|
||||
case FileUpdateType::kClear: {
|
||||
path_to_file_.clear();
|
||||
total_chunks_ = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file_updates_.clear();
|
||||
|
||||
UpdateIdToChunkMap();
|
||||
}
|
||||
|
||||
bool FileChunkMap::Lookup(const ContentIdProto& content_id, std::string* path,
|
||||
uint64_t* offset, uint32_t* size) {
|
||||
assert(path && offset && size);
|
||||
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
return FindChunk(content_id, path, offset, size, nullptr);
|
||||
}
|
||||
|
||||
void FileChunkMap::RecordStreamedChunk(const ContentIdProto& content_id,
|
||||
size_t thread_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
if (!stats_) return;
|
||||
|
||||
if (streamed_chunks_to_thread_.find(content_id) !=
|
||||
streamed_chunks_to_thread_.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
if (FindChunk(content_id, &path, nullptr, &size, &index))
|
||||
stats_->RecordStreamedChunk(path, index, size, thread_id);
|
||||
streamed_chunks_to_thread_[content_id] = thread_id;
|
||||
}
|
||||
|
||||
void FileChunkMap::RecordCachedChunk(const ContentIdProto& content_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
if (!stats_) return;
|
||||
|
||||
if (cached_chunks_.find(content_id) != cached_chunks_.end()) return;
|
||||
|
||||
// Restarting FUSE might report cached chunks that were originally
|
||||
// streamed. Ignore those.
|
||||
if (streamed_chunks_to_thread_.find(content_id) !=
|
||||
streamed_chunks_to_thread_.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
if (FindChunk(content_id, &path, nullptr, &size, &index))
|
||||
stats_->RecordCachedChunk(path, index, size);
|
||||
cached_chunks_.insert(content_id);
|
||||
}
|
||||
|
||||
void FileChunkMap::PrintStats() {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
if (!stats_) return;
|
||||
|
||||
stats_->Print();
|
||||
}
|
||||
|
||||
bool FileChunkMap::HasStats() const {
|
||||
absl::ReaderMutexLock lock(&mutex_);
|
||||
return stats_ != nullptr;
|
||||
}
|
||||
|
||||
void FileChunkMap::UpdateIdToChunkMap() {
|
||||
assert((mutex_.AssertHeld(), true));
|
||||
|
||||
// Put all chunks into the map.
|
||||
id_to_chunk_.clear();
|
||||
id_to_chunk_.reserve(total_chunks_);
|
||||
for (const auto& [path, file] : path_to_file_) {
|
||||
for (uint32_t n = 0; n < static_cast<uint32_t>(file.chunks.size()); ++n)
|
||||
id_to_chunk_[ContentIdRef(file.chunks[n].content_id)] = {&path, n};
|
||||
}
|
||||
|
||||
// Might be "<" if multiple files contain the same chunk.
|
||||
assert(id_to_chunk_.size() <= total_chunks_);
|
||||
|
||||
// Rebuild stats if present.
|
||||
if (stats_) {
|
||||
stats_->Clear();
|
||||
for (const auto& [path, file] : path_to_file_)
|
||||
stats_->InitFile(path, file.chunks.size());
|
||||
|
||||
// Fill in the streamed chunks.
|
||||
std::string path;
|
||||
uint32_t size;
|
||||
size_t index;
|
||||
for (const auto& [id, thread_id] : streamed_chunks_to_thread_) {
|
||||
if (FindChunk(id, &path, nullptr, &size, &index))
|
||||
stats_->RecordStreamedChunk(path, index, size, thread_id);
|
||||
}
|
||||
|
||||
// Fill in the cached chunks.
|
||||
for (const ContentIdProto& id : cached_chunks_) {
|
||||
if (FindChunk(id, &path, nullptr, &size, &index))
|
||||
stats_->RecordCachedChunk(path, index, size);
|
||||
}
|
||||
|
||||
// Make sure the above RecordStreamedChunk() calls don't count towards
|
||||
// bandwidth stats.
|
||||
stats_->ResetBandwidthStats();
|
||||
}
|
||||
}
|
||||
|
||||
bool FileChunkMap::FindChunk(const ContentIdProto& content_id,
|
||||
std::string* path, uint64_t* offset,
|
||||
uint32_t* size, size_t* index) {
|
||||
assert((mutex_.AssertHeld(), true));
|
||||
|
||||
// Find the |id_to_chunk_| entry by |content_id|. It might not exist if
|
||||
// changes to the manifest have not propagated to gamelets yet.
|
||||
IdToChunkMap::iterator i2c_iter = id_to_chunk_.find(ContentIdRef(content_id));
|
||||
if (i2c_iter == id_to_chunk_.end()) return false;
|
||||
|
||||
// Find the chunk location by path. This lookup should not fail because
|
||||
// |path_to_file_| and |id_to_chunk_| should always be in sync here.
|
||||
const ChunkLocation& loc = i2c_iter->second;
|
||||
PathToFileMap::iterator p2f_iter = path_to_file_.find(*loc.path);
|
||||
assert(p2f_iter != path_to_file_.end());
|
||||
|
||||
// Compute path, chunk offset and chunk size.
|
||||
const File& file = p2f_iter->second;
|
||||
assert(loc.index < file.chunks.size());
|
||||
uint64_t this_offset = file.chunks[loc.index].offset;
|
||||
uint64_t next_offset = loc.index + 1 == file.chunks.size()
|
||||
? file.size
|
||||
: file.chunks[loc.index + 1].offset;
|
||||
if (path) *path = *loc.path;
|
||||
if (offset) *offset = this_offset;
|
||||
if (size) *size = static_cast<uint32_t>(next_offset - this_offset);
|
||||
if (index) *index = loc.index;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
206
manifest/file_chunk_map.h
Normal file
@@ -0,0 +1,206 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_FILE_CHUNK_MAP_H_
|
||||
#define MANIFEST_FILE_CHUNK_MAP_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/status/status.h"
|
||||
#include "manifest/content_id.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
class StatsPrinter;
|
||||
|
||||
// A file chunk, used by the FileChunkMap.
|
||||
struct FileChunk {
|
||||
// Id of the chunk.
|
||||
ContentIdProto content_id;
|
||||
|
||||
// Absolute offset of the chunk in the file.
|
||||
uint64_t offset = 0;
|
||||
|
||||
FileChunk(ContentIdProto content_id, uint64_t offset)
|
||||
: content_id(std::move(content_id)), offset(offset) {}
|
||||
};
|
||||
|
||||
// Manages chunk lookups by content id. The class can be populated by passing it
|
||||
// to ManifestUpdater and then used to look up chunks by calling Lookup().
|
||||
class FileChunkMap {
|
||||
public:
|
||||
// If |enable_stats| is true, keeps detailed statistics on chunk access
|
||||
// patterns.
|
||||
explicit FileChunkMap(bool enable_stats);
|
||||
~FileChunkMap();
|
||||
|
||||
FileChunkMap(FileChunkMap&) = delete;
|
||||
FileChunkMap& operator=(FileChunkMap&) = delete;
|
||||
|
||||
// Initializes a new entry for |path| or clears the existing one and sets the
|
||||
// |file_size|. If |chunks| is not null, moves the contents of |chunks| to
|
||||
// this file's chunk list.
|
||||
void Init(std::string path, uint64_t file_size,
|
||||
std::vector<FileChunk>* chunks = nullptr);
|
||||
|
||||
// Appends the chunks in |list| to the entry for |path|. |list_offset| is
|
||||
// added to all chunk offsets in |list|. Copies ContentIdProtos from the list.
|
||||
// The operation is queued and gets applied by calling FlushUpdates().
|
||||
void AppendCopy(std::string path, const RepeatedChunkRefProto& list,
|
||||
uint64_t list_offset);
|
||||
|
||||
// Same as above, but modifies |list| by moving ContentIdProtos off the list.
|
||||
// The operation is queued and gets applied by calling FlushUpdates().
|
||||
void AppendMove(std::string path, RepeatedChunkRefProto* list,
|
||||
uint64_t list_offset);
|
||||
|
||||
// Removes the entry for |path|.
|
||||
// The operation is queued and gets applied by calling FlushUpdates().
|
||||
void Remove(std::string path);
|
||||
|
||||
// Clears all entries.
|
||||
// The operation is queued and gets applied by calling FlushUpdates().
|
||||
void Clear();
|
||||
|
||||
// Flushes all updates made by the above functions.
|
||||
void FlushUpdates() ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
// Looks up the file |path|, the chunk |offset| and chunk |size| by the given
|
||||
// |content_id|. Returns false if the entry does not exist.
|
||||
bool Lookup(const ContentIdProto& content_id, std::string* path,
|
||||
uint64_t* offset, uint32_t* size) ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
// Records that a chunk with the given |content_id| was streamed from the
|
||||
// workstation.
|
||||
// |thread_id| is the id of the thread that requested the chunk on the
|
||||
// gamelet, usually the hash of the std::thread::id.
|
||||
// No-op if |enable_stats| was false in the constructor.
|
||||
void RecordStreamedChunk(const ContentIdProto& content_id, size_t thread_id)
|
||||
ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
// Records that a chunk with the given |content_id| is cached on the gamelet.
|
||||
// No-op if |enable_stats| was false in the constructor.
|
||||
void RecordCachedChunk(const ContentIdProto& content_id)
|
||||
ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
// Prints detailed chunk statistics.
|
||||
// No-op if |enable_stats| was false in the constructor.
|
||||
void PrintStats() ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
bool HasStats() const;
|
||||
|
||||
private:
|
||||
struct File {
|
||||
// All chunks in the file.
|
||||
std::vector<FileChunk> chunks;
|
||||
|
||||
// Total file size.
|
||||
uint64_t size = 0;
|
||||
};
|
||||
|
||||
enum class FileUpdateType { kInit, kAppend, kRemove, kClear };
|
||||
|
||||
struct FileUpdate {
|
||||
FileUpdateType type = FileUpdateType::kInit;
|
||||
std::string path;
|
||||
uint64_t file_size = 0;
|
||||
std::vector<FileChunk> chunks;
|
||||
|
||||
FileUpdate(FileUpdateType type, std::string path)
|
||||
: type(type), path(std::move(path)) {}
|
||||
};
|
||||
|
||||
struct ChunkLocation {
|
||||
// Asset path, also key into |path_to_file_| map.
|
||||
const std::string* path = nullptr;
|
||||
|
||||
// Index into |path_to_file_[*path].chunks|.
|
||||
uint32_t index = 0;
|
||||
};
|
||||
|
||||
// Keeps a pointer to a content id proto and compares by value.
|
||||
struct ContentIdRef {
|
||||
const ContentIdProto* content_id;
|
||||
|
||||
explicit ContentIdRef(const ContentIdProto& content_id)
|
||||
: content_id(&content_id) {}
|
||||
|
||||
bool operator==(const ContentIdRef& other) const {
|
||||
return *content_id == *other.content_id;
|
||||
}
|
||||
bool operator!=(const ContentIdRef& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
struct ContentIdRefHash {
|
||||
std::size_t operator()(const ContentIdRef& ref) const noexcept {
|
||||
return hash(*ref.content_id);
|
||||
}
|
||||
std::hash<ContentIdProto> hash;
|
||||
};
|
||||
|
||||
// Updates |id_to_chunk_|. Also rebuilds |stats_| if present.
|
||||
void UpdateIdToChunkMap() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
  // Finds a chunk by its |content_id|.
|
||||
// |path| returns the relative Unix path of a file that contains the chunk.
|
||||
// |offset| returns the offset of the chunk in the file.
|
||||
// |size| returns the size of the chunk.
|
||||
// |index| returns the index of the chunk in the File struct.
|
||||
// All output variables are optional.
|
||||
  // Calls UpdateIdToChunkMap() if necessary.
|
||||
// Returns true if the chunk was found.
|
||||
bool FindChunk(const ContentIdProto& content_id, std::string* path,
|
||||
uint64_t* offset, uint32_t* size, size_t* index)
|
||||
ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Queued updates.
|
||||
std::vector<FileUpdate> file_updates_;
|
||||
|
||||
  // Maps an asset's relative Unix path to its file size and chunks.
|
||||
using PathToFileMap = absl::flat_hash_map<std::string, File>;
|
||||
PathToFileMap path_to_file_ ABSL_GUARDED_BY(mutex_);
|
||||
|
||||
// Maps content id to path and chunk index.
|
||||
using IdToChunkMap =
|
||||
absl::flat_hash_map<ContentIdRef, ChunkLocation, ContentIdRefHash>;
|
||||
IdToChunkMap id_to_chunk_ ABSL_GUARDED_BY(mutex_);
|
||||
|
||||
size_t total_chunks_ ABSL_GUARDED_BY(mutex_) = 0;
|
||||
|
||||
// Keeps detailed chunk access statistics.
|
||||
// Only used if |enable_stats| was set to true in the constructor.
|
||||
std::unique_ptr<StatsPrinter> stats_ ABSL_GUARDED_BY(mutex_);
|
||||
|
||||
// All chunks streamed from/cached on the gamelet.
|
||||
  // The data is used to rebuild stats in case of a manifest update.
|
||||
// Only used if |enable_stats| was set to true in the constructor.
|
||||
absl::flat_hash_map<ContentIdProto, size_t> streamed_chunks_to_thread_
|
||||
ABSL_GUARDED_BY(mutex_);
|
||||
absl::flat_hash_set<ContentIdProto> cached_chunks_ ABSL_GUARDED_BY(mutex_);
|
||||
|
||||
mutable absl::Mutex mutex_;
|
||||
};
|
||||
|
||||
}  // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_FILE_CHUNK_MAP_H_
|
||||
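For orientation, the following is a minimal usage sketch of the FileChunkMap API declared above. It is not part of the commit; MakeChunkList() is a hypothetical helper standing in for code that produces a RepeatedChunkRefProto, and the unit test below shows the real pattern.

// Hedged sketch: register a file's chunks, then resolve a chunk by content ID.
// MakeChunkList() is assumed to return a RepeatedChunkRefProto for the file.
cdc_ft::FileChunkMap chunks(/*enable_stats=*/false);
chunks.Init("dir/file.bin", /*file_size=*/4096);
chunks.AppendCopy("dir/file.bin", MakeChunkList(), /*list_offset=*/0);
chunks.FlushUpdates();  // Init/Append/Remove/Clear are queued until this call.

std::string path;
uint64_t offset = 0;
uint32_t size = 0;
if (chunks.Lookup(some_chunk_id, &path, &offset, &size)) {
  // |path| == "dir/file.bin"; |offset| and |size| locate the chunk in the file.
}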
252
manifest/file_chunk_map_test.cc
Normal file
@@ -0,0 +1,252 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/file_chunk_map.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
constexpr char kFile1[] = "file1";
|
||||
constexpr char kFile2[] = "file2";
|
||||
|
||||
class FileChunkMapTest : public ::testing::Test {
|
||||
protected:
|
||||
// Creates a ChunkRef proto list from chunk data.
|
||||
RepeatedChunkRefProto MakeChunks(
|
||||
std::initializer_list<std::string> chunk_data) {
|
||||
uint64_t offset = 0;
|
||||
RepeatedChunkRefProto chunks;
|
||||
for (const std::string& data : chunk_data) {
|
||||
ChunkRefProto* chunk = chunks.Add();
|
||||
chunk->set_offset(offset);
|
||||
*chunk->mutable_chunk_id() = ContentId::FromDataString(data);
|
||||
offset += data.size();
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
// Creates a ContentId proto from string |data|.
|
||||
ContentIdProto Id(const std::string& data) {
|
||||
return ContentId::FromDataString(data);
|
||||
}
|
||||
|
||||
FileChunkMap file_chunks_{/*enable_stats=*/false};
|
||||
std::string path_;
|
||||
uint64_t offset_ = 0;
|
||||
uint32_t size_ = 0;
|
||||
};
|
||||
|
||||
TEST_F(FileChunkMapTest, LookupOneChunk) {
|
||||
file_chunks_.Init(kFile1, 10);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0123456789"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123456789"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 0);
|
||||
EXPECT_EQ(size_, 10);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, LookupWithoutFlush) {
|
||||
file_chunks_.Init(kFile1, 10);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0123456789"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123456789"), &path_, &offset_, &size_));
|
||||
|
||||
file_chunks_.Clear();
|
||||
// No FlushUpdates() call.
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123456789"), &path_, &offset_, &size_));
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, LookupTwoChunks) {
|
||||
file_chunks_.Init(kFile1, 10);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0123", "456789"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 0);
|
||||
EXPECT_EQ(size_, 4);
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("456789"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 4);
|
||||
EXPECT_EQ(size_, 6);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, LookupTwoFiles) {
|
||||
file_chunks_.Init(kFile1, 4);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0123"}), 0);
|
||||
|
||||
file_chunks_.Init(kFile2, 6);
|
||||
file_chunks_.AppendCopy(kFile2, MakeChunks({"012345"}), 0);
|
||||
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 0);
|
||||
EXPECT_EQ(size_, 4);
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("012345"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile2);
|
||||
EXPECT_EQ(offset_, 0);
|
||||
EXPECT_EQ(size_, 6);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, InitWithChunks) {
|
||||
std::vector<FileChunk> chunks;
|
||||
chunks.emplace_back(Id("0123"), 0);
|
||||
chunks.emplace_back(Id("456789"), 4);
|
||||
|
||||
file_chunks_.Init(kFile1, 10, &chunks);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123"), &path_, &offset_, &size_));
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("456789"), &path_, &offset_, &size_));
|
||||
EXPECT_TRUE(chunks.empty());
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, InitWithChunksAndAppend) {
|
||||
std::vector<FileChunk> chunks;
|
||||
chunks.emplace_back(Id("0123"), 0);
|
||||
|
||||
file_chunks_.Init(kFile1, 10, &chunks);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"456789"}), 4);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123"), &path_, &offset_, &size_));
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("456789"), &path_, &offset_, &size_));
|
||||
EXPECT_TRUE(chunks.empty());
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, InitClearsExistingEntry) {
|
||||
file_chunks_.Init(kFile1, 6);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"012345"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("012345"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(size_, 6);
|
||||
|
||||
file_chunks_.Init(kFile1, 4);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0123"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0123"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(size_, 4);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, AppendAddsOffset) {
|
||||
file_chunks_.Init(kFile1, 10);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"01", "23", "45"}), 0);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"67", "89"}), 6);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("45"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 4);
|
||||
EXPECT_EQ(size_, 2);
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("67"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 6);
|
||||
EXPECT_EQ(size_, 2);
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("89"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
EXPECT_EQ(offset_, 8);
|
||||
EXPECT_EQ(size_, 2);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, Remove_DifferentChunks) {
|
||||
file_chunks_.Init(kFile1, 1);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0"}), 0);
|
||||
|
||||
file_chunks_.Init(kFile2, 1);
|
||||
file_chunks_.AppendCopy(kFile2, MakeChunks({"1"}), 0);
|
||||
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("1"), &path_, &offset_, &size_));
|
||||
|
||||
file_chunks_.Remove(kFile2);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
EXPECT_FALSE(file_chunks_.Lookup(Id("1"), &path_, &offset_, &size_));
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, Remove_SameChunks) {
|
||||
file_chunks_.Init(kFile1, 1);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0"}), 0);
|
||||
|
||||
file_chunks_.Init(kFile2, 1);
|
||||
file_chunks_.AppendCopy(kFile2, MakeChunks({"0"}), 0);
|
||||
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
// |path_| is not deterministic as an absl::flat_hash_map is used internally.
|
||||
EXPECT_TRUE(path_ == kFile1 || path_ == kFile2) << path_;
|
||||
|
||||
file_chunks_.Remove(kFile2);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, Clear) {
|
||||
file_chunks_.Init(kFile1, 1);
|
||||
file_chunks_.AppendCopy(kFile1, MakeChunks({"0"}), 0);
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
|
||||
file_chunks_.Clear();
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_FALSE(file_chunks_.Lookup(Id("0"), &path_, &offset_, &size_));
|
||||
}
|
||||
|
||||
TEST_F(FileChunkMapTest, AppendCopyMove) {
|
||||
RepeatedChunkRefProto chunks1 = MakeChunks({"01"});
|
||||
RepeatedChunkRefProto chunks2 = MakeChunks({"23"});
|
||||
|
||||
file_chunks_.Init(kFile1, 2);
|
||||
file_chunks_.Init(kFile2, 2);
|
||||
|
||||
file_chunks_.AppendCopy(kFile1, chunks1, 0);
|
||||
file_chunks_.AppendMove(kFile2, &chunks2, 0);
|
||||
|
||||
file_chunks_.FlushUpdates();
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("01"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile1);
|
||||
|
||||
EXPECT_TRUE(file_chunks_.Lookup(Id("23"), &path_, &offset_, &size_));
|
||||
EXPECT_EQ(path_, kFile2);
|
||||
|
||||
// AppendMove() should have moved the second chunk off the list.
|
||||
EXPECT_EQ(chunks1[0].chunk_id(), Id("01"));
|
||||
EXPECT_EQ(chunks2[0].chunk_id(), ContentIdProto());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
740
manifest/manifest_builder.cc
Normal file
@@ -0,0 +1,740 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_builder.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/time/time.h"
|
||||
#include "common/log.h"
|
||||
#include "common/path.h"
|
||||
#include "common/status.h"
|
||||
#include "common/status_macros.h"
|
||||
#include "common/util.h"
|
||||
#include "manifest/asset_builder.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
namespace {
|
||||
|
||||
// Splits the given Unix path into its components.
|
||||
inline std::vector<absl::string_view> SplitUnixPath(const std::string& path) {
|
||||
return SplitString(path, '/', false);
|
||||
}
|
||||
|
||||
// Joins the given path components using the Unix path separator. This function
|
||||
// assumes that none of the path components have trailing path separators.
|
||||
inline std::string JoinUnixPath(const std::vector<absl::string_view>& path) {
|
||||
return JoinStrings(path, 0, path.size(), '/');
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ManifestBuilder::ManifestBuilder(CdcParamsProto cdc_params,
|
||||
DataStoreWriter* chunk_store)
|
||||
: data_store_(chunk_store), cdc_params_(std::move(cdc_params)) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
ManifestBuilder::~ManifestBuilder() = default;
|
||||
|
||||
absl::Status ManifestBuilder::LoadManifest(const std::string& manifest_hex_id) {
|
||||
ContentIdProto manifest_id;
|
||||
if (!ContentId::FromHexString(manifest_hex_id, &manifest_id)) {
|
||||
return absl::InvalidArgumentError(
|
||||
absl::StrFormat("Invalid manifest ID: '%s'", manifest_hex_id));
|
||||
}
|
||||
return LoadManifest(manifest_id);
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::LoadManifest(const ContentIdProto& manifest_id) {
|
||||
Reset();
|
||||
RETURN_IF_ERROR(data_store_->GetProto(manifest_id, manifest_));
|
||||
manifest_id_.CopyFrom(manifest_id);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void ManifestBuilder::Reset() {
|
||||
asset_lists_.clear();
|
||||
manifest_id_.Clear();
|
||||
manifest_bytes_written_ = 0;
|
||||
manifest_chunks_written_ = 0;
|
||||
arena_.Reset();
|
||||
manifest_ = MakeProto<ManifestProto>();
|
||||
*manifest_->mutable_cdc_params() = cdc_params_;
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetBuilder> ManifestBuilder::GetOrCreateAsset(
|
||||
const std::string& path, AssetProto::Type type, bool force_create,
|
||||
bool* created) {
|
||||
// We must keep |unix_path| allocated while the string_views in |parts| are
|
||||
// being used.
|
||||
if (created) *created = false;
|
||||
std::string unix_path = path::ToUnix(path);
|
||||
std::vector<absl::string_view> parts = SplitUnixPath(unix_path);
|
||||
absl::string_view name;
|
||||
if (!parts.empty()) {
|
||||
name = parts.back();
|
||||
parts.pop_back();
|
||||
}
|
||||
DirCreateMode create_mode =
|
||||
force_create ? DirCreateMode::kForceCreate : DirCreateMode::kCreate;
|
||||
AssetProto* dir;
|
||||
ASSIGN_OR_RETURN(dir, FindOrCreateDirPath(parts, create_mode),
|
||||
"Failed to create directory '%s'", JoinUnixPath(parts));
|
||||
|
||||
if (name.empty()) {
|
||||
// Special case: return the root directory for a DIRECTORY with empty name.
|
||||
if (type == AssetProto::DIRECTORY) return AssetBuilder(dir, std::string());
|
||||
return absl::InvalidArgumentError("Empty path given");
|
||||
}
|
||||
|
||||
// Check if the asset already exists.
|
||||
absl::StatusOr<AssetProto*> result = FindAssetInDir(name, dir);
|
||||
AssetProto* asset = nullptr;
|
||||
if (result.ok()) {
|
||||
asset = result.value();
|
||||
// Verify that both assets are of the same type.
|
||||
if (asset->type() != type) {
|
||||
if (force_create) {
|
||||
RETURN_IF_ERROR(DeleteAsset(path));
|
||||
asset = nullptr;
|
||||
} else {
|
||||
return absl::AlreadyExistsError(absl::StrFormat(
|
||||
"Asset '%s' already exists in '%s' as %s.", path,
|
||||
JoinUnixPath(parts), AssetProto::Type_Name(asset->type())));
|
||||
}
|
||||
}
|
||||
} else if (!absl::IsNotFound(result.status())) {
|
||||
// Return any unexpected error.
|
||||
return result.status();
|
||||
}
|
||||
// Create the asset if it was not found or it was deleted.
|
||||
if (!asset) {
|
||||
asset = dir->add_dir_assets();
|
||||
InitNewAsset(name, type, asset);
|
||||
if (created) *created = true;
|
||||
}
|
||||
return AssetBuilder(asset, path::ToUnix(path::DirName(path)));
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::DeleteAsset(const std::string& path) {
|
||||
// We must keep |unix_path| allocated while the string_views in |parts| are
|
||||
// being used.
|
||||
std::string unix_path = path::ToUnix(path);
|
||||
std::vector<absl::string_view> parts = SplitUnixPath(unix_path);
|
||||
if (parts.empty()) return absl::InvalidArgumentError("Empty path given");
|
||||
absl::string_view name = parts.back();
|
||||
parts.pop_back();
|
||||
absl::StatusOr<AssetProto*> dir =
|
||||
FindOrCreateDirPath(parts, DirCreateMode::kNoCreate);
|
||||
if (!dir.ok()) {
|
||||
// We can get an absl::InvalidArgumentError here if one of the path
|
||||
// components is not a directory, which means the asset to be deleted does
|
||||
// not exist.
|
||||
if (absl::IsNotFound(dir.status()) ||
|
||||
absl::IsInvalidArgument(dir.status())) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
// Return any unexpected error.
|
||||
return WrapStatus(dir.status(), "Failed to look up path '%s'",
|
||||
JoinUnixPath(parts));
|
||||
}
|
||||
|
||||
// Check if the asset exists.
|
||||
return DeleteAssetFromDir(name, *dir);
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetProto*> ManifestBuilder::FindOrCreateDirPath(
|
||||
const std::vector<absl::string_view>& path, DirCreateMode create_dirs) {
|
||||
  // Create the manifest's root directory, if needed, independent of |create_dirs|.
|
||||
if (!manifest_->has_root_dir()) {
|
||||
InitNewAsset(absl::string_view(), AssetProto::DIRECTORY,
|
||||
manifest_->mutable_root_dir());
|
||||
}
|
||||
return FindOrCreateDirPathRec(path, 0, manifest_->mutable_root_dir(),
|
||||
create_dirs);
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetProto*> ManifestBuilder::FindOrCreateDirPathRec(
|
||||
const std::vector<absl::string_view>& path, size_t path_idx,
|
||||
AssetProto* dir, DirCreateMode create_dirs) {
|
||||
if (path_idx >= path.size()) return dir;
|
||||
absl::string_view name = path[path_idx];
|
||||
|
||||
// Try to find the name in the direct assets.
|
||||
bool overwrite = create_dirs == DirCreateMode::kForceCreate;
|
||||
absl::StatusOr<AssetProto*> result = FindMutableAssetInList(
|
||||
name, AssetProto::DIRECTORY, overwrite, dir->mutable_dir_assets());
|
||||
if (result.ok()) {
|
||||
// Recurse into the sub-directory.
|
||||
return FindOrCreateDirPathRec(path, path_idx + 1, result.value(),
|
||||
create_dirs);
|
||||
}
|
||||
if (!absl::IsNotFound(result.status())) {
|
||||
// Return any unexpected error.
|
||||
return result;
|
||||
}
|
||||
|
||||
// Try to find the name in the list of indirect assets.
|
||||
for (const ContentIdProto& asset_list_id : dir->dir_indirect_assets()) {
|
||||
AssetListProto* asset_list;
|
||||
ASSIGN_OR_RETURN(asset_list, GetAssetList(asset_list_id));
|
||||
// In theory it can happen that the loaded asset_list is empty, in which
|
||||
// case it is null.
|
||||
if (!asset_list) continue;
|
||||
result = FindMutableAssetInList(name, AssetProto::DIRECTORY, overwrite,
|
||||
asset_list->mutable_assets());
|
||||
if (result.ok()) {
|
||||
// Recurse into the sub-directory.
|
||||
return FindOrCreateDirPathRec(path, path_idx + 1, result.value(),
|
||||
create_dirs);
|
||||
}
|
||||
if (!absl::IsNotFound(result.status())) {
|
||||
// Return any unexpected error.
|
||||
return WrapStatus(result.status(),
|
||||
"Failed to look up directory '%s' in AssetListProto %s",
|
||||
name, ContentId::ToHexString(asset_list_id));
|
||||
}
|
||||
}
|
||||
|
||||
// If we're not supposed to create the directory, return an error.
|
||||
if (create_dirs == DirCreateMode::kNoCreate) {
|
||||
return absl::NotFoundError(absl::string_view());
|
||||
}
|
||||
|
||||
// Create the missing directory.
|
||||
AssetProto* child = dir->add_dir_assets();
|
||||
InitNewAsset(name, AssetProto::DIRECTORY, child);
|
||||
return FindOrCreateDirPathRec(path, path_idx + 1, child, create_dirs);
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetProto*> ManifestBuilder::FindAssetInDir(
|
||||
absl::string_view name, AssetProto* dir) {
|
||||
if (dir->type() != AssetProto::DIRECTORY) {
|
||||
return WrongAssetTypeError(dir->name(), dir->type(), AssetProto::DIRECTORY);
|
||||
}
|
||||
|
||||
// Try to find the name in the direct assets.
|
||||
absl::StatusOr<AssetProto*> result =
|
||||
FindMutableAssetInList(name, dir->mutable_dir_assets());
|
||||
if (result.ok()) {
|
||||
return result.value();
|
||||
}
|
||||
if (!absl::IsNotFound(result.status())) {
|
||||
// Return any unexpected error.
|
||||
return result;
|
||||
}
|
||||
|
||||
// Try to find the name in the list of indirect assets.
|
||||
for (const ContentIdProto& asset_list_id : dir->dir_indirect_assets()) {
|
||||
AssetListProto* asset_list;
|
||||
ASSIGN_OR_RETURN(asset_list, GetAssetList(asset_list_id),
|
||||
"Failed to look up asset '%s' in directory '%s'", name,
|
||||
dir->name());
|
||||
result = FindMutableAssetInList(name, asset_list->mutable_assets());
|
||||
if (result.ok()) {
|
||||
return result.value();
|
||||
}
|
||||
if (!absl::IsNotFound(result.status())) {
|
||||
// Return any unexpected error.
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return absl::NotFoundError(absl::string_view());
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetProto*> ManifestBuilder::FindMutableAssetInList(
|
||||
absl::string_view name, RepeatedAssetProto* assets) const {
|
||||
for (AssetProto& asset : *assets) {
|
||||
if (asset.name() == name) return &asset;
|
||||
}
|
||||
return absl::NotFoundError(absl::string_view());
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetProto*> ManifestBuilder::FindMutableAssetInList(
|
||||
absl::string_view name, AssetProto::Type type, bool overwrite,
|
||||
RepeatedAssetProto* assets) const {
|
||||
AssetProto* asset;
|
||||
ASSIGN_OR_RETURN(asset, FindMutableAssetInList(name, assets));
|
||||
if (asset->type() != type) {
|
||||
// Return an error if the asset is not of the desired type and we're not
|
||||
// supposed to overwrite it.
|
||||
if (!overwrite) {
|
||||
return WrongAssetTypeError(asset->name(), asset->type(), type);
|
||||
}
|
||||
// Replace the asset with the new type.
|
||||
InitNewAsset(std::string(asset->name()), type, asset);
|
||||
}
|
||||
|
||||
return asset;
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::DeleteAssetFromDir(absl::string_view name,
|
||||
AssetProto* dir) {
|
||||
if (dir->type() != AssetProto::DIRECTORY) {
|
||||
return WrongAssetTypeError(dir->name(), dir->type(), AssetProto::DIRECTORY);
|
||||
}
|
||||
|
||||
// Try to find the name in the direct assets.
|
||||
if (DeleteAssetFromList(name, dir->mutable_dir_assets())) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Try to find the name in the list of indirect assets.
|
||||
for (const ContentIdProto& asset_list_id : dir->dir_indirect_assets()) {
|
||||
AssetListProto* asset_list;
|
||||
ASSIGN_OR_RETURN(asset_list, GetAssetList(asset_list_id),
|
||||
"Failed to look up asset '%s' in directory '%s'", name,
|
||||
dir->name());
|
||||
if (DeleteAssetFromList(name, asset_list->mutable_assets())) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
bool ManifestBuilder::DeleteAssetFromList(absl::string_view name,
|
||||
RepeatedAssetProto* assets) const {
|
||||
for (int i = 0; i < assets->size(); ++i) {
|
||||
if (assets->at(i).name() == name) {
|
||||
// Move the asset to the end of the list, then remove it, to avoid all
|
||||
// other elements being moved.
|
||||
if (i != assets->size() - 1) {
|
||||
assets->SwapElements(i, assets->size() - 1);
|
||||
}
|
||||
assets->RemoveLast();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void ManifestBuilder::InitNewAsset(absl::string_view name,
|
||||
AssetProto::Type type,
|
||||
AssetProto* asset) const {
|
||||
asset->Clear();
|
||||
asset->set_name(name.data(), name.size());
|
||||
asset->set_type(type);
|
||||
asset->set_mtime_seconds(absl::ToUnixSeconds(absl::Now()));
|
||||
asset->set_permissions(type == AssetProto::DIRECTORY ? kDefaultDirPerms
|
||||
: kDefaultFilePerms);
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetListProto*> ManifestBuilder::GetAssetList(
|
||||
const ContentIdProto& id) {
|
||||
// See if we loaded this proto already.
|
||||
AssetListMap::iterator it = asset_lists_.find(id);
|
||||
if (it != asset_lists_.end()) return it->second;
|
||||
// If not, we need to load it.
|
||||
AssetListProto* asset_list = MakeProto<AssetListProto>();
|
||||
RETURN_IF_ERROR(data_store_->GetProto(id, asset_list),
|
||||
"Failed to read the AssetListProto with ID %s from storage",
|
||||
ContentId::ToHexString(id));
|
||||
asset_lists_[id] = asset_list;
|
||||
return asset_list;
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetListProto*> ManifestBuilder::TakeOutAssetList(
|
||||
const ContentIdProto& id) {
|
||||
AssetListProto* list;
|
||||
ASSIGN_OR_RETURN(list, GetAssetList(id));
|
||||
asset_lists_.erase(id);
|
||||
return list;
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::WrongAssetTypeError(
|
||||
absl::string_view name, AssetProto::Type found,
|
||||
AssetProto::Type expected) const {
|
||||
return absl::InvalidArgumentError(absl::StrFormat(
|
||||
"Asset '%s' is of type %s, expected %s.", name,
|
||||
AssetProto::Type_Name(found), AssetProto::Type_Name(expected)));
|
||||
}
|
||||
|
||||
size_t ManifestBuilder::ManifestBytesWritten() const {
|
||||
return manifest_bytes_written_;
|
||||
}
|
||||
|
||||
size_t ManifestBuilder::ManifestsChunksWritten() const {
|
||||
return manifest_chunks_written_;
|
||||
}
|
||||
|
||||
const ContentIdProto& ManifestBuilder::ManifestId() const {
|
||||
return manifest_id_;
|
||||
}
|
||||
|
||||
const ManifestProto* ManifestBuilder::Manifest() const { return manifest_; }
|
||||
|
||||
const std::vector<ContentIdProto>& ManifestBuilder::FlushedContentIds() const {
|
||||
return flushed_content_ids_;
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::FlushDir(AssetProto* dir) {
|
||||
// Flush all direct assets.
|
||||
RETURN_IF_ERROR(FlushAssetList(dir->mutable_dir_assets()),
|
||||
"Failed to flush directs assets of directory '%s'",
|
||||
dir->name());
|
||||
|
||||
RepeatedAssetProto overflow;
|
||||
RepeatedContentIdProto* indirect_assets = dir->mutable_dir_indirect_assets();
|
||||
|
||||
// Flush all indirect asset lists that were previously loaded.
|
||||
RepeatedContentIdProto::iterator it = indirect_assets->begin();
|
||||
while (it != indirect_assets->end()) {
|
||||
ContentIdProto& asset_list_id = *it;
|
||||
// Skip any list that was never loaded.
|
||||
AssetListMap::iterator asset_list_it = asset_lists_.find(asset_list_id);
|
||||
if (asset_list_it == asset_lists_.end()) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
AssetListProto* asset_list = asset_list_it->second;
|
||||
// Flush the list and enforce the chunk size limit.
|
||||
RETURN_IF_ERROR(FlushAssetList(asset_list->mutable_assets()),
|
||||
"Failed to flush indirect asset list %s in directory '%s'",
|
||||
ContentId::ToHexString(asset_list_id), dir->name());
|
||||
EnforceAssetListProtoSize(asset_list, &overflow);
|
||||
// If the asset list is empty, just delete it from the indirect asset list.
|
||||
if (asset_list->assets_size() <= 0) {
|
||||
it = indirect_assets->erase(it);
|
||||
continue;
|
||||
}
|
||||
// Write the list to the chunk store and update the content ID.
|
||||
RETURN_IF_ERROR(WriteProto(*asset_list, &asset_list_id),
|
||||
"Failed to write indirect asset list proto for directory "
|
||||
"'%s' to storage",
|
||||
dir->name());
|
||||
// If the content ID changed, we need to update the list's key in the map.
|
||||
if (asset_list_it->first != asset_list_id) {
|
||||
AssetListProto* list = asset_list_it->second;
|
||||
asset_lists_.erase(asset_list_it);
|
||||
asset_lists_[asset_list_id] = list;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
|
||||
// Enforce size limit for this DIRECTORY asset.
|
||||
RETURN_IF_ERROR(EnforceDirProtoSize(dir, &overflow));
|
||||
// Add the overflown assets to the indirect assets list.
|
||||
return AppendAllocatedIndirectAssets(dir, &overflow);
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::FlushAssetList(RepeatedAssetProto* assets) {
|
||||
// Flush all sub-directories.
|
||||
for (AssetProto& asset : *assets) {
|
||||
if (asset.type() == AssetProto::DIRECTORY)
|
||||
RETURN_IF_ERROR(FlushDir(&asset));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
inline void SortByProtoSizeDesc(RepeatedAssetProto* assets) {
|
||||
std::sort(assets->begin(), assets->end(),
|
||||
[](const AssetProto& a, const AssetProto& b) -> bool {
|
||||
// Compare greater than for descending order.
|
||||
return a.ByteSizeLong() > b.ByteSizeLong();
|
||||
});
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::EnforceDirProtoSize(
|
||||
AssetProto* dir, RepeatedAssetProto* overflow) {
|
||||
// A max. size of zero means no limit.
|
||||
const size_t max_size = manifest_->cdc_params().avg_chunk_size();
|
||||
if (!max_size) return absl::OkStatus();
|
||||
// We cannot change the size of non-directory assets.
|
||||
if (dir->type() != AssetProto::DIRECTORY) return absl::OkStatus();
|
||||
// Calculate the full proto size only once.
|
||||
size_t proto_size = dir->ByteSizeLong();
|
||||
if (proto_size <= max_size) return absl::OkStatus();
|
||||
// Sort asset list by size so that we start with the largest assets.
|
||||
SortByProtoSizeDesc(dir->mutable_dir_assets());
|
||||
// Enforce the size limit of large FILE assets, where "large" is defined as
|
||||
// 1/16th of the target chunk size.
|
||||
const size_t max_asset_proto_size = max_size >> 4;
|
||||
if (max_asset_proto_size) {
|
||||
for (AssetProto& asset : *dir->mutable_dir_assets()) {
|
||||
size_t asset_proto_size = asset.ByteSizeLong();
|
||||
// Stop if the remaining assets are no longer large.
|
||||
if (proto_size <= max_size || asset_proto_size <= max_asset_proto_size) {
|
||||
break;
|
||||
}
|
||||
if (asset.type() != AssetProto::FILE) continue;
|
||||
RETURN_IF_ERROR(EnforceFileProtoSize(&asset, max_asset_proto_size));
|
||||
// Adjust the directory proto size.
|
||||
proto_size = proto_size + asset.ByteSizeLong() - asset_proto_size;
|
||||
}
|
||||
}
|
||||
// Move assets to the overflow list until the limit is respected.
|
||||
while (dir->dir_assets_size() && proto_size > max_size) {
|
||||
// Use the UnsafeArena* function to avoid a heap copy of the message.
|
||||
AssetProto* asset = dir->mutable_dir_assets()->UnsafeArenaReleaseLast();
|
||||
proto_size -= asset->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
// When the estimates get us below the limit, calculate the accurate size.
|
||||
if (proto_size <= max_size) proto_size = dir->ByteSizeLong();
|
||||
overflow->UnsafeArenaAddAllocated(asset);
|
||||
}
|
||||
// At this point, we might still be over the size limit for a combination of
|
||||
  // a very small chunk size and very large directories. There's nothing we
|
||||
// can do about it with the current structure of the manifest proto.
|
||||
if (proto_size > max_size) {
|
||||
LOG_WARNING(
|
||||
"Manifest for directory '%s' is over the configured chunk size limit "
|
||||
"(%d > %d). Consider increasing the chunk size.",
|
||||
dir->name(), proto_size, max_size);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::EnforceFileProtoSize(
|
||||
AssetProto* file, size_t max_asset_proto_size) {
|
||||
if (!max_asset_proto_size) return absl::OkStatus();
|
||||
assert(file->type() == AssetProto::FILE);
|
||||
// If there is only a single direct chunk, we cannot reduce the proto size.
|
||||
if (file->file_chunks_size() <= 1) return absl::OkStatus();
|
||||
// We expect no indirect chunk lists at this point. If we ever decide to
|
||||
// "rebalance" existing manifests with a smaller chunk size, we need to push
|
||||
// the indirect chunks before the existing ones.
|
||||
if (file->file_indirect_chunks_size() > 0) {
|
||||
return MakeStatus(
|
||||
"Given asset '%s' already has %d indirect chunk lists which is not "
|
||||
"supported",
|
||||
file->name(), file->file_indirect_chunks_size());
|
||||
}
|
||||
std::deque<ChunkRefProto*> overflow;
|
||||
size_t proto_size = file->ByteSizeLong();
|
||||
// Remove chunks until the size limit is respected.
|
||||
while (file->file_chunks_size() && proto_size > max_asset_proto_size) {
|
||||
// Use the UnsafeArena* function to avoid a heap copy of the message.
|
||||
ChunkRefProto* ref = file->mutable_file_chunks()->UnsafeArenaReleaseLast();
|
||||
proto_size -= ref->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
// When the estimates get us below the limit, calculate the accurate size.
|
||||
if (proto_size <= max_asset_proto_size) proto_size = file->ByteSizeLong();
|
||||
overflow.push_back(ref);
|
||||
}
|
||||
if (overflow.empty()) return absl::OkStatus();
|
||||
|
||||
// Move chunks to indirect chunk lists. All proto memory is owned by the
|
||||
// |arena_|, we don't need to worry about leaking memory here.
|
||||
ChunkListProto* chunk_list = MakeProto<ChunkListProto>();
|
||||
size_t chunk_list_size = 0;
|
||||
uint64_t chunk_list_offset = overflow.back()->offset();
|
||||
const size_t max_size = manifest_->cdc_params().avg_chunk_size();
|
||||
while (!overflow.empty()) {
|
||||
ChunkRefProto* chunk_ref = overflow.back();
|
||||
overflow.pop_back();
|
||||
// Convert the chunk's absolute offset to a relative one.
|
||||
uint64_t chunk_absolute_offset = chunk_ref->offset();
|
||||
chunk_ref->set_offset(chunk_absolute_offset - chunk_list_offset);
|
||||
size_t chunkref_proto_size =
|
||||
chunk_ref->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
// Write back a full chunk list and set offset and content ID accordingly.
|
||||
if (chunk_list_size > 0 &&
|
||||
chunk_list_size + chunkref_proto_size > max_size) {
|
||||
RETURN_IF_ERROR(WriteBackChunkList(chunk_list_offset, *chunk_list,
|
||||
file->add_file_indirect_chunks()));
|
||||
chunk_list->Clear();
|
||||
chunk_list_size = 0;
|
||||
// The first chunk in the list defines the chunk list's offset.
|
||||
chunk_list_offset = chunk_absolute_offset;
|
||||
chunk_ref->set_offset(0);
|
||||
chunkref_proto_size =
|
||||
chunk_ref->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
}
|
||||
// Move chunk reference to the indirect list. Use the UnsafeArena* function
|
||||
// again to pass ownership without copying the data.
|
||||
chunk_list->mutable_chunks()->UnsafeArenaAddAllocated(chunk_ref);
|
||||
chunk_list_size += chunkref_proto_size;
|
||||
// When the estimates get us above the limit, calculate the accurate size.
|
||||
if (chunk_list_size > max_size)
|
||||
chunk_list_size = chunk_list->ByteSizeLong();
|
||||
}
|
||||
// Write back final chunk list.
|
||||
return WriteBackChunkList(chunk_list_offset, *chunk_list,
|
||||
file->add_file_indirect_chunks());
|
||||
}
|
||||
|
||||
bool ManifestBuilder::EnforceAssetListProtoSize(
|
||||
AssetListProto* asset_list, RepeatedAssetProto* overflow) const {
|
||||
// A max. size of zero means no limit.
|
||||
const size_t max_size = manifest_->cdc_params().avg_chunk_size();
|
||||
if (!max_size) return false;
|
||||
size_t proto_size = asset_list->ByteSizeLong();
|
||||
bool changed = false;
|
||||
while (proto_size > max_size) {
|
||||
// Use the UnsafeArena* function to avoid a heap copy of the message.
|
||||
AssetProto* asset = asset_list->mutable_assets()->UnsafeArenaReleaseLast();
|
||||
proto_size -= asset->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
// When the estimates get us below the limit, calculate the accurate size.
|
||||
if (proto_size <= max_size) proto_size = asset_list->ByteSizeLong();
|
||||
overflow->UnsafeArenaAddAllocated(asset);
|
||||
changed = true;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::WriteBackAssetList(
|
||||
AssetListProto* asset_list, ContentIdProto* asset_list_id) {
|
||||
RETURN_IF_ERROR(WriteProto(*asset_list, asset_list_id),
|
||||
"Failed to write back AssetListProto");
|
||||
asset_lists_[*asset_list_id] = asset_list;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::WriteBackChunkList(
|
||||
uint64_t chunk_list_offset, const ChunkListProto& chunk_list,
|
||||
IndirectChunkListProto* indirect_chunk_list) {
|
||||
assert(chunk_list.chunks_size() > 0);
|
||||
RETURN_IF_ERROR(
|
||||
WriteProto(chunk_list, indirect_chunk_list->mutable_chunk_list_id()));
|
||||
indirect_chunk_list->set_offset(chunk_list_offset);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::WriteProto(
|
||||
const google::protobuf::MessageLite& proto, ContentIdProto* content_id) {
|
||||
size_t proto_size = 0;
|
||||
RETURN_IF_ERROR(data_store_->PutProto(proto, content_id, &proto_size));
|
||||
flushed_content_ids_.push_back(*content_id);
|
||||
// Update stats.
|
||||
manifest_bytes_written_ += proto_size;
|
||||
++manifest_chunks_written_;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::AppendAllocatedIndirectAssets(
|
||||
AssetProto* dir, RepeatedAssetProto* assets) {
|
||||
if (assets->empty()) return absl::OkStatus();
|
||||
|
||||
// The max. manifest chunk size that we try to stay under.
|
||||
const size_t max_size = manifest_->cdc_params().avg_chunk_size();
|
||||
// Use asset_list to track the last allocated list, if any.
|
||||
AssetListProto* asset_list = nullptr;
|
||||
// Index to the indirect asset list within |dir| currently in use. Defaults to
|
||||
// zero, which means that if |dir| does not have any indirect asset lists, the
|
||||
// code below will create the first one and store it at index zero.
|
||||
int asset_list_index = 0;
|
||||
// Approximate byte size of the asset list proto currently in use. This size
|
||||
// is updated with the byte size of any asset proto that is appended to the
|
||||
// list, but ignores any overhead from the embedding proto format (which
|
||||
  // should be negligible).
|
||||
size_t proto_size = 0;
|
||||
|
||||
// Find or create the AssetListProto where we can append the assets.
|
||||
if (dir->dir_indirect_assets_size() > 0) {
|
||||
// Load the last indirect asset list and see if we can append to it.
|
||||
asset_list_index = dir->dir_indirect_assets_size() - 1;
|
||||
const ContentIdProto& asset_list_id =
|
||||
dir->dir_indirect_assets(asset_list_index);
|
||||
// Take out the asset list from its original location since the content ID
|
||||
// will be updated anyway once we append more assets to it.
|
||||
ASSIGN_OR_RETURN(asset_list, TakeOutAssetList(asset_list_id));
|
||||
proto_size = asset_list->ByteSizeLong();
|
||||
} else {
|
||||
// Add the first indirect asset to |dir|, asset_list_index is already
|
||||
// initialized to zero.
|
||||
dir->add_dir_indirect_assets();
|
||||
asset_list = MakeProto<AssetListProto>();
|
||||
}
|
||||
|
||||
while (!assets->empty()) {
|
||||
// Use the UnsafeArena* function to avoid a heap copy of the message. Even
|
||||
// though it is released from the proto, the memory is still owned by the
|
||||
// |arena_| and shares its lifetime.
|
||||
AssetProto* asset = assets->UnsafeArenaReleaseLast();
|
||||
size_t asset_proto_size =
|
||||
asset->ByteSizeLong() + kRepeatedProtoFieldOverhead;
|
||||
// See if we need to create a new AssetListProto.
|
||||
if (max_size > 0 && proto_size > 0 &&
|
||||
proto_size + asset_proto_size > max_size) {
|
||||
// Write back the full list to the data store.
|
||||
RETURN_IF_ERROR(
|
||||
WriteBackAssetList(
|
||||
asset_list, dir->mutable_dir_indirect_assets(asset_list_index)),
|
||||
"Failed to write back asset list for directory '%s'", dir->name());
|
||||
// Create a new list.
|
||||
asset_list = MakeProto<AssetListProto>();
|
||||
proto_size = 0;
|
||||
asset_list_index = dir->dir_indirect_assets_size();
|
||||
dir->add_dir_indirect_assets();
|
||||
}
|
||||
// Append the allocated asset to the current list.
|
||||
asset_list->mutable_assets()->UnsafeArenaAddAllocated(asset);
|
||||
proto_size += asset_proto_size;
|
||||
}
|
||||
|
||||
// Write back the final asset list.
|
||||
RETURN_IF_ERROR(
|
||||
WriteBackAssetList(asset_list,
|
||||
dir->mutable_dir_indirect_assets(asset_list_index)),
|
||||
"Failed to write back final asset list for directory '%s'", dir->name());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::StatusOr<ContentIdProto> ManifestBuilder::Flush() {
|
||||
manifest_bytes_written_ = 0;
|
||||
manifest_chunks_written_ = 0;
|
||||
flushed_content_ids_.clear();
|
||||
if (!manifest_->has_root_dir()) {
|
||||
InitNewAsset("", AssetProto::DIRECTORY, manifest_->mutable_root_dir());
|
||||
}
|
||||
RETURN_IF_ERROR(FlushDir(manifest_->mutable_root_dir()));
|
||||
RETURN_IF_ERROR(WriteProto(*manifest_, &manifest_id_));
|
||||
return manifest_id_;
|
||||
}
|
||||
|
||||
ManifestBuilder::FileLookupMap ManifestBuilder::CreateFileLookup() {
|
||||
std::unordered_map<std::string, AssetProto*> lookup;
|
||||
CreateFileLookupRec(std::string(), manifest_->mutable_root_dir(), lookup);
|
||||
return lookup;
|
||||
}
|
||||
|
||||
void ManifestBuilder::CreateFileLookupRec(const std::string& rel_path,
|
||||
AssetProto* asset,
|
||||
FileLookupMap& lookup) {
|
||||
std::string rel_file_path = path::JoinUnix(rel_path, asset->name());
|
||||
if (asset->type() == AssetProto::FILE) {
|
||||
lookup[rel_file_path] = asset;
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle all direct assets.
|
||||
for (AssetProto& child : *asset->mutable_dir_assets())
|
||||
CreateFileLookupRec(rel_file_path, &child, lookup);
|
||||
|
||||
// Add all (loaded!) indirect assets as well.
|
||||
for (const ContentIdProto& id : asset->dir_indirect_assets()) {
|
||||
const auto iter = asset_lists_.find(id);
|
||||
if (iter == asset_lists_.end()) continue;
|
||||
AssetListProto* asset_list = iter->second;
|
||||
assert(asset_list);
|
||||
for (AssetProto& child : *asset_list->mutable_assets())
|
||||
CreateFileLookupRec(rel_file_path, &child, lookup);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the CDC parameters used for the manifest.
|
||||
CdcParamsProto ManifestBuilder::CdcParameters() const {
|
||||
return manifest_->cdc_params();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* ManifestBuilder::MakeProto() {
|
||||
return google::protobuf::Arena::CreateMessage<T>(&arena_);
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
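As a side note on EnforceFileProtoSize() and WriteBackChunkList() above: chunk refs that overflow into an indirect chunk list store their offsets relative to that list, while the IndirectChunkListProto records the absolute offset of the list's first chunk. A small sketch with made-up numbers:

// Hypothetical values, illustrating the offset conversion performed above.
uint64_t chunk_list_offset = 4096;      // stored in IndirectChunkListProto::offset()
uint64_t chunk_absolute_offset = 6144;  // the chunk's offset within the file
uint64_t relative_offset = chunk_absolute_offset - chunk_list_offset;  // == 2048
// A reader recovers the absolute position as list.offset() + chunk_ref.offset().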
296
manifest/manifest_builder.h
Normal file
@@ -0,0 +1,296 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_BUILDER_H_
|
||||
#define MANIFEST_MANIFEST_BUILDER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <list>
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "data_store/data_store_writer.h"
|
||||
#include "google/protobuf/arena.h"
|
||||
#include "manifest/asset_builder.h"
|
||||
#include "manifest/content_id.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// The ManifestBuilder class is used to create a manifest proto for the assets
|
||||
// (DIRECTORY, FILE, and SYMLINK) that are added incrementally. The proto is
|
||||
// finalized with a call to Flush(). When the CdcParamsProto given during
|
||||
// construction specifies an average chunk size, then the manifest will be split
|
||||
// into balanced chunks of at most this size.
|
||||
//
|
||||
// See (internal).
|
||||
class ManifestBuilder {
|
||||
public:
|
||||
// Default permission bits for new directories and files, respectively.
|
||||
static constexpr uint32_t kDefaultDirPerms = 0755u;
|
||||
static constexpr uint32_t kDefaultFilePerms = 0644u;
|
||||
|
||||
// Maps relative Unix file paths to the corresponding file asset proto.
|
||||
using FileLookupMap = std::unordered_map<std::string, AssetProto*>;
|
||||
|
||||
// Creates a new builder which reads from/writes to the given |data_store|.
|
||||
// The |cdc_params| are included in the resulting manifest proto and influence
|
||||
// the size of the manifest chunks which are written back to the
|
||||
  // |data_store|.
|
||||
ManifestBuilder(CdcParamsProto cdc_params, DataStoreWriter* data_store);
|
||||
~ManifestBuilder();
|
||||
|
||||
// Loads the manifest identified by |manifest_id| from the data store. Returns
|
||||
  // an absl::NotFoundError if the manifest ID does not exist, or another error
|
||||
  // if it is not a valid manifest proto.
|
||||
absl::Status LoadManifest(const ContentIdProto& manifest_id);
|
||||
|
||||
// Loads the manifest identified by the hexadecimal representation
|
||||
// |manifest_hex_id| from the data store. Returns an error if the string
|
||||
// representation is invalid or if the manifest ID does not exist or is not a
|
||||
// valid manifest proto.
|
||||
absl::Status LoadManifest(const std::string& manifest_hex_id);
|
||||
|
||||
// Returns the asset identified by the given Windows or Unix |path| or creates
|
||||
// a new one of type |type| if it does not exist yet. The |path| is relative
|
||||
// to the manifest's root directory. If the asset is created, any missing
|
||||
// directories in |path| that lead up to the asset are automatically
|
||||
// created as DIRECTORY assets with default permissions. Use a DIRECTORY
|
||||
// |type| with an empty |path| to retrieve the root directory asset.
|
||||
//
|
||||
  // If an asset at |path| exists but is of a different |type|, the outcome
|
||||
// depends on |force_create|. If this is set to false (the default), an
|
||||
// absl::AlreadyExistsError is returned. If it is set to true, the existing
|
||||
// asset is removed (recursively for directories) and a new asset with the
|
||||
// same name is created instead.
|
||||
//
|
||||
  // When |created| is given, it will be set to true if the asset was
|
||||
// actually added, otherwise it will be set to false.
|
||||
absl::StatusOr<AssetBuilder> GetOrCreateAsset(const std::string& path,
|
||||
AssetProto::Type type,
|
||||
bool force_create = false,
|
||||
bool* created = nullptr);
|
||||
|
||||
// Deletes the asset with the given |path|. If the asset is of type DIRECTORY,
|
||||
// the entire directory is deleted recursively. If no asset with this path
|
||||
// exists, the function returns success.
|
||||
absl::Status DeleteAsset(const std::string& path);
|
||||
|
||||
// Updates the manifest to reflect all changes that were done. Splits the
|
||||
// manifest into chunks of sizes as specified by the CdcParamsProto given
|
||||
// during construction.
|
||||
//
|
||||
// Calling this function might invalidate pointers to wrapped protos that were
|
||||
// returned by GetOrCreateAsset() or AssetBuilder methods.
|
||||
absl::StatusOr<ContentIdProto> Flush();
|
||||
|
||||
  // Creates a lookup of relative Unix file paths to the corresponding loaded
|
||||
  // file protos. The lookup does not contain unloaded indirect dir assets.
|
||||
FileLookupMap CreateFileLookup();
|
||||
|
||||
// Returns the content ID of the manifest which was valid after the last call
|
||||
// to Flush().
|
||||
const ContentIdProto& ManifestId() const;
|
||||
|
||||
// Gets the manifest proto which was valid after the last call to Flush().
|
||||
const ManifestProto* Manifest() const;
|
||||
|
||||
// Returns a list of the content IDs of all manifest chunks that have been
|
||||
// written back to the data store during the last call of Flush().
|
||||
const std::vector<ContentIdProto>& FlushedContentIds() const;
|
||||
|
||||
// Access statistics after Flush() about the manifest that was built.
|
||||
size_t ManifestBytesWritten() const;
|
||||
size_t ManifestsChunksWritten() const;
|
||||
|
||||
  // Returns the CDC parameters used for the manifest.
|
||||
CdcParamsProto CdcParameters() const;
|
||||
|
||||
private:
|
||||
// Map for storing loaded AssetListProtos by content ID. The protos are
|
||||
// allocated on the arena which owns the memory.
|
||||
using AssetListMap = std::unordered_map<ContentIdProto, AssetListProto*>;
|
||||
|
||||
  // Clears all loaded and/or changed data and resets the statistics.
|
||||
void Reset();
|
||||
|
||||
// Decides if and how directories are created.
|
||||
enum class DirCreateMode {
|
||||
// No directories are created and absl::NotFoundError might be returned.
|
||||
kNoCreate,
|
||||
// Missing directories are created, but absl::InvalidArgumentError might be
|
||||
// returned in case a non-directory asset with the same name exists.
|
||||
kCreate,
|
||||
// Missing directories are created, any asset of a different type will be
|
||||
// replaced with a DIRECTORY asset.
|
||||
kForceCreate
|
||||
};
|
||||
|
||||
// Follows the given |path| components along DIRECTORY assets and returns the
|
||||
// final DIRECTORY on success.
|
||||
//
|
||||
  // |create_dirs| determines if and when any missing DIRECTORY assets along the
|
||||
// way are created and what errors can be expected.
|
||||
absl::StatusOr<AssetProto*> FindOrCreateDirPath(
|
||||
const std::vector<absl::string_view>& path, DirCreateMode create_dirs);
|
||||
absl::StatusOr<AssetProto*> FindOrCreateDirPathRec(
|
||||
const std::vector<absl::string_view>& path, size_t path_idx,
|
||||
AssetProto* dir, DirCreateMode create_dirs);
|
||||
|
||||
// Searches for an asset with the given |name| in the given DIRECTORY asset.
|
||||
// Does not recurse into sub-directories. If no such asset is found, an
|
||||
// absl::NotFoundError is returned.
|
||||
absl::StatusOr<AssetProto*> FindAssetInDir(absl::string_view name,
|
||||
AssetProto* dir);
|
||||
|
||||
// Searches for an asset by its |name| in the given list of |assets|. If no
|
||||
// such asset is found, an absl::NotFoundError is returned.
|
||||
absl::StatusOr<AssetProto*> FindMutableAssetInList(
|
||||
absl::string_view name, RepeatedAssetProto* assets) const;
|
||||
|
||||
// Searches for an asset by its |name| and |type| in the given list of assets.
|
||||
// If no such asset is found, an absl::NotFoundError is returned.
|
||||
//
|
||||
  // If an asset with that name exists but has a different type, the outcome is
|
||||
// conditional on |overwrite|. If |overwrite| is true, then the existing
|
||||
// asset's type will be replaced with the given type and the asset is
|
||||
// returned. If |overwrite| is false, an absl::InvalidArgumentError is
|
||||
// returned.
|
||||
absl::StatusOr<AssetProto*> FindMutableAssetInList(
|
||||
absl::string_view name, AssetProto::Type type, bool overwrite,
|
||||
RepeatedAssetProto* assets) const;
|
||||
|
||||
// Deletes an asset with the given |name| in the given DIRECTORY asset. Does
|
||||
// not recurse into sub-directories. If no such asset is found, success is
|
||||
// returned.
|
||||
absl::Status DeleteAssetFromDir(absl::string_view name, AssetProto* dir);
|
||||
|
||||
// Deletes an asset by its |name| in the given list of |assets|. Returns true
|
||||
// if the asset was found and deleted, false otherwise.
|
||||
bool DeleteAssetFromList(absl::string_view name,
|
||||
RepeatedAssetProto* assets) const;
|
||||
|
||||
  // Initializes the given asset as an asset of the given |type| with
|
||||
  // default values for permissions and timestamps. Any previously set fields
|
||||
  // are cleared first.
|
||||
void InitNewAsset(absl::string_view name, AssetProto::Type type,
|
||||
AssetProto* asset) const;
|
||||
|
||||
// Retrieves the AssetListProto referenced by the given content |id|. If the
|
||||
// proto has been previously loaded, the stored (and potentially modified)
|
||||
// proto is returned. Otherwise, the proto is read from the chunk store.
|
||||
absl::StatusOr<AssetListProto*> GetAssetList(const ContentIdProto& id);
|
||||
|
||||
// Like GetAssetList(), but removes the AssetListProto from the |asset_lists_|
|
||||
// mapping.
|
||||
absl::StatusOr<AssetListProto*> TakeOutAssetList(const ContentIdProto& id);
|
||||
|
||||
// Convenience wrapper function for returning an error that the asset with the
|
||||
// given |name| did not match the |expected| asset type.
|
||||
absl::Status WrongAssetTypeError(absl::string_view name,
|
||||
AssetProto::Type found,
|
||||
AssetProto::Type expected) const;
|
||||
|
||||
// Flushes all pending information for |dir| and all sub-directories, enforces
|
||||
// the chunk size limit, updates the content IDs, and writes the chunks to the
|
||||
// chunk store.
|
||||
absl::Status FlushDir(AssetProto* dir);
|
||||
|
||||
// Flushes all DIRECTORY assets in the given list recursively.
|
||||
absl::Status FlushAssetList(RepeatedAssetProto* assets);
|
||||
|
||||
// Enforces the chunk size limit for the given DIRECTORY asset |dir|. Any
|
||||
  // direct asset that does not fit is moved to the |overflow| list.
|
||||
absl::Status EnforceDirProtoSize(AssetProto* dir,
|
||||
RepeatedAssetProto* overflow);
|
||||
|
||||
// Enforces the chunk size limit for the given FILE asset |file| to be at most
|
||||
// |max_size|. Any chunk that does not fit is moved to the file's indirect
|
||||
// chunk list.
|
||||
absl::Status EnforceFileProtoSize(AssetProto* file, size_t max_size);
|
||||
|
||||
// Enforces the chunk size limit for the given |asset_list|. Any asset that
|
||||
  // no longer fits is moved to the |overflow| list. Returns true if at
|
||||
// least one asset was moved, otherwise returns false.
|
||||
bool EnforceAssetListProtoSize(AssetListProto* asset_list,
|
||||
RepeatedAssetProto* overflow) const;
|
||||
|
||||
// Appends the given list of allocated |assets| to the DIRECTORY asset |dir|.
|
||||
// Ownership of the items in |assets| is passed on to |dir|.
|
||||
absl::Status AppendAllocatedIndirectAssets(AssetProto* dir,
|
||||
RepeatedAssetProto* assets);
|
||||
|
||||
// Writes the given AssetListProto to storage and updates |asset_list_id| with
|
||||
// the list's content ID. If the call succeeds, the |asset_lists_| map is
|
||||
// updated such that the resulting |asset_list_id| is referencing the
|
||||
// |asset_list|.
|
||||
absl::Status WriteBackAssetList(AssetListProto* asset_list,
|
||||
ContentIdProto* asset_list_id);
|
||||
|
||||
// Writes the given ChunkListProto |chunk_list| to storage and updates
|
||||
// |indirect_chunk_list| with the given |chunk_list_offset| and the resulting
|
||||
// content ID.
|
||||
absl::Status WriteBackChunkList(uint64_t chunk_list_offset,
|
||||
const ChunkListProto& chunk_list,
|
||||
IndirectChunkListProto* indirect_chunk_list);
|
||||
|
||||
// Wrapper around ChunkStore::WriteProto() which keeps track of chunks and
|
||||
// bytes written.
|
||||
absl::Status WriteProto(const google::protobuf::MessageLite& proto,
|
||||
ContentIdProto* content_id);
|
||||
|
||||
// Recursively iterates assets, adding all loaded file protos into |lookup|.
|
||||
// |rel_path| is the relative Unix directory path containing the |asset|.
|
||||
void CreateFileLookupRec(const std::string& rel_path, AssetProto* asset,
|
||||
FileLookupMap& lookup);
|
||||
|
||||
// Convenience wrapper to allocate a proto message on the arena.
|
||||
template <typename T>
|
||||
T* MakeProto();
|
||||
|
||||
// Constant overhead in bytes per repeated proto field.
|
||||
static constexpr size_t kRepeatedProtoFieldOverhead = 2;
|
||||
|
||||
// Data store to read and write manifest chunks.
|
||||
DataStoreWriter* data_store_;
|
||||
|
||||
// Content ID of the resulting manifest, updated in Flush().
|
||||
ContentIdProto manifest_id_;
|
||||
|
||||
// Content IDs of all manifest chunks that were written back to the data store
|
||||
// during the last call to Flush().
|
||||
std::vector<ContentIdProto> flushed_content_ids_;
|
||||
|
||||
// Holds the manifest proto under construction.
|
||||
ManifestProto* manifest_ = nullptr;
|
||||
|
||||
// CDC params used for the manifest.
|
||||
CdcParamsProto cdc_params_;
|
||||
|
||||
// List of AssetListProtos loaded from data_store_.
|
||||
AssetListMap asset_lists_;
|
||||
|
||||
// Useful stats.
|
||||
size_t manifest_bytes_written_ = 0;
|
||||
size_t manifest_chunks_written_ = 0;
|
||||
|
||||
// Arena for protos allocated by this builder.
|
||||
google::protobuf::Arena arena_;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_BUILDER_H_
|
||||
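The declarations above cover the write path of the builder: flushing directories, enforcing the per-chunk size limits, and writing the resulting manifest chunks to the data store. As a rough orientation, here is a minimal sketch of the public flow, using only calls that appear elsewhere in this commit (ManifestBuilder(params, data_store), GetOrCreateAsset(), the AssetBuilder setters, and Flush()); the function name and asset path are illustrative, not part of the library.

#include "data_store/data_store_writer.h"
#include "manifest/manifest_builder.h"
#include "manifest/manifest_proto_defs.h"

namespace cdc_ft {

// Hedged sketch: creates a manifest with a single (still empty) FILE asset
// and flushes it, returning the content ID of the resulting manifest.
absl::StatusOr<ContentIdProto> BuildTinyManifest(DataStoreWriter* data_store,
                                                 const CdcParamsProto& params) {
  ManifestBuilder builder(params, data_store);

  // Create (or fetch) a FILE asset at a relative Unix path.
  absl::StatusOr<AssetBuilder> file =
      builder.GetOrCreateAsset("dir/a.txt", AssetProto::FILE);
  if (!file.ok()) return file.status();
  file->SetMtimeSeconds(0);
  file->SetFileSize(0);

  // Flush() enforces the chunk size limits, updates content IDs, and writes
  // all manifest chunks to the data store.
  return builder.Flush();
}

}  // namespace cdc_ft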
1138
manifest/manifest_builder_test.cc
Normal file
File diff suppressed because it is too large
163
manifest/manifest_iterator.cc
Normal file
@@ -0,0 +1,163 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_iterator.h"
|
||||
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/errno_mapping.h"
|
||||
#include "common/log.h"
|
||||
#include "common/path.h"
|
||||
#include "common/status_macros.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// Holds the iteration state for an opened DIRECTORY asset.
|
||||
struct ManifestIterator::OpenedDirectory {
|
||||
OpenedDirectory(AssetProto* dir) : dir(dir) {}
|
||||
~OpenedDirectory() = default;
|
||||
|
||||
// The DIRECTORY proto that is being iterated over. The object is owned by the
|
||||
// parent OpenedDirectory struct.
|
||||
AssetProto* dir;
|
||||
|
||||
// Holds the currently loaded indirect asset list.
|
||||
std::unique_ptr<AssetListProto> asset_list;
|
||||
|
||||
// Index of the next direct asset to be returned from this directory. If the
|
||||
// index is equal to dir->dir_assets_size(), all direct assets have been
|
||||
// exhausted.
|
||||
int next_asset = 0;
|
||||
|
||||
// Index of the next indirect asset list to be read. If the index is equal to
|
||||
// dir->dir_indirect_assets_size(), all indirect asset lists have been
|
||||
// exhausted.
|
||||
int next_asset_list = 0;
|
||||
|
||||
// Index of the next asset of the currently loaded indirect asset list. If the
|
||||
// index is equal to asset_list->assets_size(), all assets in this list have
|
||||
// been exhausted.
|
||||
int next_asset_list_asset = 0;
|
||||
};
|
||||
|
||||
ManifestIterator::ManifestIterator(DataStoreReader* data_store)
|
||||
: last_opened_dir_(nullptr), data_store_(data_store) {
|
||||
assert(data_store_ != nullptr);
|
||||
}
|
||||
|
||||
ManifestIterator::~ManifestIterator() = default;
|
||||
|
||||
absl::Status ManifestIterator::Open(const ContentIdProto& manifest_id) {
|
||||
Reset();
|
||||
status_ = data_store_->GetProto(manifest_id, &manifest_);
|
||||
if (status_.ok()) dirs_.emplace_back(manifest_.mutable_root_dir());
|
||||
return status_;
|
||||
}
|
||||
|
||||
absl::Status ManifestIterator::Open(const std::string& manifest_file) {
|
||||
Reset();
|
||||
errno = 0;
|
||||
// Open input file.
|
||||
std::ifstream fin(manifest_file, std::ios_base::in | std::ios_base::binary);
|
||||
if (!fin) {
|
||||
std::string msg =
|
||||
absl::StrFormat("failed to open file '%s' for reading", manifest_file);
|
||||
if (errno) {
|
||||
status_ = ErrnoToCanonicalStatus(errno, msg);
|
||||
} else {
|
||||
status_ = absl::UnknownError(msg);
|
||||
}
|
||||
return status_;
|
||||
}
|
||||
// Parse proto.
|
||||
if (!manifest_.ParseFromIstream(&fin)) {
|
||||
status_ = absl::InternalError(absl::StrFormat(
|
||||
"failed to parse Manifest proto from file '%s'", manifest_file));
|
||||
return status_;
|
||||
}
|
||||
dirs_.emplace_back(manifest_.mutable_root_dir());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
bool ManifestIterator::Valid() const { return !dirs_.empty() && status_.ok(); }
|
||||
|
||||
AssetProto* ManifestIterator::MutableAsset(RepeatedAssetProto* assets,
|
||||
int index) {
|
||||
AssetProto* asset_pb = assets->Mutable(index);
|
||||
// Recurse into sub-directories.
|
||||
if (asset_pb->type() == AssetProto::DIRECTORY) dirs_.emplace_back(asset_pb);
|
||||
return asset_pb;
|
||||
}
|
||||
|
||||
void ManifestIterator::UpdateRelPath(const OpenedDirectory* od) {
|
||||
if (last_opened_dir_ == od) return;
|
||||
rel_path_.resize(0);
|
||||
for (const auto& opened_dir : dirs_) {
|
||||
path::AppendUnix(&rel_path_, opened_dir.dir->name());
|
||||
}
|
||||
last_opened_dir_ = od;
|
||||
}
|
||||
|
||||
const AssetProto* ManifestIterator::NextEntry() {
|
||||
while (!dirs_.empty() && status_.ok()) {
|
||||
OpenedDirectory* od = &dirs_.back();
|
||||
UpdateRelPath(od);
|
||||
|
||||
// First, iterate over the direct assets.
|
||||
if (od->next_asset >= 0 && od->next_asset < od->dir->dir_assets_size()) {
|
||||
return MutableAsset(od->dir->mutable_dir_assets(), od->next_asset++);
|
||||
}
|
||||
|
||||
// Next, iterate over the currently loaded indirect asset list.
|
||||
assert(od->next_asset_list_asset >= 0);
|
||||
if (od->asset_list &&
|
||||
od->next_asset_list_asset < od->asset_list->assets_size()) {
|
||||
return MutableAsset(od->asset_list->mutable_assets(),
|
||||
od->next_asset_list_asset++);
|
||||
}
|
||||
|
||||
// Finally, load the next AssetListProto from the indirect assets.
|
||||
assert(od->next_asset_list >= 0);
|
||||
if (od->next_asset_list < od->dir->dir_indirect_assets_size()) {
|
||||
// Create the proto, if needed.
|
||||
if (!od->asset_list) od->asset_list = std::make_unique<AssetListProto>();
|
||||
// Read the AssetListProto from the chunk store.
|
||||
const ContentIdProto& asset_list_id =
|
||||
od->dir->dir_indirect_assets(od->next_asset_list++);
|
||||
od->next_asset_list_asset = 0;
|
||||
status_ = data_store_->GetProto(asset_list_id, od->asset_list.get());
|
||||
if (!status_.ok()) return nullptr;
|
||||
// Restart the loop to read the first asset from the list.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Nothing more to visit, we are done with this node.
|
||||
dirs_.pop_back();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void ManifestIterator::Reset() {
|
||||
dirs_.clear();
|
||||
last_opened_dir_ = nullptr;
|
||||
status_ = absl::OkStatus();
|
||||
rel_path_.resize(0);
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
94
manifest/manifest_iterator.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_ITERATOR_H_
|
||||
#define MANIFEST_MANIFEST_ITERATOR_H_
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "data_store/data_store_reader.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
class ManifestIterator {
|
||||
public:
|
||||
// Constructs a new manifest iterator that can read a manifest proto from the
|
||||
// given |data_store|.
|
||||
explicit ManifestIterator(DataStoreReader* data_store);
|
||||
~ManifestIterator();
|
||||
|
||||
// Opens the manifest identified by |manifest_id| from the chunk store. If
|
||||
// this method returns an Ok() status, an AssetProto may be fetched by
|
||||
// calling NextEntry(). In case of an error, the value of Status() is
|
||||
// returned.
|
||||
absl::Status Open(const ContentIdProto& manifest_id);
|
||||
|
||||
// Opens the manifest stored in the file path given as |manifest_file|.
|
||||
// Further chunks will be read from the chunk store, if needed. If this method
|
||||
// returns an Ok() status, an AssetProto may be fetched by calling
|
||||
// NextEntry(). In case of an error, the value of Status() is returned.
|
||||
absl::Status Open(const std::string& manifest_file);
|
||||
|
||||
// Returns any error that might have occurred so far.
|
||||
absl::Status Status() const { return status_; }
|
||||
|
||||
// Returns true as long as a manifest has been opened, no error has occurred,
|
||||
// and a call to NextEntry() has a chance to succeed.
|
||||
bool Valid() const;
|
||||
|
||||
// Yields the next asset from the opened manifest. Returns nullptr in case of
|
||||
// an error or if no more assets are available. Check Status() to distinguish
|
||||
// between those two cases.
|
||||
//
|
||||
// Calling NextEntry() invalidates any references to objects returned by
|
||||
// previous calls to this function.
|
||||
const AssetProto* NextEntry();
|
||||
|
||||
// Returns the current relative path. This corresponds to the directory path
|
||||
// in which the asset returned from the last call to NextEntry() is located,
|
||||
// relative to the manifest root.
|
||||
const std::string& RelativePath() const { return rel_path_; }
|
||||
|
||||
// Returns a reference to the loaded manifest proto. Only valid after a
|
||||
// successful call to Open().
|
||||
const ManifestProto& Manifest() const { return manifest_; }
|
||||
|
||||
private:
|
||||
struct OpenedDirectory;
|
||||
|
||||
// Resets the iterator for a new Open() call.
|
||||
void Reset();
|
||||
|
||||
// Returns the AssetProto at |index| from the given list |assets|. If the
|
||||
// AssetProto is of type DIRECTORY, it is pushed on top of the stack of open
|
||||
// directories. Does not check if |index| is out-of-bounds.
|
||||
AssetProto* MutableAsset(RepeatedAssetProto* assets, int index);
|
||||
|
||||
// Updates the relative path according to the current stack of opened
|
||||
// directories.
|
||||
void UpdateRelPath(const OpenedDirectory* od);
|
||||
|
||||
ManifestProto manifest_;
|
||||
std::list<OpenedDirectory> dirs_;
|
||||
const OpenedDirectory* last_opened_dir_;
|
||||
std::string rel_path_;
|
||||
absl::Status status_;
|
||||
DataStoreReader* data_store_;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_ITERATOR_H_
|
||||
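Putting the interface together, a typical walk over a manifest looks like the loop used by the tests in this commit. A minimal sketch (the function name is illustrative; |data_store| and |manifest_id| are assumed to come from the caller):

#include <string>
#include <vector>

#include "absl/status/statusor.h"
#include "common/path.h"
#include "data_store/data_store_reader.h"
#include "manifest/manifest_iterator.h"

namespace cdc_ft {

// Hedged sketch: collects the relative Unix paths of all assets reachable
// from the manifest identified by |manifest_id|.
absl::StatusOr<std::vector<std::string>> ListAssetPaths(
    DataStoreReader* data_store, const ContentIdProto& manifest_id) {
  ManifestIterator it(data_store);
  absl::Status status = it.Open(manifest_id);
  if (!status.ok()) return status;

  std::vector<std::string> paths;
  const AssetProto* entry;
  while ((entry = it.NextEntry()) != nullptr) {
    paths.push_back(path::JoinUnix(it.RelativePath(), entry->name()));
  }

  // NextEntry() returns nullptr both at the end and on error; Status()
  // distinguishes the two cases.
  if (!it.Status().ok()) return it.Status();
  return paths;
}

}  // namespace cdc_ft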
59
manifest/manifest_printer.cc
Normal file
@@ -0,0 +1,59 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_printer.h"
|
||||
|
||||
#include "manifest/content_id.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// A special text proto printer that prints all ContentId protos using a
|
||||
// hexadecimal representation instead of octal-escaped string values.
|
||||
class ContentIdPrinter : public google::protobuf::TextFormat::MessagePrinter {
|
||||
public:
|
||||
ContentIdPrinter() = default;
|
||||
virtual ~ContentIdPrinter() = default;
|
||||
void Print(const google::protobuf::Message& message, bool single_line_mode,
|
||||
google::protobuf::TextFormat::BaseTextGenerator* generator)
|
||||
const override {
|
||||
const ContentIdProto* content_id =
|
||||
dynamic_cast<const ContentIdProto*>(&message);
|
||||
if (content_id) {
|
||||
generator->PrintLiteral("blake3_sum_160: \"");
|
||||
generator->PrintString(ContentId::ToHexString(*content_id));
|
||||
generator->PrintLiteral("\"");
|
||||
} else {
|
||||
// Technically, we should just call the inherited Print() function, but
|
||||
// this results in a linker error for unknown reasons. Since this printer
|
||||
// is never supposed to be called for any other message type, we don't
|
||||
// bother.
|
||||
generator->PrintLiteral("(given message is no ContentId proto)");
|
||||
}
|
||||
if (!single_line_mode) generator->PrintLiteral("\n");
|
||||
}
|
||||
};
|
||||
|
||||
ManifestPrinter::ManifestPrinter() {
|
||||
ContentIdPrinter* printer = new ContentIdPrinter;
|
||||
// If registration of a printer is successful, the callee takes ownership of
|
||||
// the object.
|
||||
if (!RegisterMessagePrinter(ContentIdProto::default_instance().descriptor(),
|
||||
printer)) {
|
||||
// Registration unsuccessful, delete the object.
|
||||
delete printer;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
42
manifest/manifest_printer.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_PRINTER_H_
|
||||
#define MANIFEST_MANIFEST_PRINTER_H_
|
||||
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// This class prints manifest protos as text, but uses a hexadecimal
|
||||
// representation for all ContentId protos to make them human-readable.
|
||||
//
|
||||
// Usage:
|
||||
// AssetListProto pb;
|
||||
// // ...
|
||||
// ManifestPrinter printer;
|
||||
// std::string s;
|
||||
//   printer.PrintToString(pb, &s);
|
||||
// std::cout << s << std::endl;
|
||||
class ManifestPrinter : public google::protobuf::TextFormat::Printer {
|
||||
public:
|
||||
ManifestPrinter();
|
||||
virtual ~ManifestPrinter() = default;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_PRINTER_H_
|
||||
52
manifest/manifest_proto_defs.h
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_PROTO_DEFS_H_
|
||||
#define MANIFEST_MANIFEST_PROTO_DEFS_H_
|
||||
|
||||
#include "proto/manifest.pb.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// Convenience typedefs to make the protos more easily accessible.
|
||||
using AssetListProto = proto::AssetList;
|
||||
using AssetProto = proto::Asset;
|
||||
using CdcParamsProto = proto::CdcParameters;
|
||||
using ChunkListProto = proto::ChunkList;
|
||||
using ChunkRefProto = proto::ChunkRef;
|
||||
using ContentIdProto = proto::ContentId;
|
||||
using IndirectChunkListProto = proto::IndirectChunkList;
|
||||
using ManifestProto = proto::Manifest;
|
||||
using RepeatedAssetProto = google::protobuf::RepeatedPtrField<AssetProto>;
|
||||
using RepeatedChunkRefProto = google::protobuf::RepeatedPtrField<ChunkRefProto>;
|
||||
using RepeatedContentIdProto =
|
||||
google::protobuf::RepeatedPtrField<ContentIdProto>;
|
||||
using RepeatedIndirectChunkListProto =
|
||||
google::protobuf::RepeatedPtrField<IndirectChunkListProto>;
|
||||
using RepeatedStringProto = google::protobuf::RepeatedPtrField<std::string>;
|
||||
|
||||
namespace proto {
|
||||
|
||||
inline bool operator==(const Asset& a, const Asset& b) {
|
||||
return a.SerializeAsString() == b.SerializeAsString();
|
||||
}
|
||||
|
||||
inline bool operator!=(const Asset& a, const Asset& b) { return !(a == b); }
|
||||
|
||||
} // namespace proto
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_PROTO_DEFS_H_
|
||||
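The equality operators above compare serialized bytes, which is sufficient for the tests in this commit. A hypothetical gtest snippet illustrating the effect (field names are taken from the samples in this commit; the test name is made up):

#include "gtest/gtest.h"
#include "manifest/manifest_proto_defs.h"

namespace cdc_ft {

TEST(AssetProtoEquality, ComparesSerializedForm) {
  AssetProto a;
  a.set_name("a.txt");
  a.set_type(AssetProto::FILE);

  AssetProto b = a;
  EXPECT_EQ(a, b);  // Identical serialized form.

  b.set_file_size(8);
  EXPECT_NE(a, b);  // One differing field breaks equality.
}

}  // namespace cdc_ft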
239
manifest/manifest_test_base.cc
Normal file
@@ -0,0 +1,239 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_test_base.h"
|
||||
|
||||
#include "common/path.h"
|
||||
#include "common/status_test_macros.h"
|
||||
#include "fastcdc/fastcdc.h"
|
||||
#include "manifest/manifest_iterator.h"
|
||||
#include "manifest/manifest_printer.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
namespace {
|
||||
// Helper function that tries to parse data as any of the protos written to the
|
||||
// store and returns its text proto representation.
|
||||
//
|
||||
// In order to disambiguate the proto auto-detection logic, you can temporarily
|
||||
// assign globally unique field numbers to all fields in manifest.proto.
|
||||
std::string ToTextProto(const ContentIdProto& content_id, const void* data,
|
||||
size_t size) {
|
||||
std::string text_proto;
|
||||
std::string proto_name = "(unknown proto format)";
|
||||
|
||||
ManifestProto manifest_pb;
|
||||
AssetListProto asset_list_pb;
|
||||
ChunkListProto chunk_list_pb;
|
||||
int isize = static_cast<int>(size);
|
||||
|
||||
ManifestPrinter printer;
|
||||
|
||||
if (size > 0) {
|
||||
if (manifest_pb.ParseFromArray(data, isize) &&
|
||||
!manifest_pb.GetReflection()
|
||||
->GetUnknownFields(manifest_pb)
|
||||
.field_count()) {
|
||||
printer.PrintToString(manifest_pb, &text_proto);
|
||||
proto_name = manifest_pb.GetTypeName();
|
||||
} else if (asset_list_pb.ParseFromArray(data, isize) &&
|
||||
!asset_list_pb.GetReflection()
|
||||
->GetUnknownFields(asset_list_pb)
|
||||
.field_count()) {
|
||||
printer.PrintToString(asset_list_pb, &text_proto);
|
||||
proto_name = asset_list_pb.GetTypeName();
|
||||
} else if (chunk_list_pb.ParseFromArray(data, isize) &&
|
||||
!chunk_list_pb.GetReflection()
|
||||
->GetUnknownFields(chunk_list_pb)
|
||||
.field_count()) {
|
||||
printer.PrintToString(chunk_list_pb, &text_proto);
|
||||
proto_name = chunk_list_pb.GetTypeName();
|
||||
}
|
||||
}
|
||||
return absl::StrFormat("# %s => %s (size: %d)\n%s",
|
||||
ContentId::ToHexString(content_id), proto_name, isize,
|
||||
text_proto);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Prints an AssetInfo object.
|
||||
std::ostream& operator<<(std::ostream& os,
|
||||
const ManifestTestBase::AssetInfoForTest& ai) {
|
||||
os << "{.path = \"" << ai.info.path
|
||||
<< "\", .type = " << AssetProto::Type_Name(ai.info.type)
|
||||
<< ", .mtime = " << ai.info.mtime << ", .size = " << ai.info.size
|
||||
<< ", .in_progress = " << (ai.in_progress ? "true" : "false") << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
ManifestTestBase::ManifestTestBase(std::string base_dir)
|
||||
: ::testing::Test(), base_dir_(base_dir) {}
|
||||
|
||||
std::vector<ManifestTestBase::AssetInfoForTest>
|
||||
ManifestTestBase::GetAllManifestAssets(ContentIdProto actual_manifest_id) {
|
||||
ContentIdProto manifest_id;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_store_id_, &manifest_id));
|
||||
EXPECT_EQ(manifest_id, actual_manifest_id);
|
||||
|
||||
ManifestIterator manifest_iter(&data_store_);
|
||||
EXPECT_OK(manifest_iter.Open(manifest_id));
|
||||
|
||||
std::vector<AssetInfoForTest> assets;
|
||||
const AssetProto* entry;
|
||||
while ((entry = manifest_iter.NextEntry()) != nullptr) {
|
||||
AssetInfoForTest ai;
|
||||
ai.info.path = path::JoinUnix(manifest_iter.RelativePath(), entry->name());
|
||||
ai.info.type = entry->type();
|
||||
ai.info.mtime = entry->mtime_seconds();
|
||||
ai.info.size = entry->file_size();
|
||||
ai.in_progress = entry->in_progress();
|
||||
assets.push_back(std::move(ai));
|
||||
}
|
||||
|
||||
EXPECT_OK(manifest_iter.Status());
|
||||
return assets;
|
||||
}
|
||||
|
||||
ManifestTestBase::AssetInfoForTest ManifestTestBase::MakeAssetInfo(
|
||||
const std::string& rel_path) {
|
||||
std::string full_path = path::Join(cfg_.src_dir, rel_path);
|
||||
path::Stats stats;
|
||||
EXPECT_OK(path::GetStats(full_path, &stats));
|
||||
// Don't use the stats.modified_time as this returns timestamps in the
|
||||
// machine's local time, whereas GetFileTime() returns UTC time.
|
||||
time_t mtime;
|
||||
EXPECT_OK(path::GetFileTime(full_path, &mtime));
|
||||
|
||||
AssetInfoForTest ai;
|
||||
ai.info.path = rel_path;
|
||||
ai.info.type =
|
||||
stats.mode & path::MODE_IFDIR ? AssetProto::DIRECTORY : AssetProto::FILE;
|
||||
ai.info.mtime = static_cast<int64_t>(mtime);
|
||||
ai.info.size = ai.info.type == AssetProto::DIRECTORY ? 0 : stats.size;
|
||||
return ai;
|
||||
}
|
||||
|
||||
std::vector<ManifestTestBase::AssetInfoForTest>
|
||||
ManifestTestBase::MakeAssetInfos(std::initializer_list<std::string> rel_paths) {
|
||||
std::vector<AssetInfoForTest> ais;
|
||||
for (const std::string& rel_path : rel_paths) {
|
||||
ais.push_back(MakeAssetInfo(rel_path));
|
||||
}
|
||||
return ais;
|
||||
}
|
||||
|
||||
ManifestUpdater::OperationList* ManifestTestBase::MakeOps(
|
||||
Operator op, std::initializer_list<std::string> rel_paths) {
|
||||
ops_.clear();
|
||||
ops_.reserve(rel_paths.size());
|
||||
for (const auto& rel_path : rel_paths) {
|
||||
ops_.emplace_back(op, MakeAssetInfo(rel_path).info);
|
||||
}
|
||||
return &ops_;
|
||||
}
|
||||
|
||||
ManifestUpdater::OperationList* ManifestTestBase::MakeDeleteOps(
|
||||
std::initializer_list<std::string> rel_paths) {
|
||||
return MakeOps(Operator::kDelete, rel_paths);
|
||||
}
|
||||
|
||||
ManifestUpdater::OperationList* ManifestTestBase::MakeUpdateOps(
|
||||
std::initializer_list<std::string> rel_paths) {
|
||||
return MakeOps(Operator::kUpdate, rel_paths);
|
||||
}
|
||||
|
||||
void ManifestTestBase::ExpectAssetInfosEqual(std::vector<AssetInfoForTest> a,
|
||||
std::vector<AssetInfoForTest> b,
|
||||
bool equal) {
|
||||
std::sort(a.begin(), a.end());
|
||||
std::sort(b.begin(), b.end());
|
||||
if (equal) {
|
||||
EXPECT_EQ(a, b);
|
||||
} else {
|
||||
EXPECT_NE(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
void ManifestTestBase::ExpectManifestEquals(
|
||||
std::initializer_list<std::string> rel_paths,
|
||||
const ContentIdProto& actual_manifest_id) {
|
||||
std::vector<AssetInfoForTest> manifest_ais =
|
||||
GetAllManifestAssets(actual_manifest_id);
|
||||
std::vector<AssetInfoForTest> expected_ais = MakeAssetInfos(rel_paths);
|
||||
ExpectAssetInfosEqual(manifest_ais, expected_ais);
|
||||
}
|
||||
|
||||
bool ManifestTestBase::InProgress(const ContentIdProto& manifest_id,
|
||||
const char* path) {
|
||||
// Special case: the root directory is not returned by the manifest iterator.
|
||||
if (absl::string_view(path) == "") {
|
||||
ManifestProto manifest;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_id, &manifest));
|
||||
return manifest.root_dir().in_progress();
|
||||
}
|
||||
|
||||
ManifestIterator manifest_iter(&data_store_);
|
||||
EXPECT_OK(manifest_iter.Open(manifest_id));
|
||||
if (!manifest_iter.Status().ok()) return false;
|
||||
|
||||
const AssetProto* entry;
|
||||
while ((entry = manifest_iter.NextEntry()) != nullptr) {
|
||||
if (path == path::JoinUnix(manifest_iter.RelativePath(), entry->name()))
|
||||
return entry->in_progress();
|
||||
}
|
||||
|
||||
EXPECT_TRUE(false) << "'" << path << "' not found in manifest";
|
||||
return false;
|
||||
}
|
||||
|
||||
void ManifestTestBase::ValidateChunkLookup(const std::string& rel_path,
|
||||
bool expect_contained) {
|
||||
uint64_t offset = 0;
|
||||
auto handler = [&offset, &rel_path, file_chunks = &file_chunks_,
|
||||
expect_contained](const void* data, size_t size) {
|
||||
ContentIdProto id = ContentId::FromArray(data, size);
|
||||
|
||||
std::string lookup_path;
|
||||
uint64_t lookup_offset = 0;
|
||||
uint32_t lookup_size = 0;
|
||||
EXPECT_EQ(
|
||||
file_chunks->Lookup(id, &lookup_path, &lookup_offset, &lookup_size),
|
||||
expect_contained);
|
||||
if (expect_contained) {
|
||||
EXPECT_EQ(lookup_path, rel_path);
|
||||
EXPECT_EQ(lookup_offset, offset);
|
||||
EXPECT_EQ(lookup_size, size);
|
||||
}
|
||||
|
||||
offset += size;
|
||||
};
|
||||
fastcdc::Config cdc_cfg(cfg_.min_chunk_size, cfg_.avg_chunk_size,
|
||||
cfg_.max_chunk_size);
|
||||
fastcdc::Chunker chunker(cdc_cfg, handler);
|
||||
|
||||
Buffer b;
|
||||
EXPECT_OK(path::ReadFile(path::Join(cfg_.src_dir, rel_path), &b));
|
||||
chunker.Process(reinterpret_cast<uint8_t*>(b.data()), b.size());
|
||||
chunker.Finalize();
|
||||
}
|
||||
|
||||
std::string ManifestTestBase::DumpDataStoreProtos() const {
|
||||
std::string s;
|
||||
for (const auto& [content_id, chunk] : data_store_.Chunks()) {
|
||||
s += ToTextProto(content_id, chunk.data(), chunk.size());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
155
manifest/manifest_test_base.h
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_TEST_BASE_H_
|
||||
#define MANIFEST_MANIFEST_TEST_BASE_H_
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
#include "data_store/mem_data_store.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "manifest/file_chunk_map.h"
|
||||
#include "manifest/manifest_updater.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// Test helper class to compare expected and actual manifests.
|
||||
class ManifestTestBase : public ::testing::Test {
|
||||
public:
|
||||
struct AssetInfoForTest {
|
||||
AssetInfo info;
|
||||
bool in_progress = false;
|
||||
|
||||
bool operator==(const AssetInfoForTest& other) const {
|
||||
return info == other.info && in_progress == other.in_progress;
|
||||
}
|
||||
|
||||
bool operator!=(const AssetInfoForTest& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
// Compares by file path.
|
||||
bool operator<(const AssetInfoForTest& other) const {
|
||||
return info.path < other.info.path;
|
||||
}
|
||||
};
|
||||
|
||||
explicit ManifestTestBase(std::string base_dir);
|
||||
~ManifestTestBase() = default;
|
||||
|
||||
protected:
|
||||
using Operation = ManifestUpdater::Operation;
|
||||
using Operator = ManifestUpdater::Operator;
|
||||
|
||||
// Returns the list of assets in the manifest stored in |data_store_|.
|
||||
std::vector<AssetInfoForTest> GetAllManifestAssets(
|
||||
ContentIdProto actual_manifest_id);
|
||||
|
||||
// Creates AssetInfo from the real files at |rel_path|.
|
||||
// The path is relative to |cfg_.src_dir|.
|
||||
AssetInfoForTest MakeAssetInfo(const std::string& rel_path);
|
||||
|
||||
// Creates AssetInfos from the real files at |rel_paths|.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
std::vector<AssetInfoForTest> MakeAssetInfos(
|
||||
std::initializer_list<std::string> rel_paths);
|
||||
|
||||
// Creates |op| operations for the given list of file paths.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
ManifestUpdater::OperationList* MakeOps(
|
||||
Operator op, std::initializer_list<std::string> rel_paths);
|
||||
|
||||
// Creates kDelete operations for the given list of file paths.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
ManifestUpdater::OperationList* MakeDeleteOps(
|
||||
std::initializer_list<std::string> rel_paths);
|
||||
|
||||
// Creates kUpdate operations from the real files at |rel_paths|.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
ManifestUpdater::OperationList* MakeUpdateOps(
|
||||
std::initializer_list<std::string> rel_paths);
|
||||
|
||||
// Expects that |a| and |b| are (not) equal, independently of order.
|
||||
void ExpectAssetInfosEqual(std::vector<AssetInfoForTest> a,
|
||||
std::vector<AssetInfoForTest> b,
|
||||
bool equal = true);
|
||||
|
||||
// Compares the contents of the manifest to the real files at |rel_paths|.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
void ExpectManifestEquals(std::initializer_list<std::string> rel_paths,
|
||||
const ContentIdProto& actual_manifest_id);
|
||||
|
||||
// Returns true if the asset at Unix |path| in the manifest referenced by
|
||||
// |manifest_id| is marked as in progress.
|
||||
// Expects the asset to be present in the manifest.
|
||||
bool InProgress(const ContentIdProto& manifest_id, const char* path);
|
||||
|
||||
// Validates that all file chunks in the file at |rel_path| are present in
|
||||
// |file_chunks_| if |expect_contained| is true. Otherwise, validates that
|
||||
// none of the chunks are present.
|
||||
void ValidateChunkLookup(const std::string& rel_path, bool expect_contained);
|
||||
|
||||
// Tries to parse all stored data chunks as manifest protos and formats them
|
||||
// as text protos. In order to disambiguate the proto auto-detection logic,
|
||||
// you can temporarily assign globally unique field numbers to all fields in
|
||||
// manifest.proto.
|
||||
//
|
||||
// Sample output:
|
||||
//
|
||||
// # aa8bef577a9af66e9330140c394e5fce557bd677 =>
|
||||
// cdc_ft.proto.Manifest (size: 48)
|
||||
// root_dir {
|
||||
// type: DIRECTORY
|
||||
// mtime_seconds: 1663935163
|
||||
// permissions: 493
|
||||
// dir_indirect_assets {
|
||||
// blake3_sum_160: "27b0cd2923714d143f32ec5394a02421fc89f5bc"
|
||||
// }
|
||||
// }
|
||||
// cdc_params {
|
||||
// min_chunk_size: 8
|
||||
// avg_chunk_size: 16
|
||||
// max_chunk_size: 32
|
||||
// }
|
||||
// # 27b0cd2923714d143f32ec5394a02421fc89f5bc =>
|
||||
// cdc_ft.proto.AssetList (size: 52)
|
||||
// assets {
|
||||
// name: "a.txt"
|
||||
// type: FILE
|
||||
// mtime_seconds: 1653999616
|
||||
// permissions: 420
|
||||
// file_size: 8
|
||||
// file_chunks {
|
||||
// chunk_id {
|
||||
// blake3_sum_160: "b1e57baceafdc3b03ab5189cb245757799874fbf"
|
||||
// }
|
||||
// }
|
||||
// in_progress: true
|
||||
// }
|
||||
std::string DumpDataStoreProtos() const;
|
||||
|
||||
std::string base_dir_;
|
||||
MemDataStore data_store_;
|
||||
UpdaterConfig cfg_;
|
||||
|
||||
FileChunkMap file_chunks_{/*enable_stats=*/false};
|
||||
ManifestUpdater::OperationList ops_;
|
||||
ContentIdProto manifest_store_id_ = ManifestUpdater::GetManifestStoreId();
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_TEST_BASE_H_
|
||||
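Concrete test suites derive from this fixture and point |cfg_.src_dir| at a directory of real files. A hypothetical sketch of such a subclass (the directory and file names are made up; the real tests in this commit use their own test data):

#include "gtest/gtest.h"
#include "manifest/manifest_test_base.h"

namespace cdc_ft {

class MyManifestTest : public ManifestTestBase {
 public:
  // "testdata" is a placeholder; real tests pass their own base directory.
  MyManifestTest() : ManifestTestBase("testdata") { cfg_.src_dir = base_dir_; }
};

TEST_F(MyManifestTest, MakeUpdateOpsBuildsOneOpPerPath) {
  // MakeUpdateOps() stats the real files below |cfg_.src_dir|, so these
  // paths must exist on disk for the expectations to hold.
  ManifestUpdater::OperationList* ops = MakeUpdateOps({"a.txt", "b.txt"});
  EXPECT_EQ(ops->size(), 2u);
}

}  // namespace cdc_ft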
816
manifest/manifest_updater.cc
Normal file
@@ -0,0 +1,816 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_updater.h"
|
||||
|
||||
#include <future>
|
||||
#include <thread>
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "common/log.h"
|
||||
#include "common/path.h"
|
||||
#include "common/stopwatch.h"
|
||||
#include "common/threadpool.h"
|
||||
#include "common/util.h"
|
||||
#include "data_store/data_store_writer.h"
|
||||
#include "fastcdc/fastcdc.h"
|
||||
#include "manifest/asset_builder.h"
|
||||
#include "manifest/file_chunk_map.h"
|
||||
#include "manifest/manifest_builder.h"
|
||||
#include "manifest/manifest_iterator.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
// Returns AssetInfos for all files and dirs in |src_dir| + |rel_path|. Does not
|
||||
// recurse into sub-directories.
|
||||
absl::Status GetAllSrcAssets(const std::string& src_dir,
|
||||
const std::string& rel_path,
|
||||
std::vector<AssetInfo>* src_assets) {
|
||||
std::string full_src_dir = path::Join(src_dir, rel_path);
|
||||
|
||||
path::EnsureEndsWithPathSeparator(&full_src_dir);
|
||||
auto handler = [src_assets, &src_dir = full_src_dir,
|
||||
rel_path = path::ToUnix(rel_path)](
|
||||
const std::string& dir, const std::string& filename,
|
||||
int64_t mtime, uint64_t size, bool is_dir) {
|
||||
AssetInfo ai;
|
||||
ai.path = path::JoinUnix(rel_path, filename);
|
||||
ai.type = is_dir ? AssetProto::DIRECTORY : AssetProto::FILE;
|
||||
ai.mtime = mtime;
|
||||
ai.size = is_dir ? 0 : size;
|
||||
src_assets->push_back(std::move(ai));
|
||||
return absl::OkStatus();
|
||||
};
|
||||
#if PLATFORM_WINDOWS
|
||||
// Windows expects a globbing pattern to search a path.
|
||||
std::string src_pattern = path::Join(full_src_dir, "*");
|
||||
#else
|
||||
std::string src_pattern = src_dir;
|
||||
#endif
|
||||
absl::Status status =
|
||||
path::SearchFiles(src_pattern, /*recursive=*/false, handler);
|
||||
std::sort(src_assets->begin(), src_assets->end());
|
||||
return status;
|
||||
}
|
||||
|
||||
// Creates a fastcdc::Config struct from a CdcParamsProto.
|
||||
fastcdc::Config CdcConfigFromProto(const CdcParamsProto& cfg_pb) {
|
||||
return fastcdc::Config(cfg_pb.min_chunk_size(), cfg_pb.avg_chunk_size(),
|
||||
cfg_pb.max_chunk_size());
|
||||
}
|
||||
|
||||
// Checks if a given CdcParamsProto is sane and can be used for FastCDC.
|
||||
bool ValidateCdcParams(const CdcParamsProto& params) {
|
||||
return params.min_chunk_size() <= params.avg_chunk_size() &&
|
||||
params.avg_chunk_size() <= params.max_chunk_size() &&
|
||||
params.max_chunk_size() > 0;
|
||||
}
|
||||
|
||||
// Returns the max. number of tasks that should be enqueued in the given thread
|
||||
// pool.
|
||||
size_t MaxQueuedTasks(const Threadpool& pool) { return pool.NumThreads() << 1; }
|
||||
|
||||
} // namespace
|
||||
|
||||
void AssetInfo::AppendCopyChunks(const RepeatedChunkRefProto& list,
|
||||
uint64_t list_offset) {
|
||||
chunks.reserve(chunks.size() + list.size());
|
||||
for (const ChunkRefProto& ch : list)
|
||||
chunks.emplace_back(ch.chunk_id(), ch.offset() + list_offset);
|
||||
}
|
||||
|
||||
void AssetInfo::AppendMoveChunks(RepeatedChunkRefProto* list,
|
||||
uint64_t list_offset) {
|
||||
chunks.reserve(chunks.size() + list->size());
|
||||
for (ChunkRefProto& ch : *list)
|
||||
chunks.emplace_back(std::move(*ch.mutable_chunk_id()),
|
||||
ch.offset() + list_offset);
|
||||
}
|
||||
|
||||
// Common fields for tasks that fill in manifest data.
|
||||
class ManifestTask : public Task {
|
||||
public:
|
||||
ManifestTask(std::string src_dir, std::string relative_unix_path,
|
||||
std::string filename)
|
||||
: src_dir_(std::move(src_dir)),
|
||||
rel_unix_path_(std::move(relative_unix_path)),
|
||||
filename_(std::move(filename)) {}
|
||||
|
||||
// Relative unix path of the directory containing the file or directory for
|
||||
// this task.
|
||||
const std::string& RelativeUnixPath() const { return rel_unix_path_; }
|
||||
|
||||
// Relative unix path of the file or directory for this task.
|
||||
std::string RelativeUnixFilePath() const {
|
||||
return path::JoinUnix(rel_unix_path_, filename_);
|
||||
}
|
||||
|
||||
// Name of the file or directory to process with this task.
|
||||
const std::string& Filename() const { return filename_; }
|
||||
|
||||
// Full path of the file or directory to process with this task.
|
||||
std::string FilePath() const {
|
||||
return path::Join(src_dir_, path::ToNative(rel_unix_path_), filename_);
|
||||
}
|
||||
|
||||
// Returns the final status of the task.
|
||||
// Should not be accessed before the task is finished.
|
||||
const absl::Status& Status() const { return status_; }
|
||||
|
||||
protected:
|
||||
const std::string src_dir_;
|
||||
const std::string rel_unix_path_;
|
||||
const std::string filename_;
|
||||
|
||||
absl::Status status_;
|
||||
};
|
||||
|
||||
// ThreadPool task that runs the CDC chunker on a given file.
|
||||
class FileChunkerTask : public ManifestTask {
|
||||
public:
|
||||
FileChunkerTask(std::string src_dir, std::string relative_path,
|
||||
std::string filename, const fastcdc::Config* cfg,
|
||||
Buffer buffer)
|
||||
: ManifestTask(std::move(src_dir), std::move(relative_path),
|
||||
std::move(filename)),
|
||||
cfg_(cfg),
|
||||
buffer_(std::move(buffer)) {
|
||||
assert(cfg_->max_size > 0);
|
||||
}
|
||||
|
||||
// Returns the number of bytes processed. Should match file size unless some
|
||||
// error occurred.
|
||||
// Should not be accessed before the task is finished.
|
||||
uint64_t ProcessedBytes() const { return processed_bytes_; }
|
||||
|
||||
// True if the file looks like a Linux executable based on ELF/shebang magic
|
||||
// headers.
|
||||
// Should not be accessed before the task is finished.
|
||||
bool IsExecutable() const { return is_executable_; }
|
||||
|
||||
// Returns the chunk hashes and offsets.
|
||||
// Should not be accessed before the task is finished.
|
||||
google::protobuf::RepeatedPtrField<ChunkRefProto>* Chunks() {
|
||||
return &chunks_;
|
||||
}
|
||||
|
||||
// Releases the allocated buffer and returns it to the caller.
|
||||
Buffer&& ReleaseBuffer() { return std::move(buffer_); }
|
||||
|
||||
// Task:
|
||||
void ThreadRun(IsCancelledPredicate is_cancelled) override {
|
||||
// TODO: Retry with backoff if this fails in practice, e.g. if the file is
|
||||
// changed repeatedly.
|
||||
std::string file_path = FilePath();
|
||||
absl::StatusOr<FILE*> file = path::OpenFile(file_path, "rb");
|
||||
if (!file.ok()) {
|
||||
status_ =
|
||||
WrapStatus(file.status(), "Failed to open file '%s'", file_path);
|
||||
return;
|
||||
}
|
||||
path::FileCloser closer(*file);
|
||||
|
||||
auto chunk_handler = [chunks = &chunks_, offset = &processed_bytes_](
|
||||
const void* data, size_t size) {
|
||||
ChunkRefProto* chunk = chunks->Add();
|
||||
*chunk->mutable_chunk_id() = ContentId::FromArray(data, size);
|
||||
chunk->set_offset(*offset);
|
||||
*offset += size;
|
||||
};
|
||||
fastcdc::Chunker chunker(*cfg_, chunk_handler);
|
||||
|
||||
bool first_chunk = true;
|
||||
auto stream_handler = [&chunker, &is_cancelled, &first_chunk,
|
||||
is_executable = &is_executable_,
|
||||
&file_path](const void* data, size_t size) {
|
||||
chunker.Process(static_cast<const uint8_t*>(data), size);
|
||||
if (first_chunk) {
|
||||
first_chunk = false;
|
||||
*is_executable = Util::IsExecutable(data, size);
|
||||
}
|
||||
return is_cancelled() ? absl::CancelledError(absl::StrFormat(
|
||||
"chunking file '%s' cancelled", file_path))
|
||||
: absl::OkStatus();
|
||||
};
|
||||
|
||||
status_ = path::StreamReadFileContents(*file, &buffer_, stream_handler);
|
||||
chunker.Finalize();
|
||||
}
|
||||
|
||||
private:
|
||||
const fastcdc::Config* const cfg_;
|
||||
|
||||
google::protobuf::RepeatedPtrField<ChunkRefProto> chunks_;
|
||||
uint64_t processed_bytes_ = 0;
|
||||
bool is_executable_ = false;
|
||||
Buffer buffer_;
|
||||
};
|
||||
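The chunk_handler/stream_handler pair above is the core of the task: FastCDC emits chunk boundaries through a callback while the file is streamed. The same callback pattern applied to an in-memory buffer, as a minimal sketch (the function name and chunk-size parameters are illustrative; the fastcdc and ContentId calls are the ones used elsewhere in this commit):

#include <cstdint>
#include <utility>
#include <vector>

#include "fastcdc/fastcdc.h"
#include "manifest/content_id.h"

namespace cdc_ft {

// Hedged sketch: chunks an in-memory buffer and records (content ID, offset)
// pairs, mirroring FileChunkerTask's chunk_handler.
std::vector<std::pair<ContentIdProto, uint64_t>> ChunkBuffer(
    const std::vector<uint8_t>& data) {
  std::vector<std::pair<ContentIdProto, uint64_t>> chunks;
  uint64_t offset = 0;

  auto handler = [&chunks, &offset](const void* chunk_data, size_t size) {
    chunks.emplace_back(ContentId::FromArray(chunk_data, size), offset);
    offset += size;
  };

  // Illustrative min/avg/max chunk sizes in bytes.
  fastcdc::Config cfg(128 << 10, 256 << 10, 1024 << 10);
  fastcdc::Chunker chunker(cfg, handler);
  chunker.Process(data.data(), data.size());
  chunker.Finalize();  // Emits the final, possibly undersized, chunk.
  return chunks;
}

}  // namespace cdc_ft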
|
||||
// ThreadPool task that creates assets for the contents of a directory.
|
||||
class DirScannerTask : public ManifestTask {
|
||||
public:
|
||||
DirScannerTask(std::string src_dir, std::string relative_path,
|
||||
std::string filename, AssetBuilder dir,
|
||||
DataStoreReader* data_store)
|
||||
: ManifestTask(std::move(src_dir), std::move(relative_path),
|
||||
std::move(filename)),
|
||||
dir_(dir),
|
||||
data_store_(data_store) {}
|
||||
|
||||
// Task:
|
||||
void ThreadRun(IsCancelledPredicate is_cancelled) override {
|
||||
std::vector<AssetInfo> src_assets, manifest_assets;
|
||||
// Collect all files from the given directory.
|
||||
status_ = GetAllSrcAssets(src_dir_, path::ToNative(RelativeUnixFilePath()),
|
||||
&src_assets);
|
||||
if (!status_.ok()) return;
|
||||
// Collect all assets from the manifest.
|
||||
status_ = GetAllAssetsFromDirAsset(&manifest_assets, is_cancelled);
|
||||
if (!status_.ok()) return;
|
||||
CompareAssets(src_assets, manifest_assets);
|
||||
if (is_cancelled()) status_ = absl::CancelledError();
|
||||
}
|
||||
|
||||
// Returns the IDs of indirect lists that were fetched when executing this
|
||||
// task.
|
||||
std::vector<ContentIdProto>* ManifestContentIds() {
|
||||
return &manifest_content_ids_;
|
||||
}
|
||||
|
||||
// Returns the AssetBuilder representing the directory this task is scanning.
|
||||
AssetBuilder* Dir() { return &dir_; }
|
||||
|
||||
// Returns the list of assets that need to be added or updated in the
|
||||
// directory that this task was scanning.
|
||||
ManifestUpdater::OperationList* Operations() { return &operations_; }
|
||||
|
||||
private:
|
||||
using Operator = ManifestUpdater::Operator;
|
||||
|
||||
// Stores AssetInfo structs for all assets found in |assets| in the
|
||||
// output parameter |asset_infos|.
|
||||
void GetAssetInfosFromList(const std::string& rel_path,
|
||||
const RepeatedAssetProto& assets,
|
||||
std::vector<AssetInfo>* asset_infos) {
|
||||
asset_infos->reserve(asset_infos->size() + assets.size());
|
||||
|
||||
for (const AssetProto& asset : assets) {
|
||||
AssetInfo ai;
|
||||
ai.path = path::JoinUnix(rel_path, asset.name());
|
||||
ai.type = asset.type();
|
||||
ai.mtime = asset.mtime_seconds();
|
||||
ai.size = asset.type() == AssetProto::DIRECTORY ? 0 : asset.file_size();
|
||||
|
||||
if (asset.type() == AssetProto::FILE) {
|
||||
// Copy chunks from the direct chunk list.
|
||||
ai.AppendCopyChunks(asset.file_chunks(), 0);
|
||||
|
||||
// Append all chunk IDs from indirect chunk lists.
|
||||
for (const IndirectChunkListProto& icl : asset.file_indirect_chunks()) {
|
||||
ChunkListProto chunk_list;
|
||||
absl::Status status =
|
||||
data_store_->GetProto(icl.chunk_list_id(), &chunk_list);
|
||||
if (!status.ok()) {
|
||||
// Pretend the file is empty.
|
||||
ai.chunks.clear();
|
||||
// Log a warning and continue so that the file is re-added and
|
||||
// corrected.
|
||||
LOG_WARNING(
|
||||
"Can't read indirect chunk list for file '%s': %s. The "
|
||||
"affected asset will be updated from disk.",
|
||||
ai.path, status.ToString());
|
||||
break;
|
||||
}
|
||||
ai.AppendMoveChunks(chunk_list.mutable_chunks(), icl.offset());
|
||||
// Collect the content IDs of all indirect chunk lists.
|
||||
manifest_content_ids_.push_back(icl.chunk_list_id());
|
||||
}
|
||||
}
|
||||
|
||||
asset_infos->emplace_back(std::move(ai));
|
||||
}
|
||||
}
|
||||
|
||||
// Collects all assets from the manifest directory at RelativeUnixFilePath()
|
||||
// and adds corresponding AssetInfo structs to |asset_infos|.
|
||||
absl::Status GetAllAssetsFromDirAsset(std::vector<AssetInfo>* asset_infos,
|
||||
IsCancelledPredicate is_cancelled) {
|
||||
// Collect all direct assets from the manifest.
|
||||
std::string rel_path = dir_.RelativeFilePath();
|
||||
GetAssetInfosFromList(rel_path, dir_.Proto()->dir_assets(), asset_infos);
|
||||
// Load all indirect asset lists, if there are any.
|
||||
if (dir_.Proto()->dir_indirect_assets_size() > 0) {
|
||||
auto it = dir_.Proto()->mutable_dir_indirect_assets()->begin();
|
||||
while (it != dir_.Proto()->mutable_dir_indirect_assets()->end()) {
|
||||
if (is_cancelled()) return absl::CancelledError();
|
||||
|
||||
AssetListProto list;
|
||||
absl::Status status = data_store_->GetProto(*it, &list);
|
||||
if (status.ok()) {
|
||||
GetAssetInfosFromList(rel_path, list.assets(), asset_infos);
|
||||
// Collect the content IDs of all indirect asset lists.
|
||||
manifest_content_ids_.push_back(*it);
|
||||
++it;
|
||||
} else {
|
||||
// In case of an error, log a warning and continue.
|
||||
LOG_WARNING(
|
||||
"Can't read indirect asset list for directory '%s': %s. The "
|
||||
"affected assets will be updated from disk.",
|
||||
rel_path, status.ToString());
|
||||
it = dir_.Proto()->mutable_dir_indirect_assets()->erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::sort(asset_infos->begin(), asset_infos->end());
|
||||
return is_cancelled() ? absl::CancelledError() : absl::OkStatus();
|
||||
}
|
||||
|
||||
// Both |src_assets| and |manifest_assets| must be sorted.
|
||||
void CompareAssets(const std::vector<AssetInfo>& src_assets,
|
||||
const std::vector<AssetInfo>& manifest_assets) {
|
||||
// Compare the arrays, sorting the assets into the right buckets.
|
||||
auto src_iter = src_assets.begin();
|
||||
auto manifest_iter = manifest_assets.begin();
|
||||
|
||||
while (src_iter != src_assets.end() ||
|
||||
manifest_iter != manifest_assets.end()) {
|
||||
const int order = src_iter == src_assets.end()
|
||||
? 1 // Extraneous manifest asset.
|
||||
: manifest_iter == manifest_assets.end()
|
||||
? -1 // Missing/outdated manifest asset.
|
||||
: src_iter->path.compare(manifest_iter->path);
|
||||
|
||||
if (order < 0) {
|
||||
// Missing manifest file -> add to manifest.
|
||||
operations_.emplace_back(Operator::kAdd, std::move(*src_iter));
|
||||
++src_iter;
|
||||
} else if (order > 0) {
|
||||
// Extraneous manifest asset -> delete.
|
||||
operations_.emplace_back(Operator::kDelete, std::move(*manifest_iter));
|
||||
++manifest_iter;
|
||||
} else if (src_iter->mtime == manifest_iter->mtime &&
|
||||
src_iter->type == manifest_iter->type &&
|
||||
// For files, compare the size.
|
||||
(src_iter->type != AssetProto::FILE ||
|
||||
src_iter->size == manifest_iter->size) &&
|
||||
// Directories always need to be updated recursively.
|
||||
src_iter->type != AssetProto::DIRECTORY) {
|
||||
// Assets match, keep content IDs from the manifest asset for populating
|
||||
// the FileChunkMap.
|
||||
operations_.emplace_back(Operator::kKeep, std::move(*manifest_iter));
|
||||
++src_iter;
|
||||
++manifest_iter;
|
||||
} else {
|
||||
// Source asset changed -> update manifest asset.
|
||||
operations_.emplace_back(Operator::kUpdate, std::move(*src_iter));
|
||||
++src_iter;
|
||||
++manifest_iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DataStoreReader* data_store_;
|
||||
AssetBuilder dir_;
|
||||
std::vector<ContentIdProto> manifest_content_ids_;
|
||||
ManifestUpdater::OperationList operations_;
|
||||
};
|
||||
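CompareAssets() above is a merge over two sorted lists: the three-way comparison decides per entry whether it is missing from the manifest, extraneous in the manifest, or present on both sides. The same pattern in isolation, as a standalone sketch over plain strings (not the production types):

#include <string>
#include <vector>

// Hedged sketch: merges two sorted name lists into add/delete/common buckets,
// mirroring the comparison loop in DirScannerTask::CompareAssets().
void DiffSorted(const std::vector<std::string>& src,
                const std::vector<std::string>& manifest,
                std::vector<std::string>* add, std::vector<std::string>* del,
                std::vector<std::string>* common) {
  auto s = src.begin();
  auto m = manifest.begin();
  while (s != src.end() || m != manifest.end()) {
    // Which side is "behind" decides the bucket for the current entry.
    const int order = s == src.end()        ? 1
                      : m == manifest.end() ? -1
                                            : s->compare(*m);
    if (order < 0) {
      add->push_back(*s++);  // Only on disk -> add to the manifest.
    } else if (order > 0) {
      del->push_back(*m++);  // Only in the manifest -> delete.
    } else {
      common->push_back(*s);  // Present on both sides -> keep or update.
      ++s;
      ++m;
    }
  }
}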
|
||||
// static
|
||||
ContentIdProto ManifestUpdater::GetManifestStoreId() {
|
||||
ContentIdProto manifest_store_id;
|
||||
ContentId::FromHexString("0000000000000000000000000000000000000000",
|
||||
&manifest_store_id);
|
||||
return manifest_store_id;
|
||||
}
|
||||
|
||||
// static
|
||||
absl::Status ManifestUpdater::IsValidDir(std::string dir) {
|
||||
path::EnsureDoesNotEndWithPathSeparator(&dir);
|
||||
|
||||
if (!path::IsAbsolute(dir)) {
|
||||
return absl::FailedPreconditionError(
|
||||
absl::StrFormat("Directory '%s' must be an absolute path.", dir));
|
||||
}
|
||||
|
||||
if (!path::Exists(dir)) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrFormat("Failed to find directory '%s'.", dir));
|
||||
}
|
||||
|
||||
if (!path::DirExists(dir)) {
|
||||
return absl::FailedPreconditionError(
|
||||
absl::StrFormat("Path '%s' should be a directory.", dir));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
ManifestUpdater::ManifestUpdater(DataStoreWriter* data_store, UpdaterConfig cfg)
|
||||
: data_store_(data_store), cfg_(std::move(cfg)) {
|
||||
path::EnsureEndsWithPathSeparator(&cfg_.src_dir);
|
||||
}
|
||||
|
||||
ManifestUpdater::~ManifestUpdater() = default;
|
||||
|
||||
absl::Status ManifestUpdater::UpdateAll(
|
||||
FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest) {
|
||||
RETURN_IF_ERROR(ManifestUpdater::IsValidDir(cfg_.src_dir));
|
||||
|
||||
// Don't use the Windows localized time from path::GetStats.
|
||||
time_t mtime;
|
||||
RETURN_IF_ERROR(path::GetFileTime(cfg_.src_dir, &mtime));
|
||||
|
||||
// Create the info for the root directory to start the recursive search.
|
||||
AssetInfo ri;
|
||||
ri.type = AssetProto::DIRECTORY;
|
||||
ri.mtime = mtime;
|
||||
|
||||
std::vector<Operation> operations{{Operator::kAdd, std::move(ri)}};
|
||||
|
||||
absl::Status status =
|
||||
Update(&operations, file_chunks, push_intermediate_manifest,
|
||||
/*recursive=*/true);
|
||||
|
||||
if (status.ok() || !absl::IsUnavailable(status)) return status;
|
||||
|
||||
// In case we receive an absl::UnavailableError, it means that not all
|
||||
// manifest chunks could be located. In that case, we wipe all data and
|
||||
// rebuild the manifest from scratch.
|
||||
LOG_WARNING("Failed to load manifest, building from scratch: %s",
|
||||
status.ToString());
|
||||
|
||||
RETURN_IF_ERROR(data_store_->Wipe());
|
||||
file_chunks->Clear();
|
||||
|
||||
RETURN_IF_ERROR(Update(&operations, file_chunks, push_intermediate_manifest,
|
||||
/*recursive=*/true),
|
||||
"Failed to build manifest from scratch");
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
ContentIdProto ManifestUpdater::DefaultManifestId() {
|
||||
CdcParamsProto params;
|
||||
params.set_min_chunk_size(cfg_.min_chunk_size);
|
||||
params.set_avg_chunk_size(cfg_.avg_chunk_size);
|
||||
params.set_max_chunk_size(cfg_.max_chunk_size);
|
||||
ManifestBuilder manifest_builder(params, data_store_);
|
||||
|
||||
// Load the manifest ID from the store. This is needed to extract the CDC
|
||||
// parameters that were used last time.
|
||||
ContentIdProto manifest_id;
|
||||
if ((data_store_->GetProto(GetManifestStoreId(), &manifest_id).ok()) &&
|
||||
manifest_builder.LoadManifest(manifest_id).ok() &&
|
||||
ValidateCdcParams(manifest_builder.CdcParameters())) {
|
||||
params = manifest_builder.CdcParameters();
|
||||
}
|
||||
|
||||
// Create an empty manifest with correct CDC parameters.
|
||||
ManifestBuilder new_manifest_builder(params, data_store_);
|
||||
absl::StatusOr<ContentIdProto> result = new_manifest_builder.Flush();
|
||||
assert(result.ok());
|
||||
manifest_id_ = *result;
|
||||
std::string id_str = manifest_id_.SerializeAsString();
|
||||
|
||||
absl::Status status =
|
||||
data_store_->Put(GetManifestStoreId(), id_str.data(), id_str.size());
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to store default manifest ID in data store: %s",
|
||||
status.ToString());
|
||||
}
|
||||
return manifest_id_;
|
||||
}
|
||||
|
||||
size_t ManifestUpdater::QueueTasks(Threadpool* pool,
|
||||
const fastcdc::Config* cdc_cfg,
|
||||
ManifestBuilder* manifest_builder) {
|
||||
const size_t max_tasks_queued = MaxQueuedTasks(*pool);
|
||||
size_t num_tasks_queued = 0;
|
||||
while (pool->NumQueuedTasks() < max_tasks_queued && !queue_.empty() &&
|
||||
!buffers_.empty()) {
|
||||
PendingAsset asset = std::move(queue_.front());
|
||||
absl::StatusOr<AssetBuilder> dir;
|
||||
queue_.pop_front();
|
||||
|
||||
switch (asset.type) {
|
||||
case AssetProto::FILE:
|
||||
pool->QueueTask(std::make_unique<FileChunkerTask>(
|
||||
cfg_.src_dir, std::move(asset.relative_path),
|
||||
std::move(asset.filename), cdc_cfg, std::move(buffers_.back())));
|
||||
buffers_.pop_back();
|
||||
break;
|
||||
|
||||
case AssetProto::DIRECTORY:
|
||||
dir = manifest_builder->GetOrCreateAsset(
|
||||
path::JoinUnix(asset.relative_path, asset.filename),
|
||||
AssetProto::DIRECTORY, true);
|
||||
if (!dir.ok()) {
|
||||
LOG_ERROR(
|
||||
"Failed to locate directory '%s' in the manifest, skipping it: "
|
||||
"%s",
|
||||
asset.relative_path, dir.status().ToString());
|
||||
continue;
|
||||
}
|
||||
pool->QueueTask(std::make_unique<DirScannerTask>(
|
||||
cfg_.src_dir, std::move(asset.relative_path),
|
||||
std::move(asset.filename), std::move(dir.value()), data_store_));
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_ERROR("Unexpected type '%s' for asset '%s'",
|
||||
AssetProto::Type_Name(asset.type), asset.relative_path);
|
||||
continue;
|
||||
}
|
||||
++num_tasks_queued;
|
||||
}
|
||||
return num_tasks_queued;
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::ApplyOperations(
|
||||
std::vector<Operation>* operations, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder, AssetBuilder* parent, bool recursive) {
|
||||
assert(manifest_builder != nullptr);
|
||||
if (operations->empty()) return absl::OkStatus();
|
||||
|
||||
// First, handle all deletions to make the outcome independent of the order of
|
||||
// operations (e.g., when the same file is added and deleted again).
|
||||
const std::string* last_deleted = nullptr;
|
||||
for (const Operation& op : *operations) {
|
||||
if (op.op != Operator::kDelete) continue;
|
||||
const AssetInfo& ai = op.info;
|
||||
|
||||
++stats_.total_assets_deleted;
|
||||
file_chunks->Remove(ai.path);
|
||||
if (last_deleted && absl::StartsWith(ai.path, *last_deleted) &&
|
||||
ai.path[last_deleted->size()] == '/') {
|
||||
// Optimization: |path| is part of a deleted dir, so it can be
|
||||
// skipped.
|
||||
continue;
|
||||
}
|
||||
RETURN_IF_ERROR(manifest_builder->DeleteAsset(ai.path),
|
||||
"Failed to delete asset '%s' from manifest", ai.path);
|
||||
last_deleted = &ai.path;
|
||||
}
|
||||
|
||||
// Second, handle additions and updates.
|
||||
AssetBuilder asset_builder;
|
||||
for (Operation& op : *operations) {
|
||||
AssetInfo& ai = op.info;
|
||||
bool created = true;
|
||||
|
||||
switch (op.op) {
|
||||
case Operator::kDelete:
|
||||
continue;
|
||||
|
||||
case Operator::kKeep:
|
||||
file_chunks->Init(ai.path, ai.size, &ai.chunks);
|
||||
continue;
|
||||
|
||||
case Operator::kAdd:
|
||||
// If a parent was given, assets are added as direct children of that
|
||||
// parent directory.
|
||||
if (parent) {
|
||||
asset_builder = parent->AppendAsset(path::BaseName(ai.path), ai.type);
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
|
||||
case Operator::kUpdate:
|
||||
ASSIGN_OR_RETURN(asset_builder,
|
||||
manifest_builder->GetOrCreateAsset(ai.path, ai.type,
|
||||
true, &created),
|
||||
"Failed to add '%s' to the manifest", ai.path);
|
||||
break;
|
||||
}
|
||||
|
||||
if (created) ++stats_.total_assets_added_or_updated;
|
||||
asset_builder.SetMtimeSeconds(ai.mtime);
|
||||
|
||||
if (ai.type == AssetProto::FILE) {
|
||||
// Assume everything is executable for the intermediate manifest.
|
||||
// The executable bit is derived from the file data, which is not
|
||||
// available at this point.
|
||||
asset_builder.SetPermissions(kExecutablePerms);
|
||||
asset_builder.TruncateChunks();
|
||||
asset_builder.SetFileSize(ai.size);
|
||||
// Queue chunker tasks for files.
|
||||
asset_builder.SetInProgress(true);
|
||||
} else if (recursive && ai.type == AssetProto::DIRECTORY) {
|
||||
      // We are recursing into all sub-directories, so we queue up the
|
||||
// child directory for scanning.
|
||||
asset_builder.SetInProgress(true);
|
||||
}
|
||||
|
||||
// If the asset is marked as in-progress, we need to queue it up.
|
||||
if (asset_builder.InProgress()) {
|
||||
queue_.emplace_back(ai.type, asset_builder.RelativePath(),
|
||||
asset_builder.Name());
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::HandleFileChunkerResult(
|
||||
FileChunkerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder) {
|
||||
const std::string rel_file_path = task->RelativeUnixFilePath();
|
||||
buffers_.emplace_back(task->ReleaseBuffer());
|
||||
|
||||
AssetBuilder asset_builder;
|
||||
ASSIGN_OR_RETURN(asset_builder, manifest_builder->GetOrCreateAsset(
|
||||
rel_file_path, AssetProto::FILE));
|
||||
asset_builder.SetInProgress(false);
|
||||
if (!task->Status().ok()) {
|
||||
// In case of an error, pretend the file is empty.
|
||||
asset_builder.SetFileSize(0);
|
||||
file_chunks->Init(rel_file_path, 0);
|
||||
|
||||
++stats_.total_files_failed;
|
||||
return task->Status();
|
||||
}
|
||||
|
||||
// Update the asset and the stats.
|
||||
uint64_t file_size = task->ProcessedBytes();
|
||||
stats_.total_chunks += task->Chunks()->size();
|
||||
stats_.total_processed_bytes += file_size;
|
||||
++stats_.total_files_added_or_updated;
|
||||
|
||||
asset_builder.SwapChunks(task->Chunks(), file_size);
|
||||
asset_builder.SetPermissions(task->IsExecutable()
|
||||
? kExecutablePerms
|
||||
: ManifestBuilder::kDefaultFilePerms);
|
||||
|
||||
file_chunks->Init(rel_file_path, file_size);
|
||||
file_chunks->AppendCopy(rel_file_path, asset_builder.Proto()->file_chunks(),
|
||||
0);
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::HandleDirScannerResult(
|
||||
DirScannerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids) {
|
||||
// Include the error in the stats, but we can still try to process the
|
||||
// (partial) results.
|
||||
if (!task->Status().ok()) {
|
||||
++stats_.total_dirs_failed;
|
||||
}
|
||||
|
||||
// DirScannerTasks are inherently recursive.
|
||||
RETURN_IF_ERROR(ApplyOperations(task->Operations(), file_chunks,
|
||||
manifest_builder, task->Dir(),
|
||||
/*recursive=*/true));
|
||||
task->Dir()->SetInProgress(false);
|
||||
// Union all manifest chunk content IDs.
|
||||
assert(manifest_content_ids != nullptr);
|
||||
manifest_content_ids->insert(task->ManifestContentIds()->begin(),
|
||||
task->ManifestContentIds()->end());
|
||||
return task->Status();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::Update(
|
||||
OperationList* operations, FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest, bool recursive) {
|
||||
Stopwatch sw;
|
||||
LOG_INFO(
|
||||
"Updating manifest for '%s': applying %u changes, "
|
||||
"%srecursive",
|
||||
cfg_.src_dir, operations->size(), recursive ? "" : "non-");
|
||||
|
||||
stats_ = UpdaterStats();
|
||||
|
||||
CdcParamsProto cdc_params;
|
||||
cdc_params.set_min_chunk_size(cfg_.min_chunk_size);
|
||||
cdc_params.set_avg_chunk_size(cfg_.avg_chunk_size);
|
||||
cdc_params.set_max_chunk_size(cfg_.max_chunk_size);
|
||||
ManifestBuilder manifest_builder(cdc_params, data_store_);
|
||||
|
||||
// Load the manifest id from the store.
|
||||
ContentIdProto manifest_id;
|
||||
absl::Status status =
|
||||
data_store_->GetProto(GetManifestStoreId(), &manifest_id);
|
||||
if (!status.ok()) {
|
||||
if (!absl::IsNotFound(status))
|
||||
return WrapStatus(status, "Failed to load manifest id");
|
||||
|
||||
// A non-existing manifest is not an issue, just build it from scratch.
|
||||
LOG_INFO("No cached manifest found. Building from scratch.");
|
||||
} else {
|
||||
RETURN_IF_ERROR(manifest_builder.LoadManifest(manifest_id),
|
||||
"Failed to load manifest with id '%s'",
|
||||
ContentId::ToHexString(manifest_id));
|
||||
// The CDC params might have changed when loading the manifest.
|
||||
if (ValidateCdcParams(manifest_builder.Manifest()->cdc_params())) {
|
||||
cdc_params = manifest_builder.Manifest()->cdc_params();
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(ApplyOperations(operations, file_chunks, &manifest_builder,
|
||||
nullptr, recursive));
|
||||
|
||||
Threadpool pool(cfg_.num_threads > 0 ? cfg_.num_threads
|
||||
: std::thread::hardware_concurrency());
|
||||
  // Pre-allocate one buffer of 2 * max_chunk_size bytes per queueable task.
|
||||
const size_t max_queued_tasks = MaxQueuedTasks(pool);
|
||||
buffers_.reserve(max_queued_tasks);
|
||||
while (buffers_.size() < max_queued_tasks)
|
||||
buffers_.emplace_back(cfg_.max_chunk_size << 1);
|
||||
size_t num_tasks_queued = 0;
|
||||
|
||||
// Collect the content IDs that make up the manifest when recursing. They are
|
||||
// used to prune the manifest cache directory in the end.
|
||||
std::unordered_set<ContentIdProto> manifest_content_ids;
|
||||
|
||||
// Push intermediate manifest if there are queued chunker tasks.
|
||||
if (push_intermediate_manifest && !queue_.empty()) {
|
||||
file_chunks->FlushUpdates();
|
||||
ASSIGN_OR_RETURN(manifest_id_, manifest_builder.Flush(),
|
||||
"Failed to flush intermediate manifest");
|
||||
// Add all content IDs that were just written back.
|
||||
manifest_content_ids.insert(manifest_builder.FlushedContentIds().begin(),
|
||||
manifest_builder.FlushedContentIds().end());
|
||||
push_intermediate_manifest(manifest_id_);
|
||||
}
|
||||
|
||||
fastcdc::Config cdc_cfg = CdcConfigFromProto(cdc_params);
|
||||
|
||||
// Wait for the chunker tasks and update file assets.
|
||||
while (!queue_.empty() || num_tasks_queued > 0) {
|
||||
num_tasks_queued += QueueTasks(&pool, &cdc_cfg, &manifest_builder);
|
||||
std::unique_ptr<Task> task = pool.GetCompletedTask();
|
||||
assert(num_tasks_queued > 0);
|
||||
--num_tasks_queued;
|
||||
|
||||
FileChunkerTask* chunker_task = dynamic_cast<FileChunkerTask*>(task.get());
|
||||
if (chunker_task) {
|
||||
status =
|
||||
HandleFileChunkerResult(chunker_task, file_chunks, &manifest_builder);
|
||||
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to process file '%s': %s", chunker_task->FilePath(),
|
||||
status.ToString());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
DirScannerTask* scanner_task = dynamic_cast<DirScannerTask*>(task.get());
|
||||
if (scanner_task) {
|
||||
status = HandleDirScannerResult(scanner_task, file_chunks,
|
||||
&manifest_builder, &manifest_content_ids);
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to process directory '%s': %s",
|
||||
scanner_task->FilePath(), status.ToString());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
file_chunks->FlushUpdates();
|
||||
ASSIGN_OR_RETURN(manifest_id_, manifest_builder.Flush(),
|
||||
"Failed to flush manifest");
|
||||
|
||||
// Save the manifest id to the store.
|
||||
std::string id_str = manifest_id_.SerializeAsString();
|
||||
RETURN_IF_ERROR(
|
||||
data_store_->Put(GetManifestStoreId(), id_str.data(), id_str.size()),
|
||||
"Failed to store manifest id");
|
||||
|
||||
// Remove manifest chunks that are no longer referenced when recursing through
|
||||
// all sub-directories. This also makes sure that all referenced manifest
|
||||
// chunks are present.
|
||||
if (status.ok() && recursive) {
|
||||
// Retain the chunk that stores the manifest ID.
|
||||
manifest_content_ids.insert(ManifestUpdater::GetManifestStoreId());
|
||||
// Add all content IDs that were just written back.
|
||||
manifest_content_ids.insert(manifest_builder.FlushedContentIds().begin(),
|
||||
manifest_builder.FlushedContentIds().end());
|
||||
status = data_store_->Prune(std::move(manifest_content_ids));
|
||||
if (!status.ok()) {
|
||||
// Signal to the caller that the manifest needs to be rebuilt from
|
||||
// scratch.
|
||||
return absl::UnavailableError(status.ToString());
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO("Manifest for '%s' successfully updated in %0.3f seconds",
|
||||
cfg_.src_dir, sw.ElapsedSeconds());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
268
manifest/manifest_updater.h
Normal file
@@ -0,0 +1,268 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MANIFEST_MANIFEST_UPDATER_H_
|
||||
#define MANIFEST_MANIFEST_UPDATER_H_
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "common/buffer.h"
|
||||
#include "manifest/asset_builder.h"
|
||||
#include "manifest/file_chunk_map.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace fastcdc {
|
||||
struct Config;
|
||||
}
|
||||
|
||||
class AssetBuilder;
|
||||
class DataStoreWriter;
|
||||
class DirScannerTask;
|
||||
class FileChunkerTask;
|
||||
class ManifestBuilder;
|
||||
class Threadpool;
|
||||
|
||||
struct UpdaterConfig {
|
||||
  // Source directory from which to build the manifest recursively.
|
||||
std::string src_dir;
|
||||
|
||||
// Minimum allowed chunk size.
|
||||
size_t min_chunk_size = 128 << 10;
|
||||
|
||||
// Target average chunk size.
|
||||
size_t avg_chunk_size = 256 << 10;
|
||||
|
||||
// Maximum allowed chunk size.
|
||||
size_t max_chunk_size = 1024 << 10;
|
||||
|
||||
// Size of the chunker thread pool. Defaults to the number of available CPUs.
|
||||
uint32_t num_threads = 0;
|
||||
};
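// Illustrative sketch of filling in a config before constructing a
// ManifestUpdater; the directory path below is a made-up placeholder.
//
//   UpdaterConfig cfg;
//   cfg.src_dir = "C:\\example\\assets";  // hypothetical source directory
//   cfg.num_threads = 4;                  // 0 means "use all available CPUs"
//   // min/avg/max_chunk_size keep their defaults of 128/256/1024 KiB.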
|
||||
|
||||
struct UpdaterStats {
|
||||
// Total no. of assets that were added or updated.
|
||||
size_t total_assets_added_or_updated = 0;
|
||||
|
||||
// Total no. of assets of type FILE that were added or updated.
|
||||
size_t total_files_added_or_updated = 0;
|
||||
|
||||
// Total no. of files where processing failed.
|
||||
size_t total_files_failed = 0;
|
||||
|
||||
// Total no. of directories where processing failed.
|
||||
size_t total_dirs_failed = 0;
|
||||
|
||||
// Total no. of assets that were deleted (not counting subdirectory files).
|
||||
size_t total_assets_deleted = 0;
|
||||
|
||||
// Total no. of chunks created.
|
||||
size_t total_chunks = 0;
|
||||
|
||||
// Total no. of bytes processed from the files added or updated.
|
||||
size_t total_processed_bytes = 0;
|
||||
};
|
||||
|
||||
struct AssetInfo {
|
||||
// Unix path to the asset relative to the source directory.
|
||||
std::string path;
|
||||
|
||||
// Type (file, dir, etc.)
|
||||
AssetProto::Type type = AssetProto::FILE;
|
||||
|
||||
// Modification time in seconds since Epoch.
|
||||
int64_t mtime = 0;
|
||||
|
||||
// File size (0 for directories).
|
||||
uint64_t size = 0;
|
||||
|
||||
// File chunks (empty for directories). This list is ignored when comparing
|
||||
// one AssetInfo to another.
|
||||
std::vector<FileChunk> chunks;
|
||||
|
||||
// Appends the chunks from |list| to |chunks|.
|
||||
void AppendCopyChunks(const RepeatedChunkRefProto& list,
|
||||
uint64_t list_offset);
|
||||
|
||||
// Appends the chunks from |list| to |chunks|, but moves the data out of
|
||||
// |list| instead of copying, wherever possible.
|
||||
void AppendMoveChunks(RepeatedChunkRefProto* list, uint64_t list_offset);
|
||||
|
||||
bool operator==(const AssetInfo& other) const {
|
||||
return path == other.path && type == other.type && mtime == other.mtime &&
|
||||
size == other.size;
|
||||
}
|
||||
bool operator!=(const AssetInfo& other) const { return !(*this == other); }
|
||||
|
||||
// Compares by file path.
|
||||
bool operator<(const AssetInfo& other) const { return path < other.path; }
|
||||
};
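// Construction sketch (illustrative; the values are made up):
//
//   AssetInfo ai;
//   ai.path = "subdir/file.txt";  // Unix path relative to the source dir
//   ai.type = AssetProto::FILE;
//   ai.mtime = 1660000000;        // seconds since Epoch
//   ai.size = 42;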
|
||||
|
||||
// Incrementally updates a manifest
|
||||
class ManifestUpdater {
|
||||
public:
|
||||
// Selects the update operation to be performed.
|
||||
enum class Operator { kAdd, kUpdate, kDelete, kKeep };
|
||||
|
||||
// Represents an update operation that shall be applied to the owned manifest.
|
||||
struct Operation {
|
||||
Operation() {}
|
||||
Operation(Operator op, AssetInfo info) : op(op), info(std::move(info)) {}
|
||||
|
||||
Operator op;
|
||||
AssetInfo info;
|
||||
};
|
||||
|
||||
using OperationList = std::vector<Operation>;
|
||||
|
||||
// Permissions for executable files.
|
||||
static constexpr uint32_t kExecutablePerms = 0755u;
|
||||
|
||||
// Id of the chunk that stores the manifest id.
|
||||
static ContentIdProto GetManifestStoreId();
|
||||
|
||||
  // Returns an error if |dir| does not exist or is not a directory.
|
||||
static absl::Status IsValidDir(std::string dir);
|
||||
|
||||
using PushIntermediateManifest =
|
||||
std::function<void(const ContentIdProto& manifest_id)>;
|
||||
|
||||
// |data_store| is used to store manifest chunks. File data chunks are not
|
||||
// stored explicitly as they can be read from the original files.
|
||||
// |cfg| determines the source directory to update the manifest from as well
|
||||
// as configuration details about chunking.
|
||||
ManifestUpdater(DataStoreWriter* data_store, UpdaterConfig cfg);
|
||||
~ManifestUpdater();
|
||||
|
||||
ManifestUpdater(const ManifestUpdater&) = delete;
|
||||
ManifestUpdater& operator=(const ManifestUpdater&) = delete;
|
||||
|
||||
// Reads the full source directory and syncs the manifest to it. Prunes old,
|
||||
// unreferenced manifest chunks. Updates and flushes |file_chunks|.
|
||||
//
|
||||
// If a valid |push_intermediate_manifest| is passed, then a manifest is
|
||||
// flushed after the root directory has been added, but before all files and
|
||||
  // directories have been processed. That means the manifest does not yet
|
||||
  // contain all assets; all incomplete assets are set to in-progress.
|
||||
absl::Status UpdateAll(FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest =
|
||||
PushIntermediateManifest());
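  // Usage sketch (illustrative; |data_store|, |cfg| and |file_chunks| are
  // assumed to exist already):
  //
  //   ManifestUpdater updater(&data_store, cfg);
  //   absl::Status status = updater.UpdateAll(
  //       &file_chunks, [](const ContentIdProto& intermediate_id) {
  //         // The intermediate manifest can already be served while chunking
  //         // is still in progress.
  //       });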
|
||||
|
||||
// Updates the manifest by applying the |operations| list. Deletions are
|
||||
// handled first to make the outcome independent of the order in the list.
|
||||
// Also updates and flushes |file_chunks| with the changes made. See
|
||||
// UpdateAll() for a description of |push_intermediate_manifest|.
|
||||
//
|
||||
// All paths should be Unix paths. If |recursive| is true, then a directory
|
||||
// scanner task is enqueued for each directory that is added to the manifest.
|
||||
// This is only needed during UpdateAll(). When the manifest is updated in
|
||||
// response to file watcher changes, then |recursive| should be set to false.
|
||||
absl::Status Update(OperationList* operations, FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest =
|
||||
PushIntermediateManifest(),
|
||||
bool recursive = false);
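  // Usage sketch (illustrative; |ai| would typically be derived from a file
  // watcher event, |updater| and |file_chunks| exist already):
  //
  //   ManifestUpdater::OperationList ops = {
  //       {ManifestUpdater::Operator::kUpdate, ai}};
  //   absl::Status status = updater.Update(&ops, &file_chunks);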
|
||||
|
||||
// Content id of the current manifest.
|
||||
const ContentIdProto& ManifestId() const { return manifest_id_; }
|
||||
|
||||
// Returns stats created from the last call to UpdateAll() or Update().
|
||||
const UpdaterStats& Stats() const { return stats_; }
|
||||
|
||||
// Returns the manifest updater configuration.
|
||||
const UpdaterConfig& Config() const { return cfg_; }
|
||||
|
||||
// Returns an empty manifest.
|
||||
ContentIdProto DefaultManifestId();
|
||||
|
||||
private:
|
||||
// Adds enough pending assets from |queue_| as tasks to the |pool| to keep all
|
||||
// worker threads busy. Returns the number of tasks that were added.
|
||||
size_t QueueTasks(Threadpool* pool, const fastcdc::Config* cdc_cfg,
|
||||
ManifestBuilder* manifest_builder);
|
||||
|
||||
  // Applies the |operations| list to the manifest owned by the
|
||||
// |manifest_builder|. First, all deletions are handled and the corresponding
|
||||
// files are removed from the |file_chunks| map, then all added or updated
|
||||
// assets are processed. This guarantees that the outcome is independent of
|
||||
// the order in the list.
|
||||
//
|
||||
// If |parent| is non-null, then it must be of type DIRECTORY and all added
|
||||
// assets are made direct children of |parent|. The function does *not* verify
|
||||
// that all children have |parent| as directory path.
|
||||
//
|
||||
  // Enqueues tasks to chunk the files that were added or
|
||||
// updated. If |recursive| is true, then it will also enqueue directory
|
||||
// scanner tasks for all given directories.
|
||||
absl::Status ApplyOperations(std::vector<Operation>* operations,
|
||||
FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
AssetBuilder* parent, bool recursive);
|
||||
|
||||
// Handles the results of a completed FileChunkerTask.
|
||||
absl::Status HandleFileChunkerResult(FileChunkerTask* task,
|
||||
FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder);
|
||||
|
||||
// Handles the results of a completed DirScannerTask.
|
||||
absl::Status HandleDirScannerResult(
|
||||
DirScannerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids);
|
||||
|
||||
// Represents an asset that has not been fully processed yet.
|
||||
struct PendingAsset {
|
||||
PendingAsset() {}
|
||||
PendingAsset(AssetProto::Type type, std::string relative_path,
|
||||
std::string filename)
|
||||
: type(type),
|
||||
relative_path(std::move(relative_path)),
|
||||
filename(std::move(filename)) {}
|
||||
|
||||
// The asset type (either FILE or DIRECTORY).
|
||||
AssetProto::Type type = AssetProto::UNKNOWN;
|
||||
|
||||
// Relative unix path of the directory containing this asset.
|
||||
std::string relative_path;
|
||||
|
||||
// File name of the asset that still needs processing.
|
||||
std::string filename;
|
||||
};
|
||||
|
||||
// Queue of pending assets waiting for completion.
|
||||
std::list<PendingAsset> queue_;
|
||||
|
||||
// Pool of pre-allocated buffers
|
||||
std::vector<Buffer> buffers_;
|
||||
|
||||
// Store for manifest chunks and the manifest id itself.
|
||||
DataStoreWriter* const data_store_;
|
||||
|
||||
// Source directory to build the manifest from and configuration details.
|
||||
UpdaterConfig cfg_;
|
||||
|
||||
// ID of the manifest chunk.
|
||||
ContentIdProto manifest_id_;
|
||||
|
||||
// Stats for the last Update*() operation.
|
||||
UpdaterStats stats_;
|
||||
};
|
||||
|
||||
}  // namespace cdc_ft
|
||||
|
||||
#endif // MANIFEST_MANIFEST_UPDATER_H_
|
||||
655
manifest/manifest_updater_test.cc
Normal file
@@ -0,0 +1,655 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/manifest_updater.h"
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "common/path.h"
|
||||
#include "common/status_test_macros.h"
|
||||
#include "common/test_main.h"
|
||||
#include "data_store/mem_data_store.h"
|
||||
#include "fastcdc/fastcdc.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "manifest/file_chunk_map.h"
|
||||
#include "manifest/manifest_builder.h"
|
||||
#include "manifest/manifest_iterator.h"
|
||||
#include "manifest/manifest_test_base.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
void PrintTo(const AssetInfo& ai, std::ostream* o) {
|
||||
*o << "path=" << ai.path << ", type=" << ai.type << ", mtime=" << ai.mtime
|
||||
<< ", size=" << ai.size;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint64_t kFileSizeA = 8; // a.txt
|
||||
constexpr uint64_t kFileSizeB = 32; // subdir/b.txt
|
||||
constexpr uint64_t kFileSizeC = 1; // subdir/c.txt
|
||||
constexpr uint64_t kFileSizeD = 1; // subdir/d.txt
|
||||
constexpr uint64_t kTotalFileSize =
|
||||
kFileSizeA + kFileSizeB + kFileSizeC + kFileSizeD;
|
||||
|
||||
class ManifestUpdaterTest : public ManifestTestBase {
|
||||
public:
|
||||
ManifestUpdaterTest()
|
||||
: ManifestTestBase(GetTestDataDir("manifest_updater")) {}
|
||||
|
||||
void SetUp() override {
|
||||
path::CreateDirRec(empty_dir_).IgnoreError();
|
||||
cfg_.num_threads = 1;
|
||||
}
|
||||
|
||||
void TearDown() override { path::RemoveDirRec(empty_dir_).IgnoreError(); }
|
||||
|
||||
protected:
|
||||
std::string empty_dir_ = path::Join(path::GetTempDir(), "empty");
|
||||
};
|
||||
|
||||
// Runs UpdateAll() on an empty dir.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_EmptySrcDirectory) {
|
||||
cfg_.src_dir = empty_dir_;
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
UpdaterStats stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
|
||||
// Store should contain a chunk for the manifest id and one for the manifest.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 2);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Runs UpdateAll() on a non-empty dir.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_NonEmptySrcDirectory) {
|
||||
  // Contains a.txt, subdir/b.txt, subdir/c.txt and subdir/d.txt.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 5);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 4);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 4);
|
||||
EXPECT_EQ(stats.total_processed_bytes, kTotalFileSize);
|
||||
|
||||
// Store should contain a chunk for the manifest id and one for the manifest.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 2);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"},
|
||||
updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Runs UpdateAll() with an existing manifest that is missing several files.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_AddFileIncremental) {
|
||||
  // Create a manifest with the subdir/*.txt files missing.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeDeleteOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
ASSERT_NO_FATAL_FAILURE(
|
||||
ExpectManifestEquals({"a.txt", "subdir"}, updater.ManifestId()));
|
||||
|
||||
// UpdateAll() should compute the proper diff from {"a.txt", "subdir"} to
|
||||
// {"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"} and
|
||||
  // only add/update the three missing files.
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 3);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 3);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 3);
|
||||
EXPECT_EQ(stats.total_processed_bytes, kFileSizeB + kFileSizeC + kFileSizeD);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"},
|
||||
updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Runs UpdateAll() with an existing manifest that has an extraneous file.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_DeleteFileIncremental) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
// Smuggle c.txt into the manifest.
|
||||
CdcParamsProto params;
|
||||
params.set_min_chunk_size(cfg_.min_chunk_size);
|
||||
params.set_avg_chunk_size(cfg_.avg_chunk_size);
|
||||
params.set_max_chunk_size(cfg_.max_chunk_size);
|
||||
ManifestBuilder mb(params, &data_store_);
|
||||
EXPECT_OK(mb.LoadManifest(updater.ManifestId()));
|
||||
EXPECT_OK(mb.GetOrCreateAsset("c.txt", AssetProto::FILE));
|
||||
EXPECT_OK(mb.Flush());
|
||||
std::string id_str = mb.ManifestId().SerializeAsString();
|
||||
EXPECT_OK(data_store_.Put(manifest_store_id_, id_str.data(), id_str.size()));
|
||||
|
||||
// UpdateAll() should compute the proper diff from
|
||||
// {"a.txt", "c.txt", "subdir", "subdir/b.txt"} to
|
||||
// {"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"} and
|
||||
// only delete one file.
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 1);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"},
|
||||
updater.ManifestId()));
|
||||
}
|
||||
|
||||
// UpdateAll() removes unreferenced manifest chunks.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_PrunesUnreferencedChunks) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
  // 1 for manifest id, 1 for manifest, 1 for indirect assets.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 3);
|
||||
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeUpdateOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
// 1 for manifest id, 1 for manifest, 5 indirect assets.
|
||||
// 2 additional chunks from the first Update() that are now unreferenced.
|
||||
// -1, because the indirect asset for "a.txt" is deduplicated
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 8)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
<< std::endl
|
||||
<< DumpDataStoreProtos();
|
||||
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
// 1 for manifest id, 1 for manifest, 5 indirect assets.
|
||||
// Pruning has removed the 2 unreferenced ones.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 7)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
<< std::endl
|
||||
<< DumpDataStoreProtos();
|
||||
}
|
||||
|
||||
// UpdateAll() recovers if there are missing referenced manifest chunks.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_RecoversFromMissingChunks) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
  // 1 for manifest id, 1 for manifest, 1 for indirect assets.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 3)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
<< std::endl
|
||||
<< DumpDataStoreProtos();
|
||||
|
||||
  // Remove one of the indirect chunk lists.
|
||||
for (const auto& [id, _] : data_store_.Chunks()) {
|
||||
if (id != ManifestUpdater::GetManifestStoreId() &&
|
||||
id != updater.ManifestId()) {
|
||||
data_store_.Chunks().erase(id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
// 1 for manifest id, 1 for manifest, 5 indirect assets.
|
||||
  // There would be 7 chunks without the removal above, see
  // UpdateAll_PrunesUnreferencedChunks.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 7)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
<< std::endl
|
||||
<< DumpDataStoreProtos();
|
||||
}
|
||||
|
||||
// Verifies that |file_chunks_| contains the expected chunks after UpdateAll().
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_FileChunkMapFromScratch) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
ValidateChunkLookup("subdir/c.txt", true);
|
||||
ValidateChunkLookup("subdir/d.txt", true);
|
||||
}
|
||||
|
||||
// Verifies that |file_chunks_| contains the expected chunks after UpdateAll().
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_FileChunkMapAfterUpdate) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
ASSERT_OK(updater.UpdateAll(&file_chunks_));
|
||||
// The file chunks will be populated again by UpdateAll().
|
||||
file_chunks_.Clear();
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
ValidateChunkLookup("subdir/c.txt", true);
|
||||
ValidateChunkLookup("subdir/d.txt", true);
|
||||
}
|
||||
|
||||
// Verifies that the intermediate manifest contains the expected files.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_PushIntermediateManifest) {
|
||||
ContentIdProto intermediate_id;
|
||||
auto push_intermediate_manifest =
|
||||
[&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
intermediate_id = manifest_id;
|
||||
};
|
||||
|
||||
  // Contains a.txt, subdir/b.txt, subdir/c.txt and subdir/d.txt.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_, push_intermediate_manifest));
|
||||
|
||||
// Double check that the files in the final manifest are no longer in
|
||||
// progress.
|
||||
EXPECT_FALSE(InProgress(updater.ManifestId(), "a.txt"));
|
||||
EXPECT_FALSE(InProgress(updater.ManifestId(), "subdir/b.txt"));
|
||||
EXPECT_FALSE(InProgress(updater.ManifestId(), "subdir/c.txt"));
|
||||
EXPECT_FALSE(InProgress(updater.ManifestId(), "subdir/d.txt"));
|
||||
|
||||
// Verify that the intermediate manifest is there, but it is empty.
|
||||
std::string ser_id = intermediate_id.SerializeAsString();
|
||||
EXPECT_OK(data_store_.Put(manifest_store_id_, ser_id.data(), ser_id.size()));
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, intermediate_id));
|
||||
// The root directory of the intermediate manifest is in progress.
|
||||
EXPECT_TRUE(InProgress(intermediate_id, ""));
|
||||
}
|
||||
|
||||
// Runs Update() with a single file to be added.
|
||||
TEST_F(ManifestUpdaterTest, Update_AddFile) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 1);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 1);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 1);
|
||||
EXPECT_EQ(stats.total_processed_bytes, kFileSizeA);
|
||||
ASSERT_NO_FATAL_FAILURE(
|
||||
ExpectManifestEquals({"a.txt"}, updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Runs Update() with a single file to be added. The file is in a dir that is
|
||||
// not contained in the manifest yet, so the dir will get auto-created.
|
||||
TEST_F(ManifestUpdaterTest, Update_AddFileAutoCreateSubdir) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 1);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 1);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 1);
|
||||
EXPECT_EQ(stats.total_processed_bytes, kFileSizeB);
|
||||
|
||||
// Note: The manifest does NOT contain the proper "subdir" asset now. Since it
|
||||
// was auto-created because of "subdir/b.txt", it does not have the
|
||||
// proper file time.
|
||||
std::vector<AssetInfoForTest> manifest_ais =
|
||||
GetAllManifestAssets(updater.ManifestId());
|
||||
std::vector<AssetInfoForTest> expected_ais =
|
||||
MakeAssetInfos({"subdir", "subdir/b.txt"});
|
||||
ExpectAssetInfosEqual(manifest_ais, expected_ais, false);
|
||||
manifest_ais[0].info.mtime = expected_ais[0].info.mtime;
|
||||
ExpectAssetInfosEqual(manifest_ais, expected_ais, true);
|
||||
}
|
||||
|
||||
// Calls Update() with a single file to be deleted.
|
||||
TEST_F(ManifestUpdaterTest, Update_DeleteFiles) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 1);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"},
|
||||
updater.ManifestId()));
|
||||
|
||||
// Delete another one in a subdirectory.
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"subdir/b.txt"}), &file_chunks_));
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"subdir", "subdir/c.txt", "subdir/d.txt"}, updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Calls Update() with a single dir to be deleted.
|
||||
TEST_F(ManifestUpdaterTest, Update_DeleteDir) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"subdir"}), &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 1);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
ASSERT_NO_FATAL_FAILURE(
|
||||
ExpectManifestEquals({"a.txt"}, updater.ManifestId()));
|
||||
}
|
||||
|
||||
// Calls Update() with a non-existing asset to be deleted.
|
||||
TEST_F(ManifestUpdaterTest, Update_DeleteNonExistingAsset) {
|
||||
cfg_.src_dir = empty_dir_;
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
// We need to craft AssetInfos for non-existing assets manually.
|
||||
AssetInfo ai{"non_existing", AssetProto::DIRECTORY};
|
||||
ManifestUpdater::OperationList ops{{Operator::kDelete, ai}};
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_deleted, 1);
|
||||
}
|
||||
|
||||
// Calls Update() with a non-existing file to be added.
|
||||
TEST_F(ManifestUpdaterTest, Update_AddNonExistingFile) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
  // Note that Update() succeeds even though the "non_existing" file failed.
|
||||
AssetInfo ai;
|
||||
ai.path = "non_existing";
|
||||
ManifestUpdater::OperationList ops{
|
||||
{Operator::kAdd, ai}, {Operator::kAdd, MakeAssetInfo("a.txt").info}};
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 2);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 1);
|
||||
EXPECT_EQ(stats.total_files_failed, 1);
|
||||
// "non_existing" and "a.txt" were still added, but the former is empty.
|
||||
std::vector<AssetInfoForTest> manifest_ais =
|
||||
GetAllManifestAssets(updater.ManifestId());
|
||||
std::vector<AssetInfoForTest> expected_ais = {AssetInfoForTest{ai},
|
||||
MakeAssetInfo("a.txt")};
|
||||
ExpectAssetInfosEqual(manifest_ais, expected_ais);
|
||||
}
|
||||
|
||||
// Verifies that the intermediate manifest contains the expected files.
|
||||
TEST_F(ManifestUpdaterTest, Update_PushIntermediateManifest) {
|
||||
  // Create a manifest without the subdir files.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeDeleteOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
|
||||
  // Add the subdir files back and check the intermediate manifest.
|
||||
ContentIdProto intermediate_id;
|
||||
auto push_intermediate_manifest =
|
||||
[&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
intermediate_id = manifest_id;
|
||||
};
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeUpdateOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_, push_intermediate_manifest));
|
||||
EXPECT_GT(intermediate_id.blake3_sum_160().size(), 0);
|
||||
|
||||
// Only file a.txt is done in the intermediate manifest, all others are in
|
||||
// progress.
|
||||
EXPECT_FALSE(InProgress(intermediate_id, "a.txt"));
|
||||
EXPECT_TRUE(InProgress(intermediate_id, "subdir/b.txt"));
|
||||
EXPECT_TRUE(InProgress(intermediate_id, "subdir/c.txt"));
|
||||
EXPECT_TRUE(InProgress(intermediate_id, "subdir/d.txt"));
|
||||
}
|
||||
|
||||
// Verifies that |file_chunks_| contains the expected chunks after Update().
|
||||
TEST_F(ManifestUpdaterTest, Update_FileChunkMap) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
// Add a.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", false);
|
||||
|
||||
// Add subdir/b.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
|
||||
// Remove a.txt.
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_));
|
||||
ValidateChunkLookup("a.txt", false);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
}
|
||||
|
||||
// Verifies that |file_chunks_| contains the expected chunks after an intermediate
|
||||
// update (and does not deadlock!).
|
||||
TEST_F(ManifestUpdaterTest, Update_IntermediateFileChunkMap) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
// Add a.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
|
||||
// Add subdir/b.txt and check intermediate lookups.
|
||||
auto push_intermediate_manifest = [this](const ContentIdProto&) {
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", false); // Not in yet.
|
||||
};
|
||||
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_,
|
||||
push_intermediate_manifest));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true); // Now it's in!
|
||||
}
|
||||
|
||||
// A call to ManifestId() returns the manifest id.
|
||||
TEST_F(ManifestUpdaterTest, ManifestId) {
|
||||
cfg_.src_dir = empty_dir_;
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
ContentIdProto manifest_id;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_store_id_, &manifest_id));
|
||||
EXPECT_EQ(updater.ManifestId(), manifest_id);
|
||||
}
|
||||
|
||||
// Makes sure that executables are properly detected.
|
||||
TEST_F(ManifestUpdaterTest, DetectExecutables) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "executables");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
ContentIdProto manifest_id;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_store_id_, &manifest_id));
|
||||
|
||||
ManifestIterator manifest_iter(&data_store_);
|
||||
EXPECT_OK(manifest_iter.Open(manifest_id));
|
||||
|
||||
std::unordered_map<std::string, uint32_t> path_to_perms;
|
||||
const AssetProto* entry;
|
||||
while ((entry = manifest_iter.NextEntry()) != nullptr)
|
||||
path_to_perms[entry->name()] = entry->permissions();
|
||||
EXPECT_OK(manifest_iter.Status());
|
||||
|
||||
EXPECT_EQ(path_to_perms["game.elf"], ManifestUpdater::kExecutablePerms);
|
||||
EXPECT_EQ(path_to_perms["win.exe"], ManifestUpdater::kExecutablePerms);
|
||||
EXPECT_EQ(path_to_perms["script.sh"], ManifestUpdater::kExecutablePerms);
|
||||
EXPECT_EQ(path_to_perms["normal.txt"], ManifestBuilder::kDefaultFilePerms);
|
||||
}
|
||||
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_LargeIntermediateIndirectDirAssets) {
|
||||
// Reduce chunk sizes to produce a bunch of indirect lists.
|
||||
cfg_.min_chunk_size = 8;
|
||||
cfg_.avg_chunk_size = 16;
|
||||
cfg_.max_chunk_size = 32;
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
// (internal): Run UpdateAll() with intermediate manifest push. The push
|
||||
// causes a Flush() call to the manifest builder, which pushes some assets to
|
||||
// indirect lists. This used to invalidate pointers and cause asserts to
|
||||
// trigger.
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_, [](const ContentIdProto&) {}));
|
||||
}
|
||||
|
||||
// Runs incremental UpdateAll() on an empty dir.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_EmptySrcDirectory_Incremental) {
|
||||
cfg_.src_dir = empty_dir_;
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
CdcParamsProto params;
|
||||
params.set_min_chunk_size(cfg_.min_chunk_size);
|
||||
params.set_avg_chunk_size(cfg_.avg_chunk_size);
|
||||
params.set_max_chunk_size(cfg_.max_chunk_size);
|
||||
ManifestBuilder mb(params, &data_store_);
|
||||
EXPECT_OK(mb.LoadManifest(updater.ManifestId()));
|
||||
EXPECT_OK(mb.GetOrCreateAsset("folder1", AssetProto::DIRECTORY));
|
||||
EXPECT_OK(mb.DeleteAsset("folder1"));
|
||||
}
|
||||
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_FileAsRootFails) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty", "a.txt");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
auto status = updater.UpdateAll(&file_chunks_);
|
||||
EXPECT_NOT_OK(status);
|
||||
EXPECT_TRUE(absl::IsFailedPrecondition(status)) << status.ToString();
|
||||
}
|
||||
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_RootNotExistFails) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non-existing");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
auto status = updater.UpdateAll(&file_chunks_);
|
||||
EXPECT_NOT_OK(status);
|
||||
EXPECT_TRUE(absl::IsNotFound(status)) << status.ToString();
|
||||
}
|
||||
|
||||
// Runs UpdateAll() multiple times on an empty dir with no changes.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_EmptySrcDirectoryMultiTimesNoChange) {
|
||||
cfg_.src_dir = empty_dir_;
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
UpdaterStats stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
|
||||
// Store should contain a chunk for the manifest id and one for the manifest.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 2);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, updater.ManifestId()));
|
||||
|
||||
  // No further changes should be made.
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
}
|
||||
|
||||
// Runs UpdateAll() multiple times on a non-empty dir with no changes.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_NonEmptySrcDirectoryMultiTimesNoChange) {
|
||||
  // Contains a.txt, subdir/b.txt, subdir/c.txt and subdir/d.txt.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
UpdaterStats stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 5);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 4);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 4);
|
||||
EXPECT_EQ(stats.total_processed_bytes, kTotalFileSize);
|
||||
|
||||
// Store should contain a chunk for the manifest id and one for the manifest.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 2);
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"},
|
||||
updater.ManifestId()));
|
||||
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
|
||||
  // No further changes should be made.
|
||||
stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_added_or_updated, 0);
|
||||
EXPECT_EQ(stats.total_files_failed, 0);
|
||||
EXPECT_EQ(stats.total_assets_deleted, 0);
|
||||
EXPECT_EQ(stats.total_chunks, 0);
|
||||
EXPECT_EQ(stats.total_processed_bytes, 0);
|
||||
}
|
||||
|
||||
TEST_F(ManifestUpdaterTest, IsValidDir) {
|
||||
EXPECT_OK(ManifestUpdater::IsValidDir(path::Join(base_dir_, "non_empty")));
|
||||
EXPECT_TRUE(absl::IsNotFound(
|
||||
ManifestUpdater::IsValidDir(path::Join(base_dir_, "non-existing"))));
|
||||
EXPECT_TRUE(absl::IsFailedPrecondition(ManifestUpdater::IsValidDir(
|
||||
path::Join(base_dir_, "non_empty", "a.txt"))));
|
||||
EXPECT_TRUE(
|
||||
absl::IsFailedPrecondition(ManifestUpdater::IsValidDir("relative_dir")));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
278
manifest/stats_printer.cc
Normal file
@@ -0,0 +1,278 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "manifest/stats_printer.h"
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/path.h"
|
||||
#include "common/util.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
// See https://ss64.com/nt/syntax-ansi.html.
|
||||
enum class AnsiCode {
|
||||
// Foreground colors
|
||||
kBlackFg = 0,
|
||||
kDarkRedFg = 1,
|
||||
kDarkGreenFg = 2,
|
||||
kDarkYellowFg = 3,
|
||||
kDarkBlueFg = 4,
|
||||
kDarkMagentaFg = 5,
|
||||
kDarkCyanFg = 6,
|
||||
kLightGrayFg = 7,
|
||||
kDarkGrayFg = 8,
|
||||
kLightRedFg = 9,
|
||||
kLightGreenFg = 10,
|
||||
kLightYellowFg = 11,
|
||||
kLightBlueFg = 12,
|
||||
kLightMagentaFg = 13,
|
||||
kLightCyanFg = 14,
|
||||
kWhiteFg = 15,
|
||||
|
||||
// Background colors
|
||||
kBlackBg = 16,
|
||||
kDarkRedBg = 17,
|
||||
kDarkGreenBg = 18,
|
||||
kDarkYellowBg = 19,
|
||||
kDarkBlueBg = 20,
|
||||
kDarkMagentaBg = 21,
|
||||
kDarkCyanBg = 22,
|
||||
kLightGrayBg = 23,
|
||||
kDarkGrayBg = 24,
|
||||
kLightRedBg = 25,
|
||||
kLightGreenBg = 26,
|
||||
kLightYellowBg = 27,
|
||||
kLightBlueBg = 28,
|
||||
kLightMagentaBg = 29,
|
||||
kLightCyanBg = 30,
|
||||
kWhiteBg = 31,
|
||||
|
||||
// Misc
|
||||
kBold = 32,
|
||||
kUnderline = 33,
|
||||
kNoUnderline = 34,
|
||||
kReverseText = 35,
|
||||
kNoReverseText = 36,
|
||||
kDefault = 37
|
||||
};
|
||||
|
||||
constexpr char kAnsiCodeStr[][7]{
|
||||
"\033[30m", "\033[31m", "\033[32m", "\033[33m", "\033[34m",
|
||||
"\033[35m", "\033[36m", "\033[37m", "\033[90m", "\033[91m",
|
||||
"\033[92m", "\033[93m", "\033[94m", "\033[95m", "\033[96m",
|
||||
"\033[97m", "\033[40m", "\033[41m", "\033[42m", "\033[43m",
|
||||
"\033[44m", "\033[45m", "\033[46m", "\033[47m", "\033[100m",
|
||||
"\033[101m", "\033[102m", "\033[103m", "\033[104m", "\033[105m",
|
||||
"\033[106m", "\033[107m", "\033[1m", "\033[4m", "\033[24m",
|
||||
"\033[7m", "\033[27m", "\033[0m"};
|
||||
|
||||
constexpr int kBgColors[] = {
|
||||
static_cast<int>(AnsiCode::kLightRedBg),
|
||||
static_cast<int>(AnsiCode::kLightGreenBg),
|
||||
static_cast<int>(AnsiCode::kLightBlueBg),
|
||||
static_cast<int>(AnsiCode::kLightYellowBg),
|
||||
static_cast<int>(AnsiCode::kLightMagentaBg),
|
||||
static_cast<int>(AnsiCode::kLightCyanBg),
|
||||
static_cast<int>(AnsiCode::kDarkRedBg),
|
||||
static_cast<int>(AnsiCode::kDarkGreenBg),
|
||||
static_cast<int>(AnsiCode::kDarkBlueBg),
|
||||
static_cast<int>(AnsiCode::kDarkYellowBg),
|
||||
static_cast<int>(AnsiCode::kDarkMagentaBg),
|
||||
static_cast<int>(AnsiCode::kDarkCyanBg),
|
||||
};
|
||||
constexpr int kNumBgColors = static_cast<int>(std::size(kBgColors));
|
||||
|
||||
constexpr int kFgColors[] = {
|
||||
static_cast<int>(AnsiCode::kBlackFg),
|
||||
static_cast<int>(AnsiCode::kDarkGrayFg),
|
||||
static_cast<int>(AnsiCode::kLightGrayFg),
|
||||
};
|
||||
constexpr int kNumFgColors = static_cast<int>(std::size(kFgColors));
|
||||
|
||||
// Max length of filenames to print.
|
||||
constexpr size_t kMaxFilenameSize = 32;
|
||||
|
||||
// Number of most recent files to print.
|
||||
constexpr size_t kMaxNumRecentFiles = 32;
|
||||
|
||||
void PrintPadded(std::string line, size_t padded_size) {
|
||||
line.resize(padded_size, ' ');
|
||||
printf("%s\n", line.c_str());
|
||||
}
|
||||
|
||||
// Returns the base name of |path|, shortened to |kMaxFilenameSize| characters.
|
||||
std::string GetShortFilename(const std::string& path) {
|
||||
std::string filename = path::BaseName(path);
|
||||
if (filename.size() > kMaxFilenameSize)
|
||||
filename = filename.substr(0, kMaxFilenameSize - 2) + "..";
|
||||
return filename;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
StatsPrinter::StatsPrinter() = default;
|
||||
|
||||
StatsPrinter::~StatsPrinter() = default;
|
||||
|
||||
void StatsPrinter::InitFile(const std::string& path, size_t num_chunks) {
|
||||
path_to_file_[path].chunks.resize(num_chunks);
|
||||
}
|
||||
|
||||
void StatsPrinter::Clear() {
|
||||
recent_files_.clear();
|
||||
path_to_file_.clear();
|
||||
thread_id_to_color_.clear();
|
||||
num_threads_ = 0;
|
||||
|
||||
  // Don't clear max_bandwidth_; it can't be recalculated, whereas the others can.
|
||||
total_streamed_bytes_ = 0;
|
||||
total_cached_bytes_ = 0;
|
||||
}
|
||||
|
||||
void StatsPrinter::ResetBandwidthStats() {
|
||||
bandwidth_timer_.Reset();
|
||||
curr_bandwidth_ = 0;
|
||||
curr_streamed_bytes_ = 0;
|
||||
}
|
||||
|
||||
void StatsPrinter::RecordStreamedChunk(const std::string& path, size_t index,
|
||||
uint32_t size, size_t thread_id) {
|
||||
AddToRecentFiles(path);
|
||||
assert(path_to_file_.find(path) != path_to_file_.end());
|
||||
assert(index < path_to_file_[path].chunks.size());
|
||||
path_to_file_[path].chunks[index] =
|
||||
FileChunk(ChunkState::kStreamed, thread_id);
|
||||
curr_streamed_bytes_ += size;
|
||||
total_streamed_bytes_ += size;
|
||||
|
||||
// Update thread-to-color map.
|
||||
if (thread_id_to_color_.find(thread_id) == thread_id_to_color_.end())
|
||||
thread_id_to_color_[thread_id] = num_threads_++;
|
||||
}
|
||||
|
||||
void StatsPrinter::RecordCachedChunk(const std::string& path, size_t index,
|
||||
uint32_t size) {
|
||||
AddToRecentFiles(path);
|
||||
path_to_file_[path].chunks[index] = FileChunk(ChunkState::kCached, 0);
|
||||
total_cached_bytes_ += size;
|
||||
}
|
||||
|
||||
void StatsPrinter::Print() {
  int console_width = Util::GetConsoleWidth();
  if (console_width < static_cast<int>(kMaxFilenameSize) + 4) return;
  printf("\r");

  size_t max_filename_size = 0;
  for (const std::string& path : recent_files_) {
    max_filename_size =
        std::max(max_filename_size, GetShortFilename(path).size());
  }

  std::string line;
  for (const std::string& path : recent_files_) {
    const File& file = path_to_file_[path];
    line = GetShortFilename(path);
    line.resize(max_filename_size + 1, ' ');

    // Fill the rest of the line with a visualization of the chunk states.
    size_t num_chunks = file.chunks.size();
    size_t print_width =
        std::min(num_chunks, static_cast<size_t>(console_width) - line.size());
    size_t num_chars = line.size() + print_width;

    for (size_t n = 0; n < print_width; ++n) {
      // There can be multiple chunks per output char. Pick the most recent
      // one.
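      // Example (illustrative): with num_chunks = 100 and print_width = 10,
      // output char n summarizes chunks [10 * n, 10 * n + 10).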
      size_t begin_idx = n * num_chunks / print_width;
      size_t end_idx = (n + 1) * num_chunks / print_width;

      absl::Time last_modified_time = file.chunks[begin_idx].modified_time;
      size_t last_modified_idx = begin_idx;
      for (size_t k = begin_idx + 1; k < end_idx; ++k) {
        if (last_modified_time < file.chunks[k].modified_time) {
          last_modified_time = file.chunks[k].modified_time;
          last_modified_idx = k;
        }
      }

      // Print a character depending on the chunk state:
      //   - for chunks that have not been loaded.
      //   X for chunks that have been streamed.
      //   C for chunks that were cached.
      const FileChunk& chunk = file.chunks[last_modified_idx];
      if (chunk.state == ChunkState::kNotLoaded) {
        line += kAnsiCodeStr[static_cast<int>(AnsiCode::kDefault)];
        line.push_back('-');
      } else if (chunk.state == ChunkState::kCached) {
        line += kAnsiCodeStr[static_cast<int>(AnsiCode::kBlackFg)];
        line += kAnsiCodeStr[static_cast<int>(AnsiCode::kLightGrayBg)];
        line.push_back('C');
      } else {
        int col = thread_id_to_color_[chunk.thread_id];
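        // Map the color index to a background/foreground pair; this yields
        // kNumBgColors * kNumFgColors distinct combinations before repeating.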
        line += kAnsiCodeStr[kBgColors[col % kNumBgColors]];
        line += kAnsiCodeStr[kFgColors[(col / kNumBgColors) % kNumFgColors]];
        line.push_back('X');
      }

      // Return to default coloring.
      line += kAnsiCodeStr[static_cast<int>(AnsiCode::kDefault)];
    }

    // Fill with spaces and print.
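    // Note that line.size() includes the invisible ANSI color codes, while
    // num_chars only counts visible characters, so this pads the visible part
    // of the line to console_width + 1 characters.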
    PrintPadded(std::move(line), line.size() + console_width - num_chars + 1);
  }

  // Print bandwidth and other stats.
  UpdateBandwidthStats();

  line = "Legend: (-) not loaded, (C) cached, (X) streamed (color=FUSE thread)";
  PrintPadded(std::move(line), console_width);

  constexpr double MBd = 1024.0 * 1024.0;
  line = absl::StrFormat("Bandwidth %7.2f MB/sec (curr) %7.2f MB/sec (max)",
                         curr_bandwidth_ / MBd, max_bandwidth_ / MBd);
  PrintPadded(std::move(line), console_width);

  constexpr int MBi = 1024 * 1024;
  line =
      absl::StrFormat("Total data %6i MB (streamed) %7i MB (cached)",
                      total_streamed_bytes_ / MBi, total_cached_bytes_ / MBi);
  PrintPadded(std::move(line), console_width);

  // Move cursor up, so that printing again overwrites the old content.
  for (size_t n = 0; n < recent_files_.size() + 3; ++n) printf("\033[F");
}

void StatsPrinter::AddToRecentFiles(const std::string& path) {
  if (std::find(recent_files_.begin(), recent_files_.end(), path) !=
      recent_files_.end()) {
    return;
  }

  recent_files_.push_back(path);
  if (recent_files_.size() > kMaxNumRecentFiles) recent_files_.pop_front();
}

void StatsPrinter::UpdateBandwidthStats() {
  double delta_sec = bandwidth_timer_.ElapsedSeconds();
  if (delta_sec < 1.0) return;

  curr_bandwidth_ = curr_streamed_bytes_ / delta_sec;
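  // For instance (illustrative numbers): 10 MiB streamed over 2.5 seconds
  // gives a current bandwidth of 4 MiB/sec.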
  if (max_bandwidth_ < curr_bandwidth_) max_bandwidth_ = curr_bandwidth_;

  curr_streamed_bytes_ = 0;
  bandwidth_timer_.Reset();
}

}  // namespace cdc_ft
135
manifest/stats_printer.h
Normal file
135
manifest/stats_printer.h
Normal file
@@ -0,0 +1,135 @@
/*
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MANIFEST_STATS_PRINTER_H_
#define MANIFEST_STATS_PRINTER_H_

#include <cstdint>
#include <deque>
#include <string>
#include <unordered_map>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/time/time.h"
#include "common/stopwatch.h"

namespace cdc_ft {

// Collects and prints statistics about chunks streamed and cached per file.
// Also prints general bandwidth and total bytes statistics.
// Sample output (X's are colored by FUSE thread id, not shown):
//   gamedata.pak CCCXXXXXXXXXX------
//   lib.so       XXX------
//   Legend: (-) not loaded, (C) cached, (X) streamed (color=FUSE thread)
//   Bandwidth 0.00 MB/sec (curr) 2.39 MB/sec (max)
//   Total data 3 MB (streamed) 1 MB (cached)
// Each X/C/- represents a chunk unless the file is large and the chunks don't
// fit into a single line. In that case, the X/C/- represents the most recently
// accessed chunk in a group of several chunks.
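//
// A minimal usage sketch (file name, chunk count, sizes, and thread id below
// are illustrative):
//   StatsPrinter printer;
//   printer.InitFile("gamedata.pak", /*num_chunks=*/19);
//   printer.RecordCachedChunk("gamedata.pak", /*index=*/0, /*size=*/4096);
//   printer.RecordStreamedChunk("gamedata.pak", /*index=*/3, /*size=*/8192,
//                               /*thread_id=*/42);
//   printer.Print();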
class StatsPrinter {
 public:
  StatsPrinter();
  ~StatsPrinter();

  // Registers a file for the given relative Unix |path| that has |num_chunks|
  // chunks.
  void InitFile(const std::string& path, size_t num_chunks);

  // Clears all data except max bandwidth.
  void Clear();

  // Resets measurement of current bandwidth.
  void ResetBandwidthStats();

  // Records a chunk that was streamed from the workstation.
  // |path| is the relative Unix path of a file that contains the chunk.
  // |index| is the index of the chunk.
  // |size| is the size of the chunk in bytes.
  // |thread_id| is the id of the thread that requested the chunk on the
  // gamelet, usually the hash of the std::thread::id.
  // Asserts that the file was registered with InitFile() and that |index| is
  // smaller than |num_chunks| passed to InitFile().
  void RecordStreamedChunk(const std::string& path, size_t index,
                           uint32_t size, size_t thread_id);

  // Records a chunk that is cached on the gamelet.
  // |path| is the relative Unix path of a file that contains the chunk.
  // |index| is the index of the chunk.
  // |size| is the size of the chunk in bytes.
  // Asserts that the file was registered with InitFile() and that |index| is
  // smaller than |num_chunks| passed to InitFile().
  void RecordCachedChunk(const std::string& path, size_t index, uint32_t size);

  // Prints all statistics.
  void Print();

 private:
  // Adds |path| to |recent_files_| if it's not already there and removes the
  // first entry if the list gets too large.
  void AddToRecentFiles(const std::string& path);

  // Updates the current and max bandwidth stats.
  void UpdateBandwidthStats();

  enum class ChunkState : uint8_t {
    kNotLoaded = 0,  // Chunk is neither cached nor streamed.
    kStreamed = 1,   // Chunk was streamed from the workstation.
    kCached = 2,     // Chunk was cached on the gamelet.
  };

  struct FileChunk {
    // Thread on gamelet that requested a streamed chunk.
    // Unused for cached chunks and chunks that are not loaded.
    size_t thread_id = 0;

    // Time when this data was modified.
    absl::Time modified_time;

    // Whether the chunk is cached, was streamed or is not loaded.
    ChunkState state = ChunkState::kNotLoaded;

    FileChunk() {}
    explicit FileChunk(ChunkState state, size_t thread_id)
        : thread_id(thread_id), modified_time(absl::Now()), state(state) {}
  };

  struct File {
    // All chunks in the file.
    std::vector<FileChunk> chunks;
  };

  // LRU access list.
  std::deque<std::string> recent_files_;

  // Map from relative Unix file path to all chunks in that file.
  using PathToFileMap = absl::flat_hash_map<std::string, File>;
  PathToFileMap path_to_file_;

  // Assigns each thread a fixed color.
  std::unordered_map<size_t, int> thread_id_to_color_;
  int num_threads_ = 0;

  Stopwatch bandwidth_timer_;
  double curr_bandwidth_ = 0;
  uint64_t curr_streamed_bytes_ = 0;

  double max_bandwidth_ = 0;
  uint64_t total_streamed_bytes_ = 0;
  uint64_t total_cached_bytes_ = 0;
};

}  // namespace cdc_ft

#endif  // MANIFEST_STATS_PRINTER_H_
BIN
manifest/testdata/manifest_updater/executables/game.elf
vendored
Normal file
BIN
manifest/testdata/manifest_updater/executables/game.elf
vendored
Normal file
Binary file not shown.
1
manifest/testdata/manifest_updater/executables/normal.txt
vendored
Normal file
1
manifest/testdata/manifest_updater/executables/normal.txt
vendored
Normal file
@@ -0,0 +1 @@
*waves hand* This is not an executable!
17
manifest/testdata/manifest_updater/executables/script.sh
vendored
Normal file
17
manifest/testdata/manifest_updater/executables/script.sh
vendored
Normal file
@@ -0,0 +1,17 @@
#!/bin/sh
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


echo Hi!
BIN
manifest/testdata/manifest_updater/executables/win.exe
vendored
Normal file
BIN
manifest/testdata/manifest_updater/executables/win.exe
vendored
Normal file
Binary file not shown.
1
manifest/testdata/manifest_updater/non_empty/a.txt
vendored
Normal file
1
manifest/testdata/manifest_updater/non_empty/a.txt
vendored
Normal file
@@ -0,0 +1 @@
aaaaaaaa
1
manifest/testdata/manifest_updater/non_empty/subdir/b.txt
vendored
Normal file
1
manifest/testdata/manifest_updater/non_empty/subdir/b.txt
vendored
Normal file
@@ -0,0 +1 @@
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
1
manifest/testdata/manifest_updater/non_empty/subdir/c.txt
vendored
Normal file
1
manifest/testdata/manifest_updater/non_empty/subdir/c.txt
vendored
Normal file
@@ -0,0 +1 @@
c
1
manifest/testdata/manifest_updater/non_empty/subdir/d.txt
vendored
Normal file
1
manifest/testdata/manifest_updater/non_empty/subdir/d.txt
vendored
Normal file
@@ -0,0 +1 @@
d
0
manifest/testdata/root.txt
vendored
Normal file
0
manifest/testdata/root.txt
vendored
Normal file