Mirror of https://github.com/nestriness/cdc-file-transfer.git (synced 2026-01-30 10:35:37 +02:00)
Merge dynamic manifest updates to Github (#7)
This change introduces dynamic manifest updates to asset streaming.

Asset streaming describes the directory to be streamed in a manifest, a proto definition of all content metadata. This information is sufficient to answer `stat` and `readdir` calls in the FUSE layer without additional round-trips to the workstation. When a directory is streamed for the first time, the corresponding manifest is created in two steps:

1. The directory is traversed recursively and the inode information of all contained files and directories is written to the manifest.
2. The content of all identified files is processed to generate each file's chunk list, which is part of the file's definition in the manifest:
   * The chunk boundaries are identified using our implementation of the FastCDC algorithm.
   * The hash of each chunk is calculated using the BLAKE3 hash function.
   * The length and hash of each chunk are appended to the file's chunk list.

Prior to this change, when the user mounted a workstation directory on a client, the asset streaming server pushed an intermediate manifest to the gamelet as soon as step 1 was completed. At this point, the FUSE client started serving the virtual file system and was ready to answer `stat` and `readdir` calls. If the FUSE client received a call that required file contents, such as `read`, it blocked the caller until the server completed step 2 and pushed the final manifest to the client. This works well for large directories (> 100 GB) with a reasonable number of files (< 100k), but when dealing with millions of tiny files, creating the full manifest can take several minutes.

With this change, when the FUSE layer receives an `open` or `readdir` request for a file or directory that is incomplete, it sends an RPC to the workstation describing what information is missing from the manifest. The workstation identifies the corresponding file chunker or directory scanner tasks and moves them to the front of the queue. As soon as a task completes, the workstation pushes an updated intermediate manifest to the client, which now includes the information needed to serve the FUSE request. The queued FUSE request is then resumed and returns the result to the caller.

This does not reduce the time required to build the final manifest, but it splits the work into smaller tasks, which lets us interrupt the current work and prioritize the tasks needed to handle an incoming request from the client. Although this still takes a round-trip to the workstation plus the processing time for the task, an updated manifest arrives within a few seconds, which is much better than blocking for several minutes. The latency is only visible while the manifest is still being created, and the situation improves as manifest creation on the workstation progresses. As soon as the final manifest is pushed, all metadata can be served directly without waiting for pending tasks.
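To make step 2 above concrete, the sketch below builds a chunk list for a single file. It is an illustration only, not code from this repository: `ChunkRef`, `BuildChunkList`, the rolling-hash boundary test, and the size parameters are all made up for this example (the project uses its own FastCDC implementation with normalized chunk sizes); only the `blake3_hasher_*` calls correspond to the real BLAKE3 C API.

```cpp
// Illustrative sketch only -- not cdc-file-transfer code.
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

#include "blake3.h"  // BLAKE3 C API: blake3_hasher_init/update/finalize.

// One manifest chunk entry: length of the chunk plus its BLAKE3 digest.
struct ChunkRef {
  uint64_t size = 0;
  uint8_t hash[BLAKE3_OUT_LEN] = {};
};

// Toy content-defined boundary test. The real FastCDC implementation uses a
// gear table and normalized chunking; this stand-in only demonstrates the
// min/avg/max chunk-size behavior.
bool IsBoundary(uint64_t rolling, uint64_t len, uint64_t min_size,
                uint64_t avg_mask, uint64_t max_size) {
  if (len < min_size) return false;
  if (len >= max_size) return true;
  return (rolling & avg_mask) == 0;
}

// Reads |path| and returns the (length, hash) list that would be stored for
// the file in the manifest.
std::vector<ChunkRef> BuildChunkList(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  std::vector<ChunkRef> chunks;
  blake3_hasher hasher;
  blake3_hasher_init(&hasher);
  uint64_t rolling = 0, len = 0;
  char byte;
  while (in.get(byte)) {
    rolling = rolling * 31 + static_cast<uint8_t>(byte);  // toy rolling hash
    blake3_hasher_update(&hasher, &byte, 1);
    ++len;
    if (IsBoundary(rolling, len, /*min_size=*/4 << 10, /*avg_mask=*/0x1FFF,
                   /*max_size=*/256 << 10)) {
      ChunkRef ref;
      ref.size = len;
      blake3_hasher_finalize(&hasher, ref.hash, BLAKE3_OUT_LEN);
      chunks.push_back(ref);
      blake3_hasher_init(&hasher);  // start hashing the next chunk
      rolling = 0;
      len = 0;
    }
  }
  if (len > 0) {  // trailing chunk that did not hit a boundary
    ChunkRef ref;
    ref.size = len;
    blake3_hasher_finalize(&hasher, ref.hash, BLAKE3_OUT_LEN);
    chunks.push_back(ref);
  }
  return chunks;
}
```

In the actual change, this per-file work runs in `FileChunkerTask`s on a thread pool, and the new `PendingAssetsQueue` (added in the diff below) decides which file or directory gets chunked or scanned next.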
@@ -111,8 +111,14 @@ cc_library(
|
||||
|
||||
cc_library(
|
||||
name = "manifest_updater",
|
||||
srcs = ["manifest_updater.cc"],
|
||||
hdrs = ["manifest_updater.h"],
|
||||
srcs = [
|
||||
"manifest_updater.cc",
|
||||
"pending_assets_queue.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"manifest_updater.h",
|
||||
"pending_assets_queue.h",
|
||||
],
|
||||
deps = [
|
||||
":file_chunk_map",
|
||||
":manifest_builder",
|
||||
|
||||
@@ -91,15 +91,19 @@ absl::StatusOr<AssetBuilder> ManifestBuilder::GetOrCreateAsset(
|
||||
name = parts.back();
|
||||
parts.pop_back();
|
||||
}
|
||||
DirCreateMode create_mode =
|
||||
force_create ? DirCreateMode::kForceCreate : DirCreateMode::kCreate;
|
||||
DirCreateMode create_mode = type == AssetProto::UNKNOWN
|
||||
? DirCreateMode::kNoCreate
|
||||
: force_create ? DirCreateMode::kForceCreate
|
||||
: DirCreateMode::kCreate;
|
||||
AssetProto* dir;
|
||||
ASSIGN_OR_RETURN(dir, FindOrCreateDirPath(parts, create_mode),
|
||||
"Failed to create directory '%s'", JoinUnixPath(parts));
|
||||
|
||||
if (name.empty()) {
|
||||
// Special case: return the root directory for a DIRECTORY with empty name.
|
||||
if (type == AssetProto::DIRECTORY) return AssetBuilder(dir, std::string());
|
||||
if (type == AssetProto::DIRECTORY || type == AssetProto::UNKNOWN) {
|
||||
return AssetBuilder(dir, std::string());
|
||||
}
|
||||
return absl::InvalidArgumentError("Empty path given");
|
||||
}
|
||||
|
||||
@@ -108,6 +112,10 @@ absl::StatusOr<AssetBuilder> ManifestBuilder::GetOrCreateAsset(
|
||||
AssetProto* asset = nullptr;
|
||||
if (result.ok()) {
|
||||
asset = result.value();
|
||||
// If the asset type is unknown, we return any type.
|
||||
if (type == AssetProto::UNKNOWN) {
|
||||
return AssetBuilder(asset, path::DirName(unix_path));
|
||||
}
|
||||
// Verify that both assets are of the same type.
|
||||
if (asset->type() != type) {
|
||||
if (force_create) {
|
||||
@@ -125,11 +133,15 @@ absl::StatusOr<AssetBuilder> ManifestBuilder::GetOrCreateAsset(
|
||||
}
|
||||
// Create the asset if it was not found or it was deleted.
|
||||
if (!asset) {
|
||||
if (type == AssetProto::UNKNOWN) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrFormat("Asset '%s' does not exist.", path));
|
||||
}
|
||||
asset = dir->add_dir_assets();
|
||||
InitNewAsset(name, type, asset);
|
||||
if (created) *created = true;
|
||||
}
|
||||
return AssetBuilder(asset, path::ToUnix(path::DirName(path)));
|
||||
return AssetBuilder(asset, path::DirName(unix_path));
|
||||
}
|
||||
|
||||
absl::Status ManifestBuilder::DeleteAsset(const std::string& path) {
|
||||
|
||||
@@ -76,6 +76,10 @@ class ManifestBuilder {
|
||||
// asset is removed (recursively for directories) and a new asset with the
|
||||
// same name is created instead.
|
||||
//
|
||||
// When |type| is UNKNOWN, an existing asset of any type is returned; no new
|
||||
// asset is created when it does not exist, nor are any of the directories
|
||||
// that lead up to that asset.
|
||||
//
|
||||
// When |created| is given, then it will be set to true if that asset was
|
||||
// actually added, otherwise it will be set to false.
|
||||
absl::StatusOr<AssetBuilder> GetOrCreateAsset(const std::string& path,
|
||||
|
||||
@@ -344,6 +344,52 @@ TEST_F(ManifestBuilderTest, FilesDirsCreatedOnlyOnce) {
|
||||
VerifyAssets(assets, builder.ManifestId());
|
||||
}
|
||||
|
||||
TEST_F(ManifestBuilderTest, GetAssetsOfUnkonwnType) {
|
||||
ManifestBuilder builder(cdc_params_, &cache_);
|
||||
AssetMap assets;
|
||||
assets["file1.txt"] = {"a"};
|
||||
assets["dir1"] = {};
|
||||
|
||||
ASSERT_OK(AddAssets(assets, &builder));
|
||||
bool created = false;
|
||||
|
||||
// Get existing assets, force_create == false
|
||||
EXPECT_OK(builder.GetOrCreateAsset("file1.txt", AssetProto::UNKNOWN, false,
|
||||
&created));
|
||||
EXPECT_FALSE(created);
|
||||
EXPECT_OK(
|
||||
builder.GetOrCreateAsset("dir1", AssetProto::UNKNOWN, false, &created));
|
||||
EXPECT_FALSE(created);
|
||||
|
||||
// Get existing assets, force_create == true
|
||||
EXPECT_OK(builder.GetOrCreateAsset("file1.txt", AssetProto::UNKNOWN, true,
|
||||
&created));
|
||||
EXPECT_FALSE(created);
|
||||
EXPECT_OK(
|
||||
builder.GetOrCreateAsset("dir1", AssetProto::UNKNOWN, true, &created));
|
||||
EXPECT_FALSE(created);
|
||||
|
||||
// Get the root directory.
|
||||
EXPECT_OK(builder.GetOrCreateAsset("", AssetProto::UNKNOWN));
|
||||
|
||||
// Get non-existing file fails, force_create = false
|
||||
EXPECT_NOT_OK(
|
||||
builder.GetOrCreateAsset("does_not_exist", AssetProto::UNKNOWN, false));
|
||||
|
||||
// Get non-existing file fails, force_create = true
|
||||
EXPECT_NOT_OK(
|
||||
builder.GetOrCreateAsset("does_not_exist", AssetProto::UNKNOWN, true));
|
||||
|
||||
// Get non-existing file fails, no sub-directories are created.
|
||||
EXPECT_NOT_OK(builder.GetOrCreateAsset("new_dir1/does_not_exist",
|
||||
AssetProto::UNKNOWN, false));
|
||||
EXPECT_NOT_OK(builder.GetOrCreateAsset("new_dir2/does_not_exist",
|
||||
AssetProto::UNKNOWN, true));
|
||||
|
||||
ASSERT_OK(builder.Flush());
|
||||
VerifyAssets(assets, builder.ManifestId());
|
||||
}
|
||||
|
||||
TEST_F(ManifestBuilderTest, Deduplication) {
|
||||
ManifestBuilder builder(cdc_params_, &cache_);
|
||||
AssetMap assets;
|
||||
|
||||
@@ -79,7 +79,7 @@ absl::Status ManifestIterator::Open(const std::string& manifest_file) {
|
||||
std::string msg =
|
||||
absl::StrFormat("failed to open file '%s' for reading", manifest_file);
|
||||
if (errno) {
|
||||
status_ = ErrnoToCanonicalStatus(errno, msg);
|
||||
status_ = ErrnoToCanonicalStatus(errno, "%s", msg);
|
||||
} else {
|
||||
status_ = absl::UnknownError(msg);
|
||||
}
|
||||
|
||||
@@ -82,12 +82,14 @@ ManifestTestBase::ManifestTestBase(std::string base_dir)
|
||||
|
||||
std::vector<ManifestTestBase::AssetInfoForTest>
|
||||
ManifestTestBase::GetAllManifestAssets(ContentIdProto actual_manifest_id) {
|
||||
ContentIdProto manifest_id;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_store_id_, &manifest_id));
|
||||
EXPECT_EQ(manifest_id, actual_manifest_id);
|
||||
ContentIdProto expected_manifest_id;
|
||||
EXPECT_OK(data_store_.GetProto(manifest_store_id_, &expected_manifest_id));
|
||||
EXPECT_EQ(ContentId::ToHexString(expected_manifest_id),
|
||||
ContentId::ToHexString(actual_manifest_id))
|
||||
<< DumpDataStoreProtos();
|
||||
|
||||
ManifestIterator manifest_iter(&data_store_);
|
||||
EXPECT_OK(manifest_iter.Open(manifest_id));
|
||||
EXPECT_OK(manifest_iter.Open(expected_manifest_id));
|
||||
|
||||
std::vector<AssetInfoForTest> assets;
|
||||
const AssetProto* entry;
|
||||
@@ -168,10 +170,10 @@ void ManifestTestBase::ExpectAssetInfosEqual(std::vector<AssetInfoForTest> a,
|
||||
void ManifestTestBase::ExpectManifestEquals(
|
||||
std::initializer_list<std::string> rel_paths,
|
||||
const ContentIdProto& actual_manifest_id) {
|
||||
std::vector<AssetInfoForTest> manifest_ais =
|
||||
std::vector<AssetInfoForTest> actual_ais =
|
||||
GetAllManifestAssets(actual_manifest_id);
|
||||
std::vector<AssetInfoForTest> expected_ais = MakeAssetInfos(rel_paths);
|
||||
ExpectAssetInfosEqual(manifest_ais, expected_ais);
|
||||
ExpectAssetInfosEqual(actual_ais, expected_ais);
|
||||
}
|
||||
|
||||
bool ManifestTestBase::InProgress(const ContentIdProto& manifest_id,
|
||||
|
||||
@@ -90,7 +90,7 @@ class ManifestTestBase : public ::testing::Test {
|
||||
// Compares the contents of the manifest to the real files at |rel_paths|.
|
||||
// The paths are relative to |cfg_.src_dir|.
|
||||
void ExpectManifestEquals(std::initializer_list<std::string> rel_paths,
|
||||
const ContentIdProto& actual_manifest_id);
|
||||
const ContentIdProto& got_manifest_id);
|
||||
|
||||
// Returns true if the file at Unix |path| contains file chunks in the
|
||||
// manifest referenced by |manifest_id|.
|
||||
|
||||
@@ -35,6 +35,16 @@
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
// A generic finalizer that invokes a given function at the end of its lifetime.
|
||||
class Finalizer {
|
||||
public:
|
||||
explicit Finalizer(std::function<void()> finalize) : finalize_(finalize) {}
|
||||
~Finalizer() { finalize_(); }
|
||||
|
||||
private:
|
||||
std::function<void()> finalize_;
|
||||
};
|
||||
|
||||
// Returns AssetInfos for all files and dirs in |src_dir| + |rel_path|. Does not
|
||||
// recurse into sub-directories.
|
||||
absl::Status GetAllSrcAssets(const std::string& src_dir,
|
||||
@@ -104,49 +114,49 @@ void AssetInfo::AppendMoveChunks(RepeatedChunkRefProto* list,
|
||||
// Common fields for tasks that fill in manifest data.
|
||||
class ManifestTask : public Task {
|
||||
public:
|
||||
ManifestTask(std::string src_dir, std::string relative_unix_path,
|
||||
std::string filename)
|
||||
: src_dir_(std::move(src_dir)),
|
||||
rel_unix_path_(std::move(relative_unix_path)),
|
||||
filename_(std::move(filename)) {}
|
||||
ManifestTask(std::string src_dir, PendingAsset asset)
|
||||
: src_dir_(std::move(src_dir)), asset_(std::move(asset)) {}
|
||||
|
||||
// Relative unix path of the directory containing the file or directory for
|
||||
// this task.
|
||||
const std::string& RelativeUnixPath() const { return rel_unix_path_; }
|
||||
const std::string& RelativeUnixPath() const { return asset_.relative_path; }
|
||||
|
||||
// Relative unix path of the file or directory for this task.
|
||||
std::string RelativeUnixFilePath() const {
|
||||
return path::JoinUnix(rel_unix_path_, filename_);
|
||||
return path::JoinUnix(RelativeUnixPath(), Filename());
|
||||
}
|
||||
|
||||
// Name of the file or directory to process with this task.
|
||||
const std::string& Filename() const { return filename_; }
|
||||
const std::string& Filename() const { return asset_.filename; }
|
||||
|
||||
// Full path of the file or directory to process with this task.
|
||||
std::string FilePath() const {
|
||||
return path::Join(src_dir_, path::ToNative(rel_unix_path_), filename_);
|
||||
return path::Join(src_dir_, path::ToNative(RelativeUnixPath()),
|
||||
asset_.filename);
|
||||
}
|
||||
|
||||
// Returns the final status of the task.
|
||||
// Should not be accessed before the task is finished.
|
||||
const absl::Status& Status() const { return status_; }
|
||||
|
||||
// Returns whether or not this asset is explicitly prioritized.
|
||||
bool Prioritized() const { return asset_.prioritized; }
|
||||
|
||||
// Returns the pending asset's deadline.
|
||||
absl::Time Deadline() const { return asset_.deadline; }
|
||||
|
||||
protected:
|
||||
const std::string src_dir_;
|
||||
const std::string rel_unix_path_;
|
||||
const std::string filename_;
|
||||
|
||||
const PendingAsset asset_;
|
||||
absl::Status status_;
|
||||
};
|
||||
|
||||
// ThreadPool task that runs the CDC chunker on a given file.
|
||||
class FileChunkerTask : public ManifestTask {
|
||||
public:
|
||||
FileChunkerTask(std::string src_dir, std::string relative_path,
|
||||
std::string filename, const fastcdc::Config* cfg,
|
||||
Buffer buffer)
|
||||
: ManifestTask(std::move(src_dir), std::move(relative_path),
|
||||
std::move(filename)),
|
||||
FileChunkerTask(std::string src_dir, PendingAsset asset,
|
||||
const fastcdc::Config* cfg, Buffer buffer)
|
||||
: ManifestTask(std::move(src_dir), std::move(asset)),
|
||||
cfg_(cfg),
|
||||
buffer_(std::move(buffer)) {
|
||||
assert(cfg_->max_size > 0);
|
||||
@@ -223,11 +233,9 @@ class FileChunkerTask : public ManifestTask {
|
||||
// ThreadPool task that creates assets for the contents of a directory.
|
||||
class DirScannerTask : public ManifestTask {
|
||||
public:
|
||||
DirScannerTask(std::string src_dir, std::string relative_path,
|
||||
std::string filename, AssetBuilder dir,
|
||||
DirScannerTask(std::string src_dir, PendingAsset asset, AssetBuilder dir,
|
||||
DataStoreReader* data_store)
|
||||
: ManifestTask(std::move(src_dir), std::move(relative_path),
|
||||
std::move(filename)),
|
||||
: ManifestTask(std::move(src_dir), std::move(asset)),
|
||||
dir_(dir),
|
||||
data_store_(data_store) {}
|
||||
|
||||
@@ -419,15 +427,16 @@ absl::Status ManifestUpdater::IsValidDir(std::string dir) {
|
||||
}
|
||||
|
||||
ManifestUpdater::ManifestUpdater(DataStoreWriter* data_store, UpdaterConfig cfg)
|
||||
: data_store_(data_store), cfg_(std::move(cfg)) {
|
||||
: data_store_(data_store),
|
||||
cfg_(std::move(cfg)),
|
||||
queue_(kMinAssetProcessingTime) {
|
||||
path::EnsureEndsWithPathSeparator(&cfg_.src_dir);
|
||||
}
|
||||
|
||||
ManifestUpdater::~ManifestUpdater() = default;
|
||||
|
||||
absl::Status ManifestUpdater::UpdateAll(
|
||||
FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest) {
|
||||
absl::Status ManifestUpdater::UpdateAll(FileChunkMap* file_chunks,
|
||||
PushManifestHandler push_handler) {
|
||||
RETURN_IF_ERROR(ManifestUpdater::IsValidDir(cfg_.src_dir));
|
||||
|
||||
// Don't use the Windows localized time from path::GetStats.
|
||||
@@ -441,9 +450,8 @@ absl::Status ManifestUpdater::UpdateAll(
|
||||
|
||||
std::vector<Operation> operations{{Operator::kAdd, std::move(ri)}};
|
||||
|
||||
absl::Status status =
|
||||
Update(&operations, file_chunks, push_intermediate_manifest,
|
||||
/*recursive=*/true);
|
||||
absl::Status status = Update(&operations, file_chunks, push_handler,
|
||||
/*recursive=*/true);
|
||||
|
||||
if (status.ok() || !absl::IsUnavailable(status)) return status;
|
||||
|
||||
@@ -456,7 +464,7 @@ absl::Status ManifestUpdater::UpdateAll(
|
||||
RETURN_IF_ERROR(data_store_->Wipe());
|
||||
file_chunks->Clear();
|
||||
|
||||
RETURN_IF_ERROR(Update(&operations, file_chunks, push_intermediate_manifest,
|
||||
RETURN_IF_ERROR(Update(&operations, file_chunks, push_handler,
|
||||
/*recursive=*/true),
|
||||
"Failed to build manifest from scratch");
|
||||
|
||||
@@ -495,29 +503,97 @@ ContentIdProto ManifestUpdater::DefaultManifestId() {
|
||||
return manifest_id_;
|
||||
}
|
||||
|
||||
size_t ManifestUpdater::QueueTasks(Threadpool* pool,
|
||||
const fastcdc::Config* cdc_cfg,
|
||||
ManifestBuilder* manifest_builder) {
|
||||
const size_t max_tasks_queued = MaxQueuedTasks(*pool);
|
||||
size_t num_tasks_queued = 0;
|
||||
while (pool->NumQueuedTasks() < max_tasks_queued && !queue_.empty() &&
|
||||
!buffers_.empty()) {
|
||||
PendingAsset asset = std::move(queue_.front());
|
||||
absl::StatusOr<AssetBuilder> dir;
|
||||
queue_.pop_front();
|
||||
absl::Status ManifestUpdater::FlushAndPushManifest(
|
||||
FileChunkMap* file_chunks,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids,
|
||||
PushManifestHandler push_manifest_handler) {
|
||||
file_chunks->FlushUpdates();
|
||||
ASSIGN_OR_RETURN(manifest_id_, manifest_builder_->Flush(),
|
||||
"Failed to flush intermediate manifest");
|
||||
// Add all content IDs that were just written back.
|
||||
manifest_content_ids->insert(manifest_builder_->FlushedContentIds().begin(),
|
||||
manifest_builder_->FlushedContentIds().end());
|
||||
if (push_manifest_handler) push_manifest_handler(manifest_id_);
|
||||
last_manifest_flush_ = absl::Now();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
bool ManifestUpdater::WantManifestFlushed(
|
||||
PushManifestHandler push_manifest_handler) const {
|
||||
return push_manifest_handler && flush_deadline_ < absl::Now() &&
|
||||
last_manifest_flush_ + kMinDelayBetweenFlush < absl::Now();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::MaybeFlushAndPushManifest(
|
||||
size_t dir_scanner_tasks_queued, FileChunkMap* file_chunks,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids,
|
||||
PushManifestHandler push_manifest) {
|
||||
// Flush only if there are no DirScannerTask active.
|
||||
if (dir_scanner_tasks_queued == 0 && WantManifestFlushed(push_manifest)) {
|
||||
flush_deadline_ = absl::InfiniteFuture();
|
||||
return FlushAndPushManifest(file_chunks, manifest_content_ids,
|
||||
push_manifest);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void ManifestUpdater::AddPriorityAssets(std::vector<std::string> rel_paths) {
|
||||
absl::MutexLock lock(&priority_mutex_);
|
||||
absl::Time now = absl::Now();
|
||||
for (std::string& rel_path : rel_paths) {
|
||||
priority_assets_.push_back(PriorityAsset{std::move(rel_path), now});
|
||||
}
|
||||
}
|
||||
|
||||
void ManifestUpdater::PrioritizeQueuedAssets() {
|
||||
std::vector<PriorityAsset> prio_assets;
|
||||
{
|
||||
absl::MutexLock lock(&priority_mutex_);
|
||||
if (priority_assets_.empty()) return;
|
||||
std::swap(prio_assets, priority_assets_);
|
||||
}
|
||||
|
||||
absl::Time deadline = queue_.Prioritize(prio_assets, manifest_builder_.get());
|
||||
if (deadline < flush_deadline_) flush_deadline_ = deadline;
|
||||
}
|
||||
|
||||
ManifestUpdater::QueueTasksResult ManifestUpdater::QueueTasks(
|
||||
bool drain_dir_scanner_tasks, Threadpool* pool,
|
||||
const fastcdc::Config* cdc_cfg) {
|
||||
// Prioritize requested assets before queuing new tasks.
|
||||
PrioritizeQueuedAssets();
|
||||
const size_t max_tasks_queued = MaxQueuedTasks(*pool);
|
||||
size_t file_chunker_tasks = 0, dir_scanner_tasks = 0;
|
||||
|
||||
// Skip DIRECTORY assets if we should drain DirScannerTasks.
|
||||
PendingAssetsQueue::AcceptFunc accept = nullptr;
|
||||
if (drain_dir_scanner_tasks) {
|
||||
accept = [](const PendingAsset& p) {
|
||||
return p.type != AssetProto::DIRECTORY;
|
||||
};
|
||||
}
|
||||
|
||||
absl::StatusOr<AssetBuilder> dir;
|
||||
PendingAsset asset;
|
||||
|
||||
while (pool->NumQueuedTasks() < max_tasks_queued && !buffers_.empty() &&
|
||||
queue_.Dequeue(&asset, accept)) {
|
||||
switch (asset.type) {
|
||||
case AssetProto::FILE:
|
||||
pool->QueueTask(std::make_unique<FileChunkerTask>(
|
||||
cfg_.src_dir, std::move(asset.relative_path),
|
||||
std::move(asset.filename), cdc_cfg, std::move(buffers_.back())));
|
||||
cfg_.src_dir, std::move(asset), cdc_cfg,
|
||||
std::move(buffers_.back())));
|
||||
buffers_.pop_back();
|
||||
++file_chunker_tasks;
|
||||
break;
|
||||
|
||||
case AssetProto::DIRECTORY:
|
||||
dir = manifest_builder->GetOrCreateAsset(
|
||||
// Flushing the manifest may invalidate the pointers to the directory
|
||||
// proto returned from GetOrCreateAsset(), so the manifest cannot be
|
||||
// flushed as long as DirScannerTask are in the queue.
|
||||
dir = manifest_builder_->GetOrCreateAsset(
|
||||
path::JoinUnix(asset.relative_path, asset.filename),
|
||||
AssetProto::DIRECTORY, true);
|
||||
AssetProto::DIRECTORY, /*force_create=*/true);
|
||||
if (!dir.ok()) {
|
||||
LOG_ERROR(
|
||||
"Failed to locate directory '%s' in the manifest, skipping it: "
|
||||
@@ -526,8 +602,9 @@ size_t ManifestUpdater::QueueTasks(Threadpool* pool,
|
||||
continue;
|
||||
}
|
||||
pool->QueueTask(std::make_unique<DirScannerTask>(
|
||||
cfg_.src_dir, std::move(asset.relative_path),
|
||||
std::move(asset.filename), std::move(dir.value()), data_store_));
|
||||
cfg_.src_dir, std::move(asset), std::move(dir.value()),
|
||||
data_store_));
|
||||
++dir_scanner_tasks;
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -535,15 +612,13 @@ size_t ManifestUpdater::QueueTasks(Threadpool* pool,
|
||||
AssetProto::Type_Name(asset.type), asset.relative_path);
|
||||
continue;
|
||||
}
|
||||
++num_tasks_queued;
|
||||
}
|
||||
return num_tasks_queued;
|
||||
return QueueTasksResult{dir_scanner_tasks, file_chunker_tasks};
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::ApplyOperations(
|
||||
std::vector<Operation>* operations, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder, AssetBuilder* parent, bool recursive) {
|
||||
assert(manifest_builder != nullptr);
|
||||
AssetBuilder* parent, absl::Time deadline, bool recursive) {
|
||||
if (operations->empty()) return absl::OkStatus();
|
||||
|
||||
// First, handle all deletions to make the outcome independent of the order of
|
||||
@@ -561,7 +636,7 @@ absl::Status ManifestUpdater::ApplyOperations(
|
||||
// skipped.
|
||||
continue;
|
||||
}
|
||||
RETURN_IF_ERROR(manifest_builder->DeleteAsset(ai.path),
|
||||
RETURN_IF_ERROR(manifest_builder_->DeleteAsset(ai.path),
|
||||
"Failed to delete asset '%s' from manifest", ai.path);
|
||||
last_deleted = &ai.path;
|
||||
}
|
||||
@@ -591,8 +666,8 @@ absl::Status ManifestUpdater::ApplyOperations(
|
||||
|
||||
case Operator::kUpdate:
|
||||
ASSIGN_OR_RETURN(asset_builder,
|
||||
manifest_builder->GetOrCreateAsset(ai.path, ai.type,
|
||||
true, &created),
|
||||
manifest_builder_->GetOrCreateAsset(ai.path, ai.type,
|
||||
true, &created),
|
||||
"Failed to add '%s' to the manifest", ai.path);
|
||||
break;
|
||||
}
|
||||
@@ -609,29 +684,30 @@ absl::Status ManifestUpdater::ApplyOperations(
|
||||
asset_builder.SetFileSize(ai.size);
|
||||
// Queue chunker tasks for files.
|
||||
asset_builder.SetInProgress(true);
|
||||
} else if (recursive && ai.type == AssetProto::DIRECTORY) {
|
||||
// We are recursing into all sub-directories, so we add queue up the
|
||||
// child directory for scanning.
|
||||
asset_builder.SetInProgress(true);
|
||||
} else if (ai.type == AssetProto::DIRECTORY) {
|
||||
asset_builder.SetPermissions(ManifestBuilder::kDefaultDirPerms);
|
||||
// We are recursing into all sub-directories, so we queue up the child
|
||||
// directory for scanning.
|
||||
if (recursive) asset_builder.SetInProgress(true);
|
||||
}
|
||||
|
||||
// If the asset is marked as in-progress, we need to queue it up.
|
||||
if (asset_builder.InProgress()) {
|
||||
queue_.emplace_back(ai.type, asset_builder.RelativePath(),
|
||||
asset_builder.Name());
|
||||
PendingAsset pending(ai.type, asset_builder.RelativePath(),
|
||||
asset_builder.Name(), deadline);
|
||||
queue_.Add(std::move(pending));
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::HandleFileChunkerResult(
|
||||
FileChunkerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder) {
|
||||
FileChunkerTask* task, FileChunkMap* file_chunks) {
|
||||
const std::string rel_file_path = task->RelativeUnixFilePath();
|
||||
buffers_.emplace_back(task->ReleaseBuffer());
|
||||
|
||||
AssetBuilder asset_builder;
|
||||
ASSIGN_OR_RETURN(asset_builder, manifest_builder->GetOrCreateAsset(
|
||||
ASSIGN_OR_RETURN(asset_builder, manifest_builder_->GetOrCreateAsset(
|
||||
rel_file_path, AssetProto::FILE));
|
||||
asset_builder.SetInProgress(false);
|
||||
if (!task->Status().ok()) {
|
||||
@@ -663,7 +739,6 @@ absl::Status ManifestUpdater::HandleFileChunkerResult(
|
||||
|
||||
absl::Status ManifestUpdater::HandleDirScannerResult(
|
||||
DirScannerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids) {
|
||||
// Include the error in the stats, but we can still try to process the
|
||||
// (partial) results.
|
||||
@@ -671,21 +746,26 @@ absl::Status ManifestUpdater::HandleDirScannerResult(
|
||||
++stats_.total_dirs_failed;
|
||||
}
|
||||
|
||||
// If there's a chance we can do more work within the parent's deadline, we
|
||||
// propagate the deadline to the children.
|
||||
// TODO(chrschn) Use SteadyClock instead of the system clock.
|
||||
absl::Time deadline = task->Deadline() > absl::Now() ? task->Deadline()
|
||||
: absl::InfiniteFuture();
|
||||
|
||||
// DirScannerTasks are inherently recursive.
|
||||
RETURN_IF_ERROR(ApplyOperations(task->Operations(), file_chunks,
|
||||
manifest_builder, task->Dir(),
|
||||
/*recursive=*/true));
|
||||
RETURN_IF_ERROR(ApplyOperations(task->Operations(), file_chunks, task->Dir(),
|
||||
deadline, /*recursive=*/true));
|
||||
task->Dir()->SetInProgress(false);
|
||||
// Union all manifest chunk content IDs.
|
||||
assert(manifest_content_ids != nullptr);
|
||||
manifest_content_ids->insert(task->ManifestContentIds()->begin(),
|
||||
task->ManifestContentIds()->end());
|
||||
return task->Status();
|
||||
}
|
||||
|
||||
absl::Status ManifestUpdater::Update(
|
||||
OperationList* operations, FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest, bool recursive) {
|
||||
absl::Status ManifestUpdater::Update(OperationList* operations,
|
||||
FileChunkMap* file_chunks,
|
||||
PushManifestHandler push_handler,
|
||||
bool recursive) {
|
||||
Stopwatch sw;
|
||||
LOG_INFO(
|
||||
"Updating manifest for '%s': applying %u changes, "
|
||||
@@ -694,11 +774,20 @@ absl::Status ManifestUpdater::Update(
|
||||
|
||||
stats_ = UpdaterStats();
|
||||
|
||||
// Collects the content IDs that make up the manifest when recursing. They are
|
||||
// used to prune the manifest cache directory at the end of the Update()
|
||||
// process.
|
||||
std::unordered_set<ContentIdProto> manifest_content_ids;
|
||||
|
||||
CdcParamsProto cdc_params;
|
||||
cdc_params.set_min_chunk_size(cfg_.min_chunk_size);
|
||||
cdc_params.set_avg_chunk_size(cfg_.avg_chunk_size);
|
||||
cdc_params.set_max_chunk_size(cfg_.max_chunk_size);
|
||||
ManifestBuilder manifest_builder(cdc_params, data_store_);
|
||||
manifest_builder_ =
|
||||
std::make_unique<ManifestBuilder>(cdc_params, data_store_);
|
||||
|
||||
// Release the ManifestBuilder at the end of this function to free memory.
|
||||
Finalizer finalizer([b = &manifest_builder_]() { b->reset(); });
|
||||
|
||||
// Load the manifest id from the store.
|
||||
ContentIdProto manifest_id;
|
||||
@@ -711,17 +800,17 @@ absl::Status ManifestUpdater::Update(
|
||||
// A non-existing manifest is not an issue, just build it from scratch.
|
||||
LOG_INFO("No cached manifest found. Building from scratch.");
|
||||
} else {
|
||||
RETURN_IF_ERROR(manifest_builder.LoadManifest(manifest_id),
|
||||
RETURN_IF_ERROR(manifest_builder_->LoadManifest(manifest_id),
|
||||
"Failed to load manifest with id '%s'",
|
||||
ContentId::ToHexString(manifest_id));
|
||||
// The CDC params might have changed when loading the manifest.
|
||||
if (ValidateCdcParams(manifest_builder.Manifest()->cdc_params())) {
|
||||
cdc_params = manifest_builder.Manifest()->cdc_params();
|
||||
if (ValidateCdcParams(manifest_builder_->Manifest()->cdc_params())) {
|
||||
cdc_params = manifest_builder_->Manifest()->cdc_params();
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(ApplyOperations(operations, file_chunks, &manifest_builder,
|
||||
nullptr, recursive));
|
||||
RETURN_IF_ERROR(ApplyOperations(operations, file_chunks, nullptr,
|
||||
absl::InfiniteFuture(), recursive));
|
||||
|
||||
Threadpool pool(cfg_.num_threads > 0 ? cfg_.num_threads
|
||||
: std::thread::hardware_concurrency());
|
||||
@@ -730,36 +819,37 @@ absl::Status ManifestUpdater::Update(
|
||||
buffers_.reserve(max_queued_tasks);
|
||||
while (buffers_.size() < max_queued_tasks)
|
||||
buffers_.emplace_back(cfg_.max_chunk_size << 1);
|
||||
size_t num_tasks_queued = 0;
|
||||
size_t total_tasks_queued = 0, scanner_tasks_queued = 0;
|
||||
|
||||
// Collect the content IDs that make up the manifest when recursing. They are
|
||||
// used to prune the manifest cache directory in the end.
|
||||
std::unordered_set<ContentIdProto> manifest_content_ids;
|
||||
|
||||
// Push intermediate manifest if there are queued chunker tasks.
|
||||
if (push_intermediate_manifest && !queue_.empty()) {
|
||||
file_chunks->FlushUpdates();
|
||||
ASSIGN_OR_RETURN(manifest_id_, manifest_builder.Flush(),
|
||||
"Failed to flush intermediate manifest");
|
||||
// Add all content IDs that were just written back.
|
||||
manifest_content_ids.insert(manifest_builder.FlushedContentIds().begin(),
|
||||
manifest_builder.FlushedContentIds().end());
|
||||
push_intermediate_manifest(manifest_id_);
|
||||
// Push intermediate manifest if there are queued tasks.
|
||||
if (push_handler && !queue_.Empty()) {
|
||||
RETURN_IF_ERROR(
|
||||
FlushAndPushManifest(file_chunks, &manifest_content_ids, push_handler));
|
||||
}
|
||||
|
||||
fastcdc::Config cdc_cfg = CdcConfigFromProto(cdc_params);
|
||||
|
||||
// Wait for the chunker tasks and update file assets.
|
||||
while (!queue_.empty() || num_tasks_queued > 0) {
|
||||
num_tasks_queued += QueueTasks(&pool, &cdc_cfg, &manifest_builder);
|
||||
// Wait for the chunker and scanner tasks.
|
||||
while (!queue_.Empty() || total_tasks_queued > 0) {
|
||||
RETURN_IF_ERROR(MaybeFlushAndPushManifest(scanner_tasks_queued, file_chunks,
|
||||
&manifest_content_ids,
|
||||
push_handler));
|
||||
// Flushing the manifest may invalidate the AssetProto pointers held by the
|
||||
// queued DirScannerTask. If the manifest should be flushed, we drain the
|
||||
// queue from those tasks so that the push is safe.
|
||||
bool drain_dir_scanners = WantManifestFlushed(push_handler);
|
||||
|
||||
QueueTasksResult queued = QueueTasks(drain_dir_scanners, &pool, &cdc_cfg);
|
||||
total_tasks_queued += queued.dir_scanners + queued.file_chunkers;
|
||||
scanner_tasks_queued += queued.dir_scanners;
|
||||
|
||||
std::unique_ptr<Task> task = pool.GetCompletedTask();
|
||||
assert(num_tasks_queued > 0);
|
||||
--num_tasks_queued;
|
||||
assert(total_tasks_queued > 0);
|
||||
--total_tasks_queued;
|
||||
|
||||
FileChunkerTask* chunker_task = dynamic_cast<FileChunkerTask*>(task.get());
|
||||
if (chunker_task) {
|
||||
status =
|
||||
HandleFileChunkerResult(chunker_task, file_chunks, &manifest_builder);
|
||||
status = HandleFileChunkerResult(chunker_task, file_chunks);
|
||||
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to process file '%s': %s", chunker_task->FilePath(),
|
||||
@@ -770,8 +860,10 @@ absl::Status ManifestUpdater::Update(
|
||||
|
||||
DirScannerTask* scanner_task = dynamic_cast<DirScannerTask*>(task.get());
|
||||
if (scanner_task) {
|
||||
assert(scanner_tasks_queued > 0);
|
||||
--scanner_tasks_queued;
|
||||
status = HandleDirScannerResult(scanner_task, file_chunks,
|
||||
&manifest_builder, &manifest_content_ids);
|
||||
&manifest_content_ids);
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to process directory '%s': %s",
|
||||
scanner_task->FilePath(), status.ToString());
|
||||
@@ -780,25 +872,23 @@ absl::Status ManifestUpdater::Update(
|
||||
}
|
||||
}
|
||||
|
||||
file_chunks->FlushUpdates();
|
||||
ASSIGN_OR_RETURN(manifest_id_, manifest_builder.Flush(),
|
||||
"Failed to flush manifest");
|
||||
|
||||
// Don't pass in the push_handler here. We first want to write back the new
|
||||
// manifest ID to the data store before we call the handler.
|
||||
RETURN_IF_ERROR(
|
||||
FlushAndPushManifest(file_chunks, &manifest_content_ids, nullptr));
|
||||
// Save the manifest id to the store.
|
||||
std::string id_str = manifest_id_.SerializeAsString();
|
||||
RETURN_IF_ERROR(
|
||||
data_store_->Put(GetManifestStoreId(), id_str.data(), id_str.size()),
|
||||
"Failed to store manifest id");
|
||||
if (push_handler) push_handler(manifest_id_);
|
||||
|
||||
// Remove manifest chunks that are no longer referenced when recursing through
|
||||
// all sub-directories. This also makes sure that all referenced manifest
|
||||
// chunks are present.
|
||||
if (status.ok() && recursive) {
|
||||
// Retain the chunk that stores the manifest ID.
|
||||
manifest_content_ids.insert(ManifestUpdater::GetManifestStoreId());
|
||||
// Add all content IDs that were just written back.
|
||||
manifest_content_ids.insert(manifest_builder.FlushedContentIds().begin(),
|
||||
manifest_builder.FlushedContentIds().end());
|
||||
manifest_content_ids.insert(GetManifestStoreId());
|
||||
status = data_store_->Prune(std::move(manifest_content_ids));
|
||||
if (!status.ok()) {
|
||||
// Signal to the caller that the manifest needs to be rebuilt from
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "manifest/asset_builder.h"
|
||||
#include "manifest/file_chunk_map.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
#include "manifest/pending_assets_queue.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace fastcdc {
|
||||
@@ -140,7 +141,7 @@ class ManifestUpdater {
|
||||
// Returns an error if |dir| does not exist or it is not a directory.
|
||||
static absl::Status IsValidDir(std::string dir);
|
||||
|
||||
using PushIntermediateManifest =
|
||||
using PushManifestHandler =
|
||||
std::function<void(const ContentIdProto& manifest_id)>;
|
||||
|
||||
// |data_store| is used to store manifest chunks. File data chunks are not
|
||||
@@ -156,27 +157,29 @@ class ManifestUpdater {
|
||||
// Reads the full source directory and syncs the manifest to it. Prunes old,
|
||||
// unreferenced manifest chunks. Updates and flushes |file_chunks|.
|
||||
//
|
||||
// If a valid |push_intermediate_manifest| is passed, then a manifest is
|
||||
// flushed after the root directory has been added, but before all files and
|
||||
// If a valid |push_handler| is passed, then a manifest is flushed at least
|
||||
// twice and the handler is called:
|
||||
// - after the root directory has been added, but before all files and
|
||||
// directories have been processed. That means, the manifest does not yet
|
||||
// contain all assets; all incomplete assets are set to in-progress.
|
||||
// - after an asset that was prioritized with AddPriorityAssets() has been
|
||||
// completed.
|
||||
// - at the end of the update process in case of success
|
||||
absl::Status UpdateAll(FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest =
|
||||
PushIntermediateManifest());
|
||||
PushManifestHandler push_handler = nullptr);
|
||||
|
||||
// Updates the manifest by applying the |operations| list. Deletions are
|
||||
// handled first to make the outcome independent of the order in the list.
|
||||
// Also updates and flushes |file_chunks| with the changes made. See
|
||||
// UpdateAll() for a description of |push_intermediate_manifest|.
|
||||
// Also updates and flushes |file_chunks| with the changes made. The
|
||||
// |push_handler| is called at least twice during the operation and at the
|
||||
// end, see UpdateAll() for more details.
|
||||
//
|
||||
// All paths should be Unix paths. If |recursive| is true, then a directory
|
||||
// scanner task is enqueued for each directory that is added to the manifest.
|
||||
// This is only needed during UpdateAll(). When the manifest is updated in
|
||||
// response to file watcher changes, then |recursive| should be set to false.
|
||||
absl::Status Update(OperationList* operations, FileChunkMap* file_chunks,
|
||||
PushIntermediateManifest push_intermediate_manifest =
|
||||
PushIntermediateManifest(),
|
||||
bool recursive = false);
|
||||
PushManifestHandler push_handler, bool recursive = false);
|
||||
|
||||
// Content id of the current manifest.
|
||||
const ContentIdProto& ManifestId() const { return manifest_id_; }
|
||||
@@ -190,62 +193,84 @@ class ManifestUpdater {
|
||||
// Returns an empty manifest.
|
||||
ContentIdProto DefaultManifestId();
|
||||
|
||||
private:
|
||||
// Adds enough pending assets from |queue_| as tasks to the |pool| to keep all
|
||||
// worker threads busy. Returns the number of tasks that were added.
|
||||
size_t QueueTasks(Threadpool* pool, const fastcdc::Config* cdc_cfg,
|
||||
ManifestBuilder* manifest_builder);
|
||||
// Appends the given |rel_paths| to the list of assets to prioritize. All
|
||||
// paths must be given as Unix paths.
|
||||
void AddPriorityAssets(std::vector<std::string> rel_paths)
|
||||
ABSL_LOCKS_EXCLUDED(priority_mutex_);
|
||||
|
||||
// Applies the |operations| list to the manifest owned by the
|
||||
// |manifest_builder|. First, all deletions are handled and the corresponding
|
||||
// files are removed from the |file_chunks| map, then all added or updated
|
||||
// assets are processed. This guarantees that the outcome is independent of
|
||||
// the order in the list.
|
||||
private:
|
||||
// Holds the number of queued tasks returned by QueueTasks().
|
||||
struct QueueTasksResult {
|
||||
size_t dir_scanners = 0, file_chunkers = 0;
|
||||
};
|
||||
|
||||
// Adds enough pending assets from |queue_| as tasks to the |pool| to keep all
|
||||
// worker threads busy. If |drain_dir_scanner_tasks| is true, only
|
||||
// FileChunkerTasks are queued, others are skipped. Returns the number of
|
||||
// tasks that were queued as a QueueTasksResult.
|
||||
QueueTasksResult QueueTasks(bool drain_dir_scanner_tasks, Threadpool* pool,
|
||||
const fastcdc::Config* cdc_cfg);
|
||||
|
||||
// Modifies the list of queued tasks to prioritize those assets that were
|
||||
// previously selected using the AddPriorityAssets() method.
|
||||
void PrioritizeQueuedAssets() ABSL_LOCKS_EXCLUDED(priority_mutex_);
|
||||
|
||||
// Returns true if all of the following conditions are satisfied:
|
||||
// - |push_manifest_handler| is valid
|
||||
// - the flush deadline that was set by a prioritized asset is due
|
||||
// - the manifest was not flushed recently
|
||||
bool WantManifestFlushed(PushManifestHandler push_manifest_handler) const;
|
||||
|
||||
// Checks if it is safe and desired to flush the manifest, then calls
|
||||
// FlushAndPushManifest() if that is the case.
|
||||
// |dir_scanner_tasks_queued| must be the number of currently queued
|
||||
// DirScannerTasks.
|
||||
// |file_chunks| is updated by the flush operation, if it is executed.
|
||||
// |push_manifest_handler| is invoked if the manifest gets flushed and pushed.
|
||||
absl::Status MaybeFlushAndPushManifest(
|
||||
size_t dir_scanner_tasks_queued, FileChunkMap* file_chunks,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids,
|
||||
PushManifestHandler push_manifest_handler);
|
||||
|
||||
// Flushes the in-progress manifest and the updates queued in |file_chunks|.
|
||||
// If |push_manifest_handler| is not nullptr, it is invoked with the resulting
|
||||
// manifest ID.
|
||||
absl::Status FlushAndPushManifest(
|
||||
FileChunkMap* file_chunks,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids,
|
||||
PushManifestHandler push_manifest_handler);
|
||||
|
||||
// Applies the |operations| list to the manifest owned by the manifest
|
||||
// builder. First, all deletions are handled and the corresponding files are
|
||||
// removed from the |file_chunks| map, then all added or updated assets are
|
||||
// processed. This guarantees that the outcome is independent of the order in
|
||||
// the list.
|
||||
//
|
||||
// If |parent| is non-null, then it must be of type DIRECTORY and all added
|
||||
// assets are made direct children of |parent|. The function does *not* verify
|
||||
// that all children have |parent| as directory path.
|
||||
// that all children have |parent| as directory path. This is used to
|
||||
// efficiently handle the result of a DirScannerTask.
|
||||
//
|
||||
// Enqueues tasks to chunk the given files for files that were added or
|
||||
// updated. If |recursive| is true, then it will also enqueue directory
|
||||
// scanner tasks for all given directories.
|
||||
// scanner tasks for all given directories. All follow-up tasks have the given
|
||||
// |deadline| set, which determines the deadline after which the manifest
|
||||
// should be flushed.
|
||||
absl::Status ApplyOperations(std::vector<Operation>* operations,
|
||||
FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
AssetBuilder* parent, bool recursive);
|
||||
FileChunkMap* file_chunks, AssetBuilder* parent,
|
||||
absl::Time deadline, bool recursive);
|
||||
|
||||
// Handles the results of a completed FileChunkerTask.
|
||||
absl::Status HandleFileChunkerResult(FileChunkerTask* task,
|
||||
FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder);
|
||||
FileChunkMap* file_chunks);
|
||||
|
||||
// Handles the results of a completed DirScannerTask.
|
||||
absl::Status HandleDirScannerResult(
|
||||
DirScannerTask* task, FileChunkMap* file_chunks,
|
||||
ManifestBuilder* manifest_builder,
|
||||
std::unordered_set<ContentIdProto>* manifest_content_ids);
|
||||
|
||||
// Represents an asset that has not been fully processed yet.
|
||||
struct PendingAsset {
|
||||
PendingAsset() {}
|
||||
PendingAsset(AssetProto::Type type, std::string relative_path,
|
||||
std::string filename)
|
||||
: type(type),
|
||||
relative_path(std::move(relative_path)),
|
||||
filename(std::move(filename)) {}
|
||||
|
||||
// The asset type (either FILE or DIRECTORY).
|
||||
AssetProto::Type type = AssetProto::UNKNOWN;
|
||||
|
||||
// Relative unix path of the directory containing this asset.
|
||||
std::string relative_path;
|
||||
|
||||
// File name of the asset that still needs processing.
|
||||
std::string filename;
|
||||
};
|
||||
|
||||
// Queue of pending assets waiting for completion.
|
||||
std::list<PendingAsset> queue_;
|
||||
PendingAssetsQueue queue_;
|
||||
|
||||
// Pool of pre-allocated buffers
|
||||
std::vector<Buffer> buffers_;
|
||||
@@ -261,6 +286,29 @@ class ManifestUpdater {
|
||||
|
||||
// Stats for the last Update*() operation.
|
||||
UpdaterStats stats_;
|
||||
|
||||
// The builder used for updating the manifest.
|
||||
std::unique_ptr<ManifestBuilder> manifest_builder_;
|
||||
|
||||
// Holds the assets that should be prioritized while updating the manifest.
|
||||
std::vector<PriorityAsset> priority_assets_ ABSL_GUARDED_BY(priority_mutex_);
|
||||
absl::Mutex priority_mutex_;
|
||||
|
||||
// Deadline by which the manifest should be flushed again.
|
||||
absl::Time flush_deadline_ = absl::InfiniteFuture();
|
||||
|
||||
// The time when the manifest was flushed last.
|
||||
absl::Time last_manifest_flush_;
|
||||
|
||||
// How much time we allow at least for processing a prioritized asset. The
|
||||
// manifest won't be flushed for that time, to allow more assets to be
|
||||
// finalized before the manifest is sent to the client.
|
||||
static constexpr absl::Duration kMinAssetProcessingTime =
|
||||
absl::Milliseconds(200);
|
||||
|
||||
// How often we allow an intermediate manifest to be flushed and pushed.
|
||||
static constexpr absl::Duration kMinDelayBetweenFlush =
|
||||
absl::Milliseconds(500);
|
||||
};
|
||||
|
||||
}; // namespace cdc_ft
|
||||
|
||||
@@ -107,7 +107,7 @@ TEST_F(ManifestUpdaterTest, UpdateAll_AddFileIncremental) {
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeDeleteOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
&file_chunks_, nullptr));
|
||||
ASSERT_NO_FATAL_FAILURE(
|
||||
ExpectManifestEquals({"a.txt", "subdir"}, updater.ManifestId()));
|
||||
|
||||
@@ -173,13 +173,13 @@ TEST_F(ManifestUpdaterTest, UpdateAll_PrunesUnreferencedChunks) {
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
// 1 for manifest id, 1 for manifest, 1 indirect assets.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 3);
|
||||
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeUpdateOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
&file_chunks_, nullptr));
|
||||
// 1 for manifest id, 1 for manifest, 5 indirect assets.
|
||||
// 2 additional chunks from the first Update() that are now unreferenced.
|
||||
// -1, because the indirect asset for "a.txt" is deduplicated
|
||||
@@ -207,7 +207,7 @@ TEST_F(ManifestUpdaterTest, UpdateAll_RecoversFromMissingChunks) {
|
||||
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
// 1 for manifest id, 1 for manifest, 1 indirect assets.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 3)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
@@ -225,7 +225,7 @@ TEST_F(ManifestUpdaterTest, UpdateAll_RecoversFromMissingChunks) {
|
||||
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
// 1 for manifest id, 1 for manifest, 5 indirect assets.
|
||||
// There would be 7 chunks without the removal above, see UpdateAll_Prune.
|
||||
// There would be 8 chunks without the removal above, see UpdateAll_Prune.
|
||||
EXPECT_EQ(data_store_.Chunks().size(), 7)
|
||||
<< "Manifest: " << ContentId::ToHexString(updater.ManifestId())
|
||||
<< std::endl
|
||||
@@ -272,15 +272,17 @@ TEST_F(ManifestUpdaterTest, UpdateAll_FileChunkMapAfterUpdate) {
|
||||
// Verifies that the intermediate manifest contains the expected files.
|
||||
TEST_F(ManifestUpdaterTest, UpdateAll_PushIntermediateManifest) {
|
||||
ContentIdProto intermediate_id;
|
||||
auto push_intermediate_manifest =
|
||||
[&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
intermediate_id = manifest_id;
|
||||
};
|
||||
auto push_manifest = [&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
// Catch the first (= intermediate) manifest.
|
||||
if (intermediate_id == ContentIdProto()) {
|
||||
intermediate_id = manifest_id;
|
||||
}
|
||||
};
|
||||
|
||||
// Contains a.txt and subdir/b.txt.
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_, push_intermediate_manifest));
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_, push_manifest));
|
||||
|
||||
// Double check that the files in the final manifest are no longer in
|
||||
// progress.
|
||||
@@ -301,7 +303,7 @@ TEST_F(ManifestUpdaterTest, UpdateAll_PushIntermediateManifest) {
|
||||
TEST_F(ManifestUpdaterTest, Update_AddFile) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 1);
|
||||
@@ -319,7 +321,8 @@ TEST_F(ManifestUpdaterTest, Update_AddFile) {
|
||||
TEST_F(ManifestUpdaterTest, Update_AddFileAutoCreateSubdir) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_));
|
||||
EXPECT_OK(
|
||||
updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 1);
|
||||
@@ -346,7 +349,7 @@ TEST_F(ManifestUpdaterTest, Update_DeleteFiles) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
@@ -360,7 +363,8 @@ TEST_F(ManifestUpdaterTest, Update_DeleteFiles) {
|
||||
updater.ManifestId()));
|
||||
|
||||
// Delete another one in a subdirectory.
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"subdir/b.txt"}), &file_chunks_));
|
||||
EXPECT_OK(
|
||||
updater.Update(MakeDeleteOps({"subdir/b.txt"}), &file_chunks_, nullptr));
|
||||
ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals(
|
||||
{"subdir", "subdir/c.txt", "subdir/d.txt"}, updater.ManifestId()));
|
||||
}
|
||||
@@ -370,7 +374,7 @@ TEST_F(ManifestUpdaterTest, Update_DeleteDir) {
|
||||
cfg_.src_dir = path::Join(base_dir_, "non_empty");
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"subdir"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"subdir"}), &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 0);
|
||||
@@ -390,7 +394,7 @@ TEST_F(ManifestUpdaterTest, Update_DeleteNonExistingAsset) {
|
||||
// We need to craft AssetInfos for non-existing assets manually.
|
||||
AssetInfo ai{"non_existing", AssetProto::DIRECTORY};
|
||||
ManifestUpdater::OperationList ops{{Operator::kDelete, ai}};
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_));
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_deleted, 1);
|
||||
@@ -406,7 +410,7 @@ TEST_F(ManifestUpdaterTest, Update_AddNonExistingFile) {
|
||||
ai.path = "non_existing";
|
||||
ManifestUpdater::OperationList ops{
|
||||
{Operator::kAdd, ai}, {Operator::kAdd, MakeAssetInfo("a.txt").info}};
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_));
|
||||
EXPECT_OK(updater.Update(&ops, &file_chunks_, nullptr));
|
||||
|
||||
const UpdaterStats& stats = updater.Stats();
|
||||
EXPECT_EQ(stats.total_assets_added_or_updated, 2);
|
||||
@@ -428,17 +432,19 @@ TEST_F(ManifestUpdaterTest, Update_PushIntermediateManifest) {
|
||||
EXPECT_OK(updater.UpdateAll(&file_chunks_));
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeDeleteOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_));
|
||||
&file_chunks_, nullptr));
|
||||
|
||||
// Add a.txt back and check intermediate manifest.
|
||||
ContentIdProto intermediate_id;
|
||||
auto push_intermediate_manifest =
|
||||
[&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
intermediate_id = manifest_id;
|
||||
};
|
||||
auto push_manifest = [&intermediate_id](const ContentIdProto& manifest_id) {
|
||||
// Catch the first (= intermediate) manifest.
|
||||
if (intermediate_id == ContentIdProto()) {
|
||||
intermediate_id = manifest_id;
|
||||
}
|
||||
};
|
||||
EXPECT_OK(updater.Update(
|
||||
MakeUpdateOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
|
||||
&file_chunks_, push_intermediate_manifest));
|
||||
&file_chunks_, push_manifest));
|
||||
EXPECT_GT(intermediate_id.blake3_sum_160().size(), 0);
|
||||
|
||||
// Only file a.txt is done in the intermediate manifest, all others are in
|
||||
@@ -460,17 +466,18 @@ TEST_F(ManifestUpdaterTest, Update_FileChunkMap) {
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
// Add a.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", false);
|
||||
|
||||
// Add subdir/b.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_));
|
||||
EXPECT_OK(
|
||||
updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_, nullptr));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
|
||||
// Remove a.txt.
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeDeleteOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
ValidateChunkLookup("a.txt", false);
|
||||
ValidateChunkLookup("subdir/b.txt", true);
|
||||
}
|
||||
@@ -482,18 +489,20 @@ TEST_F(ManifestUpdaterTest, Update_IntermediateFileChunkMap) {
|
||||
ManifestUpdater updater(&data_store_, cfg_);
|
||||
|
||||
// Add a.txt.
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_));
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"a.txt"}), &file_chunks_, nullptr));
|
||||
|
||||
// Add subdir/b.txt and check intermediate lookups.
|
||||
auto push_intermediate_manifest = [this](const ContentIdProto&) {
|
||||
int count = 0;
|
||||
auto push_manifest = [this, &count](const ContentIdProto&) {
|
||||
++count;
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", false); // Not in yet.
|
||||
// The first (= intermediate) manifest does not have the chunks, the second
|
||||
// (= final) does.
|
||||
ValidateChunkLookup("subdir/b.txt", count > 1);
|
||||
};
|
||||
|
||||
EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_,
|
||||
push_intermediate_manifest));
|
||||
ValidateChunkLookup("a.txt", true);
|
||||
ValidateChunkLookup("subdir/b.txt", true); // Now it's in!
|
||||
push_manifest));
|
||||
}
|
||||
|
||||
// A call to ManifestId() returns the manifest id!!!
|
||||
@@ -507,6 +516,70 @@ TEST_F(ManifestUpdaterTest, ManifestId) {
  EXPECT_EQ(updater.ManifestId(), manifest_id);
}

TEST_F(ManifestUpdaterTest, VerifyPermissions) {
  cfg_.src_dir = path::Join(base_dir_, "non_empty");
  ManifestUpdater updater(&data_store_, cfg_);

  EXPECT_OK(updater.UpdateAll(&file_chunks_));
  ManifestIterator manifest_iter(&data_store_);
  EXPECT_OK(manifest_iter.Open(updater.ManifestId()));
  const AssetProto* entry;
  while ((entry = manifest_iter.NextEntry()) != nullptr) {
    switch (entry->type()) {
      case AssetProto::FILE:
        EXPECT_EQ(entry->permissions(), ManifestBuilder::kDefaultFilePerms);
        break;
      case AssetProto::DIRECTORY:
        EXPECT_EQ(entry->permissions(), ManifestBuilder::kDefaultDirPerms);
        break;
      case AssetProto::SYMLINK:
        // Symlinks don't have their own permissions.
        break;
      default:
        FAIL() << "Unhandled type: " << AssetProto::Type_Name(entry->type());
        break;
    }
  }
}

TEST_F(ManifestUpdaterTest, VerifyIntermediateFilesAreExecutable) {
  cfg_.src_dir = path::Join(base_dir_, "non_empty");
  ManifestUpdater updater(&data_store_, cfg_);

  int count = 0;
  auto push_intermediate_manifest = [this, &count](
                                        const ContentIdProto& manifest_id) {
    ++count;
    ManifestIterator manifest_iter(&data_store_);
    EXPECT_OK(manifest_iter.Open(manifest_id));
    const AssetProto* entry;
    while ((entry = manifest_iter.NextEntry()) != nullptr) {
      switch (entry->type()) {
        case AssetProto::FILE:
          if (count == 1) {
            // While the manifest is in-progress, all files are set to be
            // executable.
            EXPECT_EQ(entry->permissions(), ManifestUpdater::kExecutablePerms);
          } else {
            EXPECT_EQ(entry->permissions(), ManifestBuilder::kDefaultFilePerms);
          }
          break;
        case AssetProto::DIRECTORY:
          EXPECT_EQ(entry->permissions(), ManifestBuilder::kDefaultDirPerms);
          break;
        default:
          FAIL() << "Unhandled type: " << AssetProto::Type_Name(entry->type());
          break;
      }
    }
  };

  // Add subdir/b.txt and verify the file permissions.
  EXPECT_OK(updater.Update(MakeUpdateOps({"subdir/b.txt"}), &file_chunks_,
                           push_intermediate_manifest));
  EXPECT_EQ(updater.Stats().total_files_added_or_updated, 1);
}

// Makes sure that executables are properly detected.
TEST_F(ManifestUpdaterTest, DetectExecutables) {
  cfg_.src_dir = path::Join(base_dir_, "executables");

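The tests above exercise the new third parameter of ManifestUpdater::Update(): a push-manifest callback that is invoked once per intermediate manifest and once more for the final manifest. For reference only, a counting callback of the shape used by these tests could be written as in the sketch below. This is not part of the change; the real callback typedef lives in manifest_updater.h (not shown in this excerpt), std::function is used purely for illustration, and ContentIdProto is assumed to be available via manifest_proto_defs.h as elsewhere in this diff.

// Sketch (not part of this change): a counting push-manifest callback of the
// shape used in the tests above. The first invocation corresponds to an
// intermediate manifest, later invocations to more complete ones.
#include <functional>

#include "manifest/manifest_proto_defs.h"

namespace cdc_ft {

std::function<void(const ContentIdProto&)> MakeCountingCallback(int* count) {
  return [count](const ContentIdProto& /*manifest_id*/) { ++(*count); };
}

}  // namespace cdc_ft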
manifest/pending_assets_queue.cc (new file, 94 lines)
@@ -0,0 +1,94 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "manifest/pending_assets_queue.h"
|
||||
|
||||
#include "common/log.h"
|
||||
#include "manifest/manifest_builder.h"
|
||||
|
||||
namespace cdc_ft {

PendingAssetsQueue::PendingAssetsQueue(absl::Duration min_processing_time)
    : min_processing_time_(min_processing_time) {}

void PendingAssetsQueue::Add(PendingAsset pending) {
  if (pending.deadline == absl::InfiniteFuture()) {
    queue_.push_back(std::move(pending));
    return;
  }

  // Pending assets with a deadline will be added at the end of other
  // prioritized assets.
  auto it =
      std::find_if(queue_.begin(), queue_.end(), [](const PendingAsset& pa) {
        return pa.deadline == absl::InfiniteFuture();
      });
  queue_.insert(it, std::move(pending));
}

bool PendingAssetsQueue::Dequeue(PendingAsset* pending, AcceptFunc accept) {
  auto it = queue_.begin();
  while (it != queue_.end() && accept && !accept(*it)) ++it;
  if (it == queue_.end()) return false;
  *pending = std::move(*it);
  queue_.erase(it);
  return true;
}

absl::Time PendingAssetsQueue::Prioritize(
    const std::vector<PriorityAsset>& prio_assets,
    ManifestBuilder* manifest_builder) {
  absl::Time min_received = absl::InfiniteFuture();

  for (const PriorityAsset& prio_asset : prio_assets) {
    if (prio_asset.received < min_received) min_received = prio_asset.received;

    // Check if this asset is still in progress.
    absl::StatusOr<AssetBuilder> asset = manifest_builder->GetOrCreateAsset(
        prio_asset.rel_file_path, AssetProto::UNKNOWN);
    if (!asset.ok()) {
      LOG_ERROR("Failed to prioritize asset '%s': %s", prio_asset.rel_file_path,
                asset.status().ToString());
      continue;
    }
    if (!asset->InProgress()) continue;

    // Find the queued task for this asset.
    auto prio_end = queue_.end();
    for (auto it = queue_.begin(); it != queue_.end(); ++it) {
      // Remember the first task that is not prioritized so that we can insert
      // new prioritized tasks just before.
      if (prio_end == queue_.end() && it->deadline == absl::InfiniteFuture()) {
        prio_end = it;
      }

      if (it->relative_path == asset->RelativePath() &&
          it->filename == asset->Name()) {
        // If this asset is not yet prioritized, |prio_end| will be set
        // accordingly and we move |*it| to the end of the prioritized tasks.
        if (it->deadline == absl::InfiniteFuture()) {
          it->deadline = prio_asset.received + min_processing_time_;
          it->prioritized = true;  // Explicit prioritization.
          queue_.insert(prio_end, std::move(*it));
          queue_.erase(it);
        }
        break;
      }
    }
  }

  return min_received + min_processing_time_;
}

}  // namespace cdc_ft
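The ordering that Add() and Dequeue() implement can be illustrated with a small, self-contained test. The sketch below is not part of this change; it assumes only the PendingAssetsQueue API shown in this diff, gtest, and Abseil time utilities, and the include paths follow the conventions used elsewhere in the repository.

// Minimal sketch (not part of this change): assets queued with a deadline are
// dequeued before assets without one, while insertion order is preserved
// within each group.
#include "gtest/gtest.h"

#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "manifest/pending_assets_queue.h"

namespace cdc_ft {
namespace {

TEST(PendingAssetsQueueSketch, DeadlinedAssetsAreDequeuedFirst) {
  PendingAssetsQueue queue(absl::Seconds(1));

  // No deadline -> appended at the back.
  queue.Add(PendingAsset(AssetProto::FILE, "subdir", "late.txt",
                         absl::InfiniteFuture()));
  // With a deadline -> inserted before all assets that have no deadline.
  queue.Add(PendingAsset(AssetProto::FILE, "subdir", "urgent.txt",
                         absl::Now() + absl::Seconds(1)));

  PendingAsset pending;
  ASSERT_TRUE(queue.Dequeue(&pending));
  EXPECT_EQ(pending.filename, "urgent.txt");
  ASSERT_TRUE(queue.Dequeue(&pending));
  EXPECT_EQ(pending.filename, "late.txt");
  EXPECT_TRUE(queue.Empty());
}

}  // namespace
}  // namespace cdc_ft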
manifest/pending_assets_queue.h (new file, 102 lines)
@@ -0,0 +1,102 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MANIFEST_PENDING_ASSETS_QUEUE_H
#define MANIFEST_PENDING_ASSETS_QUEUE_H

#include <functional>
#include <list>
#include <string>
#include <vector>

#include "absl/time/time.h"
#include "manifest/manifest_proto_defs.h"

namespace cdc_ft {

class ManifestBuilder;

// Holds an asset that was requested to be prioritized at a given point in
// time.
struct PriorityAsset {
  // Relative Unix file path.
  std::string rel_file_path;
  // Timestamp when this request was received.
  absl::Time received;
};

// Represents an asset that has not been fully processed yet.
struct PendingAsset {
  PendingAsset() {}
  PendingAsset(AssetProto::Type type, std::string relative_path,
               std::string filename, absl::Time deadline)
      : type(type),
        relative_path(std::move(relative_path)),
        filename(std::move(filename)),
        deadline(deadline) {}

  // The asset type (either FILE or DIRECTORY).
  AssetProto::Type type = AssetProto::UNKNOWN;

  // Relative unix path of the directory containing this asset.
  std::string relative_path;

  // File name of the asset that still needs processing.
  std::string filename;

  // If this asset was explicitly prioritized, this field is set to true,
  // otherwise false.
  bool prioritized = false;

  // If a deadline is set, it means that this asset was prioritized
  // (implicitly or explicitly) and should be processed by this deadline. Once
  // this asset has been processed, the manifest should be flushed if the
  // deadline has expired. Otherwise, additional related assets can be queued
  // and processed (implicit prioritization).
  absl::Time deadline;
};

// Queues assets that still need to be processed before they are completed.
class PendingAssetsQueue {
 public:
  // Signature for a callback function to accept items to dequeue.
  using AcceptFunc = std::function<bool(const PendingAsset&)>;

  // The |min_processing_time| is used to calculate the deadline by which a
  // pending asset should be returned to the requesting instance.
  PendingAssetsQueue(absl::Duration min_processing_time);

  // Adds the given asset |pending| to the queue of assets to complete.
  // PendingAssets without a deadline will be queued at the end, while those
  // with a given deadline will be inserted after other assets having a
  // deadline.
  void Add(PendingAsset pending);

  // Removes a PendingAsset from the queue and stores it in |pending|. If
  // |accept| is given, then only items for which |accept| returns true are
  // considered. Returns true if an item was stored in |pending|, otherwise
  // false is returned.
  bool Dequeue(PendingAsset* pending, AcceptFunc accept = nullptr);

  // Returns true if the queue is empty, otherwise returns false.
  bool Empty() const { return queue_.empty(); }

  // Modifies the list of queued assets to prioritize the assets given in
  // |prio_assets|. Returns the deadline by which the processed assets should
  // be returned to the requesting instance.
  absl::Time Prioritize(const std::vector<PriorityAsset>& prio_assets,
                        ManifestBuilder* manifest_builder);

 private:
  const absl::Duration min_processing_time_;
  std::list<PendingAsset> queue_;
};

}  // namespace cdc_ft

#endif  // MANIFEST_PENDING_ASSETS_QUEUE_H
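The Dequeue() filter hook can be combined with the |prioritized| flag to drain explicitly prioritized assets first. The sketch below is a usage illustration only, not part of this change; DrainQueue and the process callback are hypothetical names, and only the PendingAssetsQueue API declared above is assumed.

// Usage sketch (hypothetical, not part of this change): drain explicitly
// prioritized assets first, then process the remaining assets in queue order.
#include <functional>

#include "manifest/pending_assets_queue.h"

namespace cdc_ft {

void DrainQueue(PendingAssetsQueue* queue,
                const std::function<void(const PendingAsset&)>& process) {
  PendingAsset pending;
  // Only accept assets that were explicitly prioritized by a client request.
  auto prioritized_only = [](const PendingAsset& pa) { return pa.prioritized; };
  while (queue->Dequeue(&pending, prioritized_only)) {
    process(pending);
  }
  // Process whatever is left in queue order. Assets with a deadline still come
  // first because Add() inserts them ahead of assets without one.
  while (queue->Dequeue(&pending)) {
    process(pending);
  }
}

}  // namespace cdc_ft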