mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-05-01 17:03:07 +03:00
Releasing the former Stadia file transfer tools
The tools allow efficient and fast synchronization of large directory trees from a Windows workstation to a Linux target machine. cdc_rsync* support efficient copy of files by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager + cdc_fuse_fs support efficient streaming of a local directory to a remote virtual file system based on FUSE. It also employs CDC to identify and reuse unchanged data chunks.
This commit is contained in:
128
data_store/BUILD
Normal file
128
data_store/BUILD
Normal file
@@ -0,0 +1,128 @@
|
||||
package(default_visibility = ["//:__subpackages__"])
|
||||
|
||||
cc_library(
|
||||
name = "data_store",
|
||||
srcs = [
|
||||
"data_store_reader.cc",
|
||||
"data_store_writer.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"data_store_reader.h",
|
||||
"data_store_writer.h",
|
||||
],
|
||||
deps = [
|
||||
"//common:buffer",
|
||||
"//common:status",
|
||||
"//common:status_macros",
|
||||
"//manifest:content_id",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "data_provider",
|
||||
srcs = ["data_provider.cc"],
|
||||
hdrs = ["data_provider.h"],
|
||||
deps = [
|
||||
":data_store",
|
||||
"//common:clock",
|
||||
"//common:log",
|
||||
"//common:status",
|
||||
"//common:stopwatch",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "disk_data_store",
|
||||
srcs = ["disk_data_store.cc"],
|
||||
hdrs = ["disk_data_store.h"],
|
||||
deps = [
|
||||
":data_store",
|
||||
"//common:clock",
|
||||
"//common:log",
|
||||
"//common:path",
|
||||
"//common:platform",
|
||||
"//common:status_macros",
|
||||
"//manifest:content_id",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "disk_data_store_test",
|
||||
srcs = ["disk_data_store_test.cc"],
|
||||
deps = [
|
||||
":disk_data_store",
|
||||
"//common:status_test_macros",
|
||||
"//common:testing_clock",
|
||||
"//manifest:content_id",
|
||||
"@com_google_googletest//:gtest",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "data_provider_test",
|
||||
srcs = ["data_provider_test.cc"],
|
||||
deps = [
|
||||
":data_provider",
|
||||
":disk_data_store",
|
||||
":mem_data_store",
|
||||
"//common:status_test_macros",
|
||||
"//common:testing_clock",
|
||||
"//common:util",
|
||||
"//manifest:content_id",
|
||||
"@com_google_googletest//:gtest",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "grpc_reader",
|
||||
srcs = ["grpc_reader.cc"],
|
||||
hdrs = ["grpc_reader.h"],
|
||||
deps = [
|
||||
":data_store",
|
||||
"//cdc_fuse_fs:asset_stream_client",
|
||||
"//common:buffer",
|
||||
"//common:status",
|
||||
"//common:status_macros",
|
||||
"//manifest:content_id",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mem_data_store",
|
||||
srcs = ["mem_data_store.cc"],
|
||||
hdrs = ["mem_data_store.h"],
|
||||
deps = [
|
||||
":data_store",
|
||||
"//common:status",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "mem_data_store_test",
|
||||
srcs = ["mem_data_store_test.cc"],
|
||||
deps = [
|
||||
":mem_data_store",
|
||||
"//common:status_test_macros",
|
||||
"//manifest:content_id",
|
||||
"@com_google_googletest//:gtest",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_test_sources",
|
||||
srcs = glob(["*_test.cc"]),
|
||||
)
|
||||
364
data_store/data_provider.cc
Normal file
364
data_store/data_provider.cc
Normal file
@@ -0,0 +1,364 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/data_provider.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <thread>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/log.h"
|
||||
#include "common/status.h"
|
||||
#include "common/stopwatch.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
// FUSE limits the maximum read request size to 128k. Larger requests will be
|
||||
// split up into smaller requests up to at most this size. This constant can
|
||||
// be used to identify max. size requests.
|
||||
constexpr uint64_t kMaxFuseRequestSize = 1 << 17;
|
||||
|
||||
} // namespace
|
||||
|
||||
DataProvider::DataProvider(
|
||||
std::unique_ptr<DataStoreWriter> writer,
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers, size_t prefetch_size,
|
||||
uint32_t cleanup_timeout_sec, uint32_t access_idle_timeout_sec)
|
||||
: prefetch_size_(prefetch_size),
|
||||
writer_(std::move(writer)),
|
||||
readers_(std::move(readers)),
|
||||
chunks_updated_(true),
|
||||
cleanup_timeout_sec_(cleanup_timeout_sec),
|
||||
access_idle_timeout_sec_(access_idle_timeout_sec) {
|
||||
if (writer_) {
|
||||
assert(!async_cleaner_);
|
||||
async_cleaner_ =
|
||||
std::make_unique<std::thread>([this]() { CleanupThreadMain(); });
|
||||
}
|
||||
}
|
||||
|
||||
DataProvider::~DataProvider() { Shutdown(); }
|
||||
|
||||
void DataProvider::Shutdown() {
|
||||
{
|
||||
absl::MutexLock lock(&shutdown_mutex_);
|
||||
shutdown_ = true;
|
||||
}
|
||||
if (async_cleaner_) {
|
||||
if (async_cleaner_->joinable()) async_cleaner_->join();
|
||||
async_cleaner_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
size_t DataProvider::PrefetchSize(size_t read_size) const {
|
||||
// If the read size matches the maximum FUSE request size, it is very likely
|
||||
// that the next chunk is needed as well, so we enlarge the read size by the
|
||||
// prefetch size.
|
||||
if (read_size == kMaxFuseRequestSize) read_size += prefetch_size_;
|
||||
return read_size;
|
||||
}
|
||||
|
||||
absl::StatusOr<size_t> DataProvider::Get(const ContentIdProto& content_id,
|
||||
void* data, size_t offset,
|
||||
size_t size) {
|
||||
last_access_ts_ = steady_clock_->Now();
|
||||
absl::Mutex* content_mutex = GetContentMutex(content_id);
|
||||
absl::StatusOr<size_t> read_bytes;
|
||||
if (writer_) {
|
||||
{
|
||||
absl::ReaderMutexLock read_lock(content_mutex);
|
||||
read_bytes = writer_->Get(content_id, data, offset, size);
|
||||
}
|
||||
if (read_bytes.ok()) {
|
||||
return read_bytes;
|
||||
}
|
||||
LogWriterWarning(read_bytes.status(), content_id);
|
||||
}
|
||||
// To prevent reading the same chunk from multiple threads, make read/write
|
||||
// atomic.
|
||||
absl::WriterMutexLock write_lock(content_mutex);
|
||||
// Read from the writer_ again, in case the cache has been populated by
|
||||
// another thread.
|
||||
if (writer_ && absl::IsNotFound(read_bytes.status())) {
|
||||
read_bytes = writer_->Get(content_id, data, offset, size);
|
||||
if (read_bytes.ok()) {
|
||||
return read_bytes;
|
||||
}
|
||||
LogWriterWarning(read_bytes.status(), content_id);
|
||||
}
|
||||
for (auto& reader : readers_) {
|
||||
Buffer buffer;
|
||||
absl::Status status = reader->Get(content_id, &buffer);
|
||||
if (!status.ok()) {
|
||||
// Try next reader if this one doesn't contain the chunk.
|
||||
if (absl::IsNotFound(status)) continue;
|
||||
// TODO: Add reader identification for debugging.
|
||||
return WrapStatus(status, "Failed to get '%s'.",
|
||||
ContentId::ToHexString(content_id));
|
||||
}
|
||||
if (writer_) {
|
||||
status = writer_->Put(content_id, buffer.data(), buffer.size());
|
||||
chunks_updated_ = true;
|
||||
if (!status.ok()) {
|
||||
LOG_ERROR("Failed to write chunk '%s': %s.",
|
||||
ContentId::ToHexString(content_id), status.ToString());
|
||||
}
|
||||
}
|
||||
if (buffer.size() <= offset) return 0;
|
||||
size_t return_bytes = std::min(buffer.size() - offset, size);
|
||||
memcpy(data, buffer.data() + offset, return_bytes);
|
||||
return return_bytes;
|
||||
}
|
||||
return absl::NotFoundError(absl::StrFormat(
|
||||
"Failed to find %s.", ContentId::ToHexString(content_id)));
|
||||
}
|
||||
|
||||
absl::Status DataProvider::Get(ChunkTransferList* chunks) {
|
||||
last_access_ts_ = steady_clock_->Now();
|
||||
// Try to fetch chunks from the cache first.
|
||||
RETURN_IF_ERROR(GetFromWriter(chunks, /*lock_required=*/true));
|
||||
if (chunks->ReadDone()) return absl::OkStatus();
|
||||
|
||||
// Get list of all missing chunk IDs.
|
||||
std::vector<const ContentIdProto*> chunk_ids;
|
||||
for (const ChunkTransferTask& chunk : *chunks) {
|
||||
if (!chunk.done) chunk_ids.push_back(&chunk.id);
|
||||
}
|
||||
|
||||
// Acquire writer locks for all missing chunks.
|
||||
WriterMutexLockList locks;
|
||||
WriteLockAll(std::move(chunk_ids), &locks);
|
||||
|
||||
// Read from the |writer_| again, in case the cache has been populated by
|
||||
// another thread. We hold all chunk locks already.
|
||||
RETURN_IF_ERROR(GetFromWriter(chunks, /*lock_required=*/false));
|
||||
if (chunks->ReadDone()) return absl::OkStatus();
|
||||
|
||||
// Try to read from all readers.
|
||||
for (auto& reader : readers_) {
|
||||
absl::Status status = reader->Get(chunks);
|
||||
if (!status.ok()) {
|
||||
// TODO: Add reader identification for debugging.
|
||||
return WrapStatus(status, "Failed to get chunks [%s] from list [%s]",
|
||||
chunks->UndoneToHexString(), chunks->ToHexString());
|
||||
}
|
||||
if (chunks->PrefetchDone()) break;
|
||||
}
|
||||
|
||||
// Cache complete chunks in the writer.
|
||||
if (writer_) {
|
||||
for (ChunkTransferTask& chunk : *chunks) {
|
||||
if (!chunk.done || chunk.chunk_data.empty()) continue;
|
||||
absl::Status status = writer_->Put(chunk.id, chunk.chunk_data.data(),
|
||||
chunk.chunk_data.size());
|
||||
chunks_updated_ = true;
|
||||
if (!status.ok()) {
|
||||
LOG_WARNING("Failed to put '%s' to writer: %s.",
|
||||
ContentId::ToHexString(chunk.id), status.message());
|
||||
}
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status DataProvider::Get(const ContentIdProto& content_id, Buffer* data) {
|
||||
last_access_ts_ = steady_clock_->Now();
|
||||
absl::Mutex* content_mutex = GetContentMutex(content_id);
|
||||
absl::Status status = absl::OkStatus();
|
||||
if (writer_) {
|
||||
{
|
||||
absl::ReaderMutexLock read_lock(content_mutex);
|
||||
status = writer_->Get(content_id, data);
|
||||
}
|
||||
if (status.ok()) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
LogWriterWarning(status, content_id);
|
||||
}
|
||||
|
||||
// To prevent reading the same chunk from multiple threads, make read/write
|
||||
// atomic.
|
||||
absl::WriterMutexLock write_lock(content_mutex);
|
||||
// Read from the writer_ again, in case the cache has been populated by
|
||||
// another thread.
|
||||
if (writer_ && absl::IsNotFound(status)) {
|
||||
status = writer_->Get(content_id, data);
|
||||
if (status.ok()) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
LogWriterWarning(status, content_id);
|
||||
}
|
||||
for (auto& reader : readers_) {
|
||||
status = reader->Get(content_id, data);
|
||||
if (!status.ok()) {
|
||||
// Try next reader if this one doesn't contain the chunk.
|
||||
if (absl::IsNotFound(status)) continue;
|
||||
// TODO: Add reader identification for debugging.
|
||||
return WrapStatus(status, "Failed to get '%s'.",
|
||||
ContentId::ToHexString(content_id));
|
||||
}
|
||||
if (writer_) {
|
||||
writer_->Put(content_id, data->data(), data->size()).IgnoreError();
|
||||
chunks_updated_ = true;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
return absl::NotFoundError(absl::StrFormat(
|
||||
"Failed to find '%s'.", ContentId::ToHexString(content_id)));
|
||||
}
|
||||
|
||||
void DataProvider::LogWriterWarning(const absl::Status& status,
|
||||
const ContentIdProto& content_id) {
|
||||
if (!absl::IsNotFound(status)) {
|
||||
LOG_WARNING("Failed to get '%s' from writer: %s.",
|
||||
ContentId::ToHexString(content_id), status.message());
|
||||
}
|
||||
}
|
||||
|
||||
absl::Mutex* DataProvider::GetContentMutex(const ContentIdProto& content_id) {
|
||||
interrupt_ = true;
|
||||
uint8_t id = ContentId::GetByte(content_id, 0);
|
||||
return &content_mutexes_[id];
|
||||
}
|
||||
|
||||
void DataProvider::WriteLockAll(std::vector<const ContentIdProto*> chunk_ids,
|
||||
WriterMutexLockList* locks) {
|
||||
// Sorting the list avoids cycles when locking from multiple threads
|
||||
// concurrently, thus avoiding deadlocks when holding some mutexes while
|
||||
// trying to lock others.
|
||||
std::sort(
|
||||
chunk_ids.begin(), chunk_ids.end(),
|
||||
[](const ContentIdProto* a, const ContentIdProto* b) { return *a < *b; });
|
||||
|
||||
std::unordered_set<absl::Mutex*> locked;
|
||||
for (const ContentIdProto* id : chunk_ids) {
|
||||
absl::Mutex* mu = GetContentMutex(*id);
|
||||
auto [_, inserted] = locked.insert(mu);
|
||||
if (!inserted) continue;
|
||||
locks->push_back(std::make_unique<absl::WriterMutexLock>(mu));
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status DataProvider::GetFromWriter(ChunkTransferList* chunks,
|
||||
bool lock_required) {
|
||||
if (!writer_ || chunks->ReadDone()) return absl::OkStatus();
|
||||
|
||||
// Try to read all remaining chunks from the cache.
|
||||
absl::StatusOr<size_t> read_bytes;
|
||||
for (ChunkTransferTask& chunk : *chunks) {
|
||||
if (chunk.done) continue;
|
||||
|
||||
{
|
||||
std::unique_ptr<absl::ReaderMutexLock> lock;
|
||||
if (lock_required) {
|
||||
lock =
|
||||
std::make_unique<absl::ReaderMutexLock>(GetContentMutex(chunk.id));
|
||||
}
|
||||
if (!chunk.size) {
|
||||
// Check if the prefetch chunk is already present, no further processing
|
||||
// needed.
|
||||
chunk.done = writer_->Contains(chunk.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Read the requested data.
|
||||
read_bytes = writer_->Get(chunk.id, chunk.data, chunk.offset, chunk.size);
|
||||
}
|
||||
|
||||
if (!read_bytes.ok()) {
|
||||
LogWriterWarning(read_bytes.status(), chunk.id);
|
||||
} else if (*read_bytes == chunk.size) {
|
||||
chunk.done = true;
|
||||
if (chunks->ReadDone()) return absl::OkStatus();
|
||||
} else {
|
||||
LogWriterWarning(
|
||||
MakeStatus("Expected %u bytes, got %u", chunk.size, *read_bytes),
|
||||
chunk.id);
|
||||
// Remove the corrupted chunk from the cache, but only if the chunk was
|
||||
// write-locked by the caller.
|
||||
if (!lock_required) {
|
||||
absl::Status status = writer_->Remove(chunk.id);
|
||||
if (!status.ok()) {
|
||||
LOG_WARNING("Failed to remove chunk '%s' from the cache: %s",
|
||||
ContentId::ToHexString(chunk.id), status.ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void DataProvider::LockAllMutexes(WriterMutexLockList* locks) {
|
||||
for (absl::Mutex& mu : content_mutexes_) {
|
||||
locks->push_back(std::make_unique<absl::WriterMutexLock>(&mu));
|
||||
}
|
||||
}
|
||||
|
||||
void DataProvider::CleanupThreadMain() {
|
||||
assert(writer_);
|
||||
writer_->RegisterInterrupt(&interrupt_);
|
||||
absl::MutexLock lock(&shutdown_mutex_);
|
||||
SteadyClock::Timestamp next_cleanup_time =
|
||||
steady_clock_->Now() + std::chrono::seconds(cleanup_timeout_sec_);
|
||||
while (!shutdown_) {
|
||||
auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(shutdown_mutex_) {
|
||||
return shutdown_;
|
||||
};
|
||||
shutdown_mutex_.AwaitWithTimeout(
|
||||
absl::Condition(&cond),
|
||||
std::max(absl::Seconds(access_idle_timeout_sec_),
|
||||
absl::Seconds(std::chrono::duration_cast<std::chrono::seconds>(
|
||||
next_cleanup_time - steady_clock_->Now())
|
||||
.count())));
|
||||
int64_t time_sec_since_last_access =
|
||||
std::chrono::duration_cast<std::chrono::seconds>(steady_clock_->Now() -
|
||||
last_access_ts_.load())
|
||||
.count();
|
||||
if (chunks_updated_ &&
|
||||
time_sec_since_last_access > access_idle_timeout_sec_) {
|
||||
WriterMutexLockList locks;
|
||||
LockAllMutexes(&locks);
|
||||
chunks_updated_ = false;
|
||||
LOG_DEBUG("Starting cache cleanup");
|
||||
Stopwatch sw;
|
||||
absl::Status status = writer_->Cleanup();
|
||||
LOG_DEBUG("Finished cache cleanup in %0.3f seconds", sw.ElapsedSeconds());
|
||||
next_cleanup_time =
|
||||
steady_clock_->Now() + std::chrono::seconds(cleanup_timeout_sec_);
|
||||
absl::MutexLock cleaned_lock(&cleaned_mutex_);
|
||||
if (!status.ok()) {
|
||||
LOG_WARNING("Failed to cleanup the cache: %s", status.message());
|
||||
chunks_updated_ = true;
|
||||
is_cleaned_ = false;
|
||||
} else {
|
||||
is_cleaned_ = true;
|
||||
}
|
||||
}
|
||||
interrupt_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
bool DataProvider::WaitForCleanupAndResetForTesting(absl::Duration timeout) {
|
||||
absl::MutexLock lock(&cleaned_mutex_);
|
||||
auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(cleaned_mutex_) {
|
||||
return is_cleaned_;
|
||||
};
|
||||
cleaned_mutex_.AwaitWithTimeout(absl::Condition(&cond), timeout);
|
||||
bool is_cleaned = is_cleaned_;
|
||||
is_cleaned_ = false;
|
||||
return is_cleaned;
|
||||
}
|
||||
} // namespace cdc_ft
|
||||
157
data_store/data_provider.h
Normal file
157
data_store/data_provider.h
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_DATA_PROVIDER_H_
|
||||
#define DATA_STORE_DATA_PROVIDER_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "absl/time/time.h"
|
||||
#include "common/clock.h"
|
||||
#include "data_store/data_store_reader.h"
|
||||
#include "data_store/data_store_writer.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// DataProvider is a composite of several data-store readers used for the file
|
||||
// transfer. Thread-safe.
|
||||
class DataProvider : public DataStoreReader {
|
||||
public:
|
||||
// Default cleanup interval in seconds.
|
||||
static constexpr unsigned int kCleanupTimeoutSec = 300;
|
||||
// Default access-idling time in seconds.
|
||||
static constexpr int64_t kAccessIdleSec = 5;
|
||||
|
||||
DataProvider(std::unique_ptr<DataStoreWriter> writer,
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers,
|
||||
size_t prefetch_size,
|
||||
uint32_t cleanup_timeout_sec = kCleanupTimeoutSec,
|
||||
uint32_t access_idle_timeout_sec = kAccessIdleSec);
|
||||
DataProvider() = delete;
|
||||
DataProvider(const DataProvider&) = delete;
|
||||
DataProvider& operator=(const DataProvider&) = delete;
|
||||
virtual ~DataProvider() ABSL_LOCKS_EXCLUDED(shutdown_mutex_);
|
||||
|
||||
// Shuts down the background cleanup thread.
|
||||
void Shutdown();
|
||||
|
||||
// DataStoreReader:
|
||||
size_t PrefetchSize(size_t read_size) const override;
|
||||
absl::StatusOr<size_t> Get(const ContentIdProto& content_id, void* data,
|
||||
size_t offset, size_t size)
|
||||
ABSL_LOCKS_EXCLUDED(*content_mutexes_) override;
|
||||
absl::Status Get(ChunkTransferList* chunks)
|
||||
ABSL_LOCKS_EXCLUDED(*content_mutexes_) override;
|
||||
absl::Status Get(const ContentIdProto& content_id, Buffer* data)
|
||||
ABSL_LOCKS_EXCLUDED(*content_mutexes_) override;
|
||||
|
||||
private:
|
||||
friend class DataProviderTest;
|
||||
|
||||
// Returns whether the writer was cleaned up and resets |is_cleaned_|.
|
||||
bool WaitForCleanupAndResetForTesting(absl::Duration timeout)
|
||||
ABSL_LOCKS_EXCLUDED(cleaned_mutex_);
|
||||
|
||||
// Vector of WriterMutexLock pointers to lock multiple mutexes together.
|
||||
using WriterMutexLockList =
|
||||
std::vector<std::unique_ptr<absl::WriterMutexLock>>;
|
||||
|
||||
// Logs a warning if unexpectedly could not get data from the writer.
|
||||
void LogWriterWarning(const absl::Status& status,
|
||||
const ContentIdProto& content_id);
|
||||
|
||||
// Returns the mutex for |content_id| from |content_mutexes_|.
|
||||
absl::Mutex* GetContentMutex(const ContentIdProto& content_id);
|
||||
|
||||
// Acquires write locks on the corresponding mutexes for all content IDs in
|
||||
// |chunk_ids|. The locks are placed in the |locks| list. Detects if two chunk
|
||||
// IDs are guarded by the same mutex and locks it only once.
|
||||
//
|
||||
// The list of mutexes is sorted in a deterministic way before they are
|
||||
// locked. This prevents cycles when calling this function from multiple
|
||||
// threads and thus avoids deadlocks.
|
||||
void WriteLockAll(std::vector<const ContentIdProto*> chunk_ids,
|
||||
WriterMutexLockList* locks);
|
||||
|
||||
// Tries to fulfill as many of the chunk transfer tasks in |chunks| as
|
||||
// possible. Tasks that are completed are marked as `done`. If |lock_required|
|
||||
// is true, a read lock is acquired for each chunk as its read. Otherwise the
|
||||
// caller is responsible for acquiring all required locks beforehand.
|
||||
absl::Status GetFromWriter(ChunkTransferList* chunks, bool lock_required);
|
||||
|
||||
// Collects locks for all mutexes.
|
||||
void LockAllMutexes(WriterMutexLockList* locks)
|
||||
ABSL_LOCKS_EXCLUDED(*content_mutexes_);
|
||||
|
||||
// Periodically cleans up data in |writer_|.
|
||||
void CleanupThreadMain() ABSL_LOCKS_EXCLUDED(shutdown_mutex_, cleaned_mutex_);
|
||||
|
||||
static constexpr unsigned int kNumberOfMutexes = 256;
|
||||
|
||||
// How much additional data to prefetch when a max. FUSE read is encountered.
|
||||
size_t prefetch_size_;
|
||||
|
||||
std::unique_ptr<DataStoreWriter> writer_;
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers_;
|
||||
|
||||
// Array of mutexes to protect read/write operations.
|
||||
absl::Mutex content_mutexes_[kNumberOfMutexes];
|
||||
|
||||
// Runs periodical cleanup of the data writer.
|
||||
std::unique_ptr<std::thread> async_cleaner_;
|
||||
|
||||
absl::Mutex shutdown_mutex_;
|
||||
|
||||
// Indicates whether the shutdown was triggered.
|
||||
bool shutdown_ ABSL_GUARDED_BY(shutdown_mutex_) = false;
|
||||
|
||||
// The last access time.
|
||||
std::atomic<std::chrono::time_point<std::chrono::steady_clock>>
|
||||
last_access_ts_;
|
||||
|
||||
// Identifies if new data was added to the cache since the last cleanup.
|
||||
std::atomic<bool> chunks_updated_;
|
||||
|
||||
// Clock to track the last access time.
|
||||
SteadyClock* steady_clock_ = DefaultSteadyClock::GetInstance();
|
||||
|
||||
// Cleanup interval.
|
||||
uint32_t cleanup_timeout_sec_ = kCleanupTimeoutSec;
|
||||
|
||||
// The number of seconds needs to pass since the last write or read operation
|
||||
// to mark the data provider as access-idling.
|
||||
uint32_t access_idle_timeout_sec_ = kAccessIdleSec;
|
||||
|
||||
absl::Mutex cleaned_mutex_;
|
||||
|
||||
// Whether the writer was cleaned up since the last time
|
||||
// WaitForCleanupAndResetForTesting() was executed or since beginning.
|
||||
bool is_cleaned_ ABSL_GUARDED_BY(cleaned_mutex_) = false;
|
||||
|
||||
// Shows whether any read/write request arrived during Cleanup().
|
||||
// data_writer_ only reads it and cancels Cleanup() if it is true.
|
||||
// It is set in GetContentMutex() and reset at the end of Get().
|
||||
std::atomic<bool> interrupt_;
|
||||
}; // class DataProvider
|
||||
}; // namespace cdc_ft
|
||||
|
||||
#endif // DATA_STORE_DATA_PROVIDER_H_
|
||||
370
data_store/data_provider_test.cc
Normal file
370
data_store/data_provider_test.cc
Normal file
@@ -0,0 +1,370 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/data_provider.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <numeric>
|
||||
#include <thread>
|
||||
|
||||
#include "common/path.h"
|
||||
#include "common/status_test_macros.h"
|
||||
#include "common/testing_clock.h"
|
||||
#include "common/util.h"
|
||||
#include "data_store/disk_data_store.h"
|
||||
#include "data_store/mem_data_store.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
namespace {
|
||||
constexpr uint8_t kFirstData[] = {10, 20, 30, 40, 50, 60, 70, 80, 90};
|
||||
constexpr size_t kFirstDataSize = sizeof(kFirstData);
|
||||
constexpr char kTestCacheDirName[] = ".cdc_ft_cache";
|
||||
} // namespace
|
||||
|
||||
class DataProviderTest : public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
cache_dir_path_ = path::Join(path::GetTempDir(), kTestCacheDirName);
|
||||
EXPECT_OK(path::RemoveDirRec(cache_dir_path_));
|
||||
}
|
||||
void TearDown() override { EXPECT_OK(path::RemoveDirRec(cache_dir_path_)); }
|
||||
DataProviderTest() {
|
||||
first_content_id_ = ContentId::FromArray(kFirstData, kFirstDataSize);
|
||||
}
|
||||
|
||||
ContentIdProto Id(const std::string& data) {
|
||||
return ContentId::FromDataString(data);
|
||||
}
|
||||
|
||||
std::unique_ptr<DiskDataStore> CreateDiskCache(
|
||||
const std::vector<std::string>& chunks) {
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> cache =
|
||||
DiskDataStore::Create(0, cache_dir_path_, false, &clock_);
|
||||
EXPECT_OK(cache);
|
||||
for (const std::string& s : chunks) {
|
||||
EXPECT_OK((*cache)->Put(Id(s), s.data(), s.size()));
|
||||
}
|
||||
return std::move(*cache);
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<DataStoreReader>> CreateMemCache(
|
||||
const std::vector<std::string>& chunks) {
|
||||
auto cache = std::make_unique<MemDataStore>();
|
||||
for (const std::string& chunk : chunks) {
|
||||
cache->AddData({chunk.begin(), chunk.end()});
|
||||
}
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers;
|
||||
readers.emplace_back(std::move(cache));
|
||||
return readers;
|
||||
}
|
||||
|
||||
std::unique_ptr<DiskDataStore> CreateCacheWithFirstData() {
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> cache =
|
||||
DiskDataStore::Create(0, cache_dir_path_, false, &clock_);
|
||||
EXPECT_OK(cache);
|
||||
EXPECT_OK((*cache)->Put(first_content_id_, &kFirstData[0], kFirstDataSize));
|
||||
return std::move(*cache);
|
||||
}
|
||||
|
||||
std::unique_ptr<MemDataStore> CreateMemCacheWithFirstData() {
|
||||
auto cache = std::make_unique<MemDataStore>();
|
||||
cache->AddData({kFirstData, kFirstData + kFirstDataSize});
|
||||
return cache;
|
||||
}
|
||||
|
||||
std::string GetDiskCacheFilePath(DiskDataStore* dds,
|
||||
const ContentIdProto& content_id) const {
|
||||
return dds->GetCacheFilePath(content_id);
|
||||
}
|
||||
|
||||
bool WaitForProviderCleanupAndResetForTesting(DataProvider* dp,
|
||||
absl::Duration timeout) {
|
||||
return dp->WaitForCleanupAndResetForTesting(timeout);
|
||||
}
|
||||
|
||||
void TestGetExistingChunkInBounds(DataProvider& data_provider) {
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
size_t offset = 3;
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
data_provider.Get(first_content_id_, &ret_data, offset, kFirstDataSize);
|
||||
ASSERT_OK(bytes_read);
|
||||
ASSERT_EQ(kFirstDataSize - offset, *bytes_read);
|
||||
EXPECT_TRUE(std::equal(std::begin(kFirstData) + offset,
|
||||
std::end(kFirstData), std::begin(ret_data)));
|
||||
}
|
||||
|
||||
void TestGetExistingChunkOutOfBounds(DataProvider& data_provider) {
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
size_t offset = 15;
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
data_provider.Get(first_content_id_, &ret_data, offset, kFirstDataSize);
|
||||
ASSERT_OK(bytes_read);
|
||||
EXPECT_EQ(0u, *bytes_read);
|
||||
}
|
||||
|
||||
void TestGetExistingChunkComplete(DataProvider& data_provider) {
|
||||
Buffer buffer;
|
||||
EXPECT_OK(data_provider.Get(first_content_id_, &buffer));
|
||||
Buffer exp_buffer({10, 20, 30, 40, 50, 60, 70, 80, 90});
|
||||
EXPECT_EQ(exp_buffer, buffer);
|
||||
}
|
||||
|
||||
void TestGetExistingChunk(DataProvider& data_provider) {
|
||||
TestGetExistingChunkInBounds(data_provider);
|
||||
TestGetExistingChunkOutOfBounds(data_provider);
|
||||
TestGetExistingChunkComplete(data_provider);
|
||||
}
|
||||
|
||||
protected:
|
||||
ContentIdProto first_content_id_;
|
||||
TestingSystemClock clock_;
|
||||
std::string cache_dir_path_;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
// TODO: Add test with several readers and a writer, which has no data at the
|
||||
// beginning. Request the chunk several times (the first time it should be
|
||||
// received from reader, the second time - from the writer).
|
||||
TEST_F(DataProviderTest, DataProvider) {
|
||||
DataProvider data_provider(nullptr, {}, 0);
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
absl::StatusOr<uint64_t> bytes =
|
||||
data_provider.Get(first_content_id_, &ret_data, 0, kFirstDataSize);
|
||||
EXPECT_TRUE(absl::IsNotFound(bytes.status()));
|
||||
|
||||
Buffer buffer;
|
||||
EXPECT_TRUE(absl::IsNotFound(data_provider.Get(first_content_id_, &buffer)));
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, CacheAsReader) {
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers;
|
||||
readers.emplace_back(CreateCacheWithFirstData());
|
||||
DataProvider data_provider(nullptr, std::move(readers), 0);
|
||||
|
||||
TestGetExistingChunk(data_provider);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, CacheAsWriter) {
|
||||
DataProvider data_provider(CreateCacheWithFirstData(), {}, 0);
|
||||
TestGetExistingChunk(data_provider);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, MemCacheAsReader) {
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers;
|
||||
readers.emplace_back(CreateMemCacheWithFirstData());
|
||||
DataProvider data_provider(nullptr, std::move(readers), 0);
|
||||
TestGetExistingChunk(data_provider);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, MemCacheAsWriter) {
|
||||
DataProvider data_provider(CreateMemCacheWithFirstData(), {}, 0);
|
||||
TestGetExistingChunk(data_provider);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, CacheAsWriterMemCacheAsReader) {
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> cache =
|
||||
DiskDataStore::Create(0, cache_dir_path_, false, &clock_);
|
||||
ASSERT_OK(cache);
|
||||
std::vector<std::unique_ptr<DataStoreReader>> readers;
|
||||
readers.emplace_back(CreateMemCacheWithFirstData());
|
||||
DataProvider data_provider(std::move(*cache), std::move(readers), 0);
|
||||
TestGetExistingChunk(data_provider);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromWriterSuccess) {
|
||||
DataProvider data_provider(CreateDiskCache({"aaa", "bbb", "ccc"}), {}, 0);
|
||||
char buf[10];
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
chunks.emplace_back(Id("bbb"), 0, buf + 3, 3);
|
||||
chunks.emplace_back(Id("ccc"), 0, buf + 6, 3);
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_TRUE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_TRUE(chunks[1].done);
|
||||
EXPECT_TRUE(chunks[2].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 9), "aaabbbccc");
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromWriterPartialFail) {
|
||||
DataProvider data_provider(CreateDiskCache({"aaa", "bbb", "ccc"}), {}, 0);
|
||||
char buf[10];
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
chunks.emplace_back(Id("does not exist"), 0, buf + 3, sizeof(buf));
|
||||
chunks.emplace_back(Id("ccc"), 0, buf + 6, 3);
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_FALSE(chunks.ReadDone());
|
||||
EXPECT_FALSE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_FALSE(chunks[1].done);
|
||||
EXPECT_TRUE(chunks[2].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 3), "aaa");
|
||||
EXPECT_EQ(absl::string_view(buf + 6, 3), "ccc");
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromWriterAllFail) {
|
||||
DataProvider data_provider(CreateDiskCache({"aaa", "bbb", "ccc"}), {}, 0);
|
||||
char buf[10];
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("does not exist"), 0, buf, sizeof(buf));
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_FALSE(chunks.ReadDone());
|
||||
EXPECT_FALSE(chunks.PrefetchDone());
|
||||
EXPECT_FALSE(chunks[0].done);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromReaderCachedInWriter) {
|
||||
auto readers = CreateMemCache({"aaa", "bbb", "ccc"});
|
||||
auto disk_cache = CreateDiskCache({});
|
||||
DiskDataStore* disk_cache_ptr = disk_cache.get();
|
||||
DataProvider data_provider(std::move(disk_cache), std::move(readers), 0);
|
||||
char buf[10];
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_TRUE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 3), "aaa");
|
||||
// Verify data has been cached in the writer.
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("aaa")));
|
||||
EXPECT_EQ(disk_cache_ptr->List()->size(), 1);
|
||||
|
||||
chunks.clear();
|
||||
chunks.emplace_back(Id("bbb"), 0, buf + 3, 3);
|
||||
chunks.emplace_back(Id("ccc"), 0, buf + 6, 3);
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_TRUE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_TRUE(chunks[1].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 9), "aaabbbccc");
|
||||
// Verify data has been cached in the writer.
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("aaa")));
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("bbb")));
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("ccc")));
|
||||
EXPECT_EQ(disk_cache_ptr->List()->size(), 3);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromReaderAndWriterSkipPrefetch) {
|
||||
auto readers = CreateMemCache({"bbb", "ccc"});
|
||||
auto disk_cache = CreateDiskCache({"aaa"});
|
||||
DiskDataStore* disk_cache_ptr = disk_cache.get();
|
||||
DataProvider data_provider(std::move(disk_cache), std::move(readers), 0);
|
||||
char buf[10];
|
||||
|
||||
// This request can be fulfilled with cached data, so "bbb" and "ccc" are not
|
||||
// fetched from the reader.
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
chunks.emplace_back(Id("bbb"), 0, nullptr, 0); // prefetch
|
||||
chunks.emplace_back(Id("ccc"), 0, nullptr, 0); // prefetch
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_FALSE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_FALSE(chunks[1].done);
|
||||
EXPECT_FALSE(chunks[2].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 3), "aaa");
|
||||
// No additional chunks should have been cached in the writer.
|
||||
EXPECT_EQ(disk_cache_ptr->List()->size(), 1);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, GetMultiChunksFromReaderAndWriterWithPrefetch) {
|
||||
auto readers = CreateMemCache({"bbb", "ccc"});
|
||||
auto disk_cache = CreateDiskCache({"aaa"});
|
||||
DiskDataStore* disk_cache_ptr = disk_cache.get();
|
||||
DataProvider data_provider(std::move(disk_cache), std::move(readers), 0);
|
||||
char buf[10];
|
||||
|
||||
// This request includes one chunk that has to be fetched, so the third
|
||||
// chunk should be prefetched as well.
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
chunks.emplace_back(Id("bbb"), 0, buf + 3, 3);
|
||||
chunks.emplace_back(Id("ccc"), 0, nullptr, 0); // prefetch
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_TRUE(chunks.PrefetchDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_TRUE(chunks[1].done);
|
||||
EXPECT_TRUE(chunks[2].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 6), "aaabbb");
|
||||
// Verify data has been cached in the writer
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("aaa")));
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("bbb")));
|
||||
EXPECT_TRUE(disk_cache_ptr->Contains(Id("ccc")));
|
||||
EXPECT_EQ(disk_cache_ptr->List()->size(), 3);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, RecoverFromTruncatedChunkInCache) {
|
||||
auto readers = CreateMemCache({"aaa"});
|
||||
auto disk_cache = CreateDiskCache({"aaa"});
|
||||
DiskDataStore* disk_cache_ptr = disk_cache.get();
|
||||
DataProvider data_provider(std::move(disk_cache), std::move(readers), 0);
|
||||
char buf[3];
|
||||
|
||||
// Truncate the chunk stored in the disk cache.
|
||||
std::string path = GetDiskCacheFilePath(disk_cache_ptr, Id("aaa"));
|
||||
size_t size;
|
||||
EXPECT_OK(path::WriteFile(path, "a", 1));
|
||||
EXPECT_OK(path::FileSize(path, &size));
|
||||
EXPECT_EQ(size, 1);
|
||||
|
||||
ChunkTransferList chunks;
|
||||
chunks.emplace_back(Id("aaa"), 0, buf, 3);
|
||||
EXPECT_OK(data_provider.Get(&chunks));
|
||||
EXPECT_TRUE(chunks.ReadDone());
|
||||
EXPECT_TRUE(chunks[0].done);
|
||||
EXPECT_EQ(absl::string_view(buf, 3), "aaa");
|
||||
// Verify that the chunk has been recovered.
|
||||
EXPECT_OK(path::FileSize(path, &size));
|
||||
EXPECT_EQ(size, 3);
|
||||
}
|
||||
|
||||
TEST_F(DataProviderTest, CleanupNotAllChunksRead) {
|
||||
auto cache = CreateDiskCache({"aaa", "bbb", "ccc"});
|
||||
cache->SetCapacity(5);
|
||||
|
||||
// Check that chunks are available in the cache first.
|
||||
char buf[10];
|
||||
EXPECT_EQ(cache->Get(Id("aaa"), buf, 0, 3).value(), 3u);
|
||||
EXPECT_EQ(cache->Get(Id("bbb"), buf + 3, 0, 3).value(), 3u);
|
||||
EXPECT_EQ(cache->Get(Id("ccc"), buf + 6, 0, 3).value(), 3u);
|
||||
EXPECT_EQ(absl::string_view(buf, 9), "aaabbbccc");
|
||||
|
||||
DataProvider data_provider(std::move(cache), {}, 0, 0 /*cleanup timeout*/,
|
||||
0 /*idling timeout*/);
|
||||
memset(buf, 0, 10);
|
||||
|
||||
EXPECT_EQ(data_provider.Get(Id("ccc"), buf, 0, 3).value(), 3u);
|
||||
|
||||
// The data provider should contain only 1 chunk as the cleanup was already
|
||||
// executed.
|
||||
EXPECT_TRUE(WaitForProviderCleanupAndResetForTesting(
|
||||
&data_provider, absl::Seconds(5) /*timeout*/));
|
||||
memset(buf, 0, 10);
|
||||
EXPECT_NOT_OK(data_provider.Get(Id("aaa"), buf, 0, 3));
|
||||
EXPECT_NOT_OK(data_provider.Get(Id("bbb"), buf, 0, 3));
|
||||
EXPECT_EQ(data_provider.Get(Id("ccc"), buf, 0, 3).value(), 3u);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
98
data_store/data_store_reader.cc
Normal file
98
data_store/data_store_reader.cc
Normal file
@@ -0,0 +1,98 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/data_store_reader.h"
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/status_macros.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
bool ChunkTransferList::ReadDone() const {
|
||||
for (auto it = begin(); it != end(); ++it) {
|
||||
if (it->size && !it->done) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ChunkTransferList::PrefetchDone() const {
|
||||
for (auto it = begin(); it != end(); ++it) {
|
||||
if (!it->done) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string ChunkTransferList::ToHexString(
|
||||
std::function<bool(const ChunkTransferTask&)> filter) const {
|
||||
std::string ids;
|
||||
for (auto it = begin(); it != end(); ++it) {
|
||||
if (filter && !filter(*it)) continue;
|
||||
if (!ids.empty()) ids += ", ";
|
||||
ids += ContentId::ToHexString(it->id);
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
std::string ChunkTransferList::UndoneToHexString() const {
|
||||
return ToHexString(
|
||||
[](const ChunkTransferTask& chunk) { return !chunk.done; });
|
||||
}
|
||||
|
||||
size_t DataStoreReader::PrefetchSize(size_t read_size) const {
|
||||
return read_size;
|
||||
}
|
||||
|
||||
absl::Status DataStoreReader::Get(ChunkTransferList* chunks) {
|
||||
absl::StatusOr<uint64_t> bytes_read;
|
||||
for (ChunkTransferTask& chunk : *chunks) {
|
||||
// This default implementation skips prefetching tasks (chunk.size == 0).
|
||||
if (chunk.done || !chunk.size) continue;
|
||||
bytes_read = Get(chunk.id, chunk.data, chunk.offset, chunk.size);
|
||||
if (bytes_read.ok()) {
|
||||
if (*bytes_read != chunk.size) {
|
||||
return MakeStatus(
|
||||
"Corrupted chunk %s detected, expected to read %u bytes, got %u",
|
||||
ContentId::ToHexString(chunk.id), chunk.size, *bytes_read);
|
||||
}
|
||||
chunk.done = true;
|
||||
} else {
|
||||
// Return any unexpected error.
|
||||
if (!absl::IsNotFound(bytes_read.status())) return bytes_read.status();
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status DataStoreReader::GetProto(const ContentIdProto& content_id,
|
||||
google::protobuf::Message* proto) {
|
||||
Buffer chunk;
|
||||
return GetProto(content_id, &chunk, proto);
|
||||
}
|
||||
|
||||
absl::Status DataStoreReader::GetProto(const ContentIdProto& content_id,
|
||||
Buffer* buf,
|
||||
google::protobuf::Message* proto) {
|
||||
// Fetch the referenced chunk.
|
||||
RETURN_IF_ERROR(Get(content_id, buf));
|
||||
// Parse the manifest proto from the chunk.
|
||||
if (!proto->ParseFromArray(buf->data(), static_cast<int>(buf->size()))) {
|
||||
return absl::InternalError(absl::StrFormat(
|
||||
"Failed to parse %s from chunk '%s'", proto->GetTypeName(),
|
||||
ContentId::ToHexString(content_id)));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
129
data_store/data_store_reader.h
Normal file
129
data_store/data_store_reader.h
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_DATA_STORE_READER_H_
|
||||
#define DATA_STORE_DATA_STORE_READER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "common/buffer.h"
|
||||
#include "common/status.h"
|
||||
#include "common/status_macros.h"
|
||||
#include "manifest/content_id.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// Describes which part of a chunk needs to copied into a given buffer.
|
||||
struct ChunkTransferTask {
|
||||
ChunkTransferTask() {}
|
||||
ChunkTransferTask(ContentIdProto id, uint64_t offset, void* data,
|
||||
uint64_t size)
|
||||
: id(std::move(id)), offset(offset), data(data), size(size) {}
|
||||
// Identifies the chunk.
|
||||
ContentIdProto id;
|
||||
// Relative offset into the chunk from where data should be copied.
|
||||
uint64_t offset = 0;
|
||||
// Data buffer into which the chunk is written. May be null for prefetching.
|
||||
void* data = nullptr;
|
||||
// Size of the |data| buffer. May be zero for prefetching.
|
||||
uint64_t size = 0;
|
||||
// If the storage layer fetches the complete chunk data, it can be moved into
|
||||
// this string so that the data provider layer can cache the chunk.
|
||||
std::string chunk_data;
|
||||
// Indicates if the chunk was successfully copied into |data| or prefetched.
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
// A std::vector of ChunkTransferTask elements.
|
||||
class ChunkTransferList : public std::vector<ChunkTransferTask> {
|
||||
public:
|
||||
// Returns true if all tasks with a non-zero size have |done| set to true.
|
||||
bool ReadDone() const;
|
||||
|
||||
// Returns true if all tasks have |done| set to true, including those only
|
||||
// meant for prefetching (|size| == 0).
|
||||
bool PrefetchDone() const;
|
||||
|
||||
// Returns a comma separated string of hex IDs of all chunks in this list. If
|
||||
// the optional function |filter| is given, only those chunks are included for
|
||||
// which |filter| returns true.
|
||||
std::string ToHexString(
|
||||
std::function<bool(const ChunkTransferTask&)> filter = nullptr) const;
|
||||
|
||||
// Same as ToHexString, but only includes tasks having |done| set to false.
|
||||
std::string UndoneToHexString() const;
|
||||
};
|
||||
|
||||
// DataStoreReader is an abstract interface to read from all data stores used
|
||||
// for the file transfer, for example: a local cache, a data store, which
|
||||
// receives data via a gRPC channel, etc.
|
||||
class DataStoreReader {
|
||||
public:
|
||||
DataStoreReader() = default;
|
||||
virtual ~DataStoreReader() = default;
|
||||
|
||||
DataStoreReader(const DataStoreReader&) = delete;
|
||||
DataStoreReader& operator=(const DataStoreReader&) = delete;
|
||||
|
||||
// Suggests a data prefetch size based on the given |read_size|. The default
|
||||
// implementation just returns |read_size|. Override this function to
|
||||
// implement a prefetching strategy.
|
||||
virtual size_t PrefetchSize(size_t read_size) const;
|
||||
|
||||
// Reads |size| bytes from the chunk specified by |content_id|, starting
|
||||
// at the given |offset|, and writes the result into |data|.
|
||||
// The return value is the number of read bytes.
|
||||
// If the chunk is not found in the data store, returns NotFoundError.
|
||||
virtual absl::StatusOr<size_t> Get(const ContentIdProto& content_id,
|
||||
void* data, size_t offset,
|
||||
size_t size) = 0;
|
||||
|
||||
// Reads all chunks from the given task list |chunks| that are not done yet,
|
||||
// copies the data into the associated buffer, and marks the chunk as done. If
|
||||
// the reader fetches the full chunk, the raw data may be moved to the task as
|
||||
// well for caching.
|
||||
//
|
||||
// Returns success even if no chunk was found. Check |chunks->ReadDone()| or
|
||||
// |chunks->PrefetchDone()| to verify all chunks were fetched. Returns any
|
||||
// error other than absl::NotFoundError from the underlying implementation.
|
||||
//
|
||||
// The default implementation calls the single item `Get()` method for each
|
||||
// task in |chunks|. Override this method in a sub-class for optimized batch
|
||||
// processing.
|
||||
virtual absl::Status Get(ChunkTransferList* chunks);
|
||||
|
||||
// Reads the complete data chunk specified by |content_id| and writes the
|
||||
// result into |data|.
|
||||
// If the chunk is not found in the data store, returns NotFoundError.
|
||||
virtual absl::Status Get(const ContentIdProto& content_id, Buffer* data) = 0;
|
||||
|
||||
// Reads the complete chunk identified by |content_id| and parses it as the
|
||||
// given protocol buffer.
|
||||
absl::Status GetProto(const ContentIdProto& content_id,
|
||||
google::protobuf::Message* proto);
|
||||
|
||||
// Reads the complete chunk identified by |content_id| and parses it as the
|
||||
// given protocol buffer. Uses the given Buffer |buf| as intermediate
|
||||
// storage.
|
||||
absl::Status GetProto(const ContentIdProto& content_id, Buffer* buf,
|
||||
google::protobuf::Message* proto);
|
||||
}; // class DataStoreReader
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // DATA_STORE_DATA_STORE_READER_H_
|
||||
44
data_store/data_store_writer.cc
Normal file
44
data_store/data_store_writer.cc
Normal file
@@ -0,0 +1,44 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/data_store_writer.h"
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "common/status.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
bool DataStoreWriter::Contains(const ContentIdProto& content_id) {
|
||||
Buffer buffer;
|
||||
return Get(content_id, &buffer).ok();
|
||||
}
|
||||
|
||||
absl::Status DataStoreWriter::PutProto(
|
||||
const google::protobuf::MessageLite& proto, ContentIdProto* content_id,
|
||||
size_t* proto_size) {
|
||||
// Serialize the proto.
|
||||
std::string out;
|
||||
if (!proto.SerializeToString(&out)) {
|
||||
return absl::InternalError(
|
||||
absl::StrFormat("Failed to serialize %s.", proto.GetTypeName()));
|
||||
}
|
||||
// Calculate the proto's content ID.
|
||||
*content_id = ContentId::FromDataString(out);
|
||||
if (proto_size) *proto_size = out.size();
|
||||
// Write manifest chunk to storage.
|
||||
return Put(*content_id, out.c_str(), out.size());
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
83
data_store/data_store_writer.h
Normal file
83
data_store/data_store_writer.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_DATA_STORE_WRITER_H_
|
||||
#define DATA_STORE_DATA_STORE_WRITER_H_
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "common/buffer.h"
|
||||
#include "data_store/data_store_reader.h"
|
||||
#include "manifest/manifest_proto_defs.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// DataStoreWriter is an abstract interface for read/write operations for a data
|
||||
// store, for example: a disk-based or in-memory cache.
|
||||
class DataStoreWriter : public DataStoreReader {
|
||||
public:
|
||||
DataStoreWriter() = default;
|
||||
DataStoreWriter(const DataStoreWriter&) = delete;
|
||||
DataStoreWriter& operator=(const DataStoreWriter&) = delete;
|
||||
virtual ~DataStoreWriter() = default;
|
||||
|
||||
// Returns true if the chunk with the given |content_id| is available
|
||||
// in the data store. Otherwise, returns false. The default implementation
|
||||
// uses Get() to retrieve the chunk and should be overridden.
|
||||
virtual bool Contains(const ContentIdProto& content_id);
|
||||
|
||||
// Stores a data chunk |data| of |size| and |content_id| into the data store.
|
||||
virtual absl::Status Put(const ContentIdProto& content_id, const void* data,
|
||||
size_t size) = 0;
|
||||
|
||||
// Stores the given protocol buffer |proto| as a unique chunk and updates
|
||||
// |content_id| with the corresponding digest. If the optional parameter
|
||||
// |proto_size| is given, it will be set to the byte size of the serialized
|
||||
// proto.
|
||||
absl::Status PutProto(const google::protobuf::MessageLite& proto,
|
||||
ContentIdProto* content_id,
|
||||
size_t* proto_size = nullptr);
|
||||
|
||||
// Removes the data chunk with |content_id| from the writer. Returns success
|
||||
// if the chunk does not exist or was removed.
|
||||
virtual absl::Status Remove(const ContentIdProto& content_id) = 0;
|
||||
|
||||
// Wipes the data. All statistics and data chunks are removed from the data
|
||||
// store.
|
||||
virtual absl::Status Wipe() = 0;
|
||||
|
||||
// Removes all chunks except for |ids_to_keep|. Also checks whether all chunks
|
||||
// in |ids_to_keep| are present. If not, returns a NotFound error.
|
||||
virtual absl::Status Prune(
|
||||
std::unordered_set<ContentIdProto> ids_to_keep) = 0;
|
||||
|
||||
// Removes the data if the data store size exceeds its capacity.
|
||||
virtual absl::Status Cleanup() { return absl::OkStatus(); }
|
||||
|
||||
// Allows to interrupt methods by setting |interrupt_|.
|
||||
void RegisterInterrupt(std::atomic<bool>* interrupt) {
|
||||
interrupt_ = interrupt;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Shows whether a function can be cancelled. Used in Cleanup().
|
||||
std::atomic<bool>* interrupt_ = nullptr;
|
||||
}; // class DataStoreWriter
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // DATA_STORE_DATA_STORE_WRITER_H_
|
||||
362
data_store/disk_data_store.cc
Normal file
362
data_store/disk_data_store.cc
Normal file
@@ -0,0 +1,362 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/disk_data_store.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
|
||||
#include "common/log.h"
|
||||
#include "common/path.h"
|
||||
#include "common/status.h"
|
||||
#include "common/status_macros.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
static constexpr char kDirNames[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
|
||||
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
|
||||
|
||||
// Generates directory names of |length| symbols from kDirNames.
|
||||
// If length = 2, the names are 00, 01, 02, etc.
|
||||
std::vector<std::string> GenerateDirNames(size_t length) {
|
||||
size_t names_size = 1ull << (length * 4);
|
||||
std::vector<std::string> names(names_size, std::string(length, '0'));
|
||||
for (size_t idx = 0; idx < names_size; ++idx) {
|
||||
size_t symbol = idx;
|
||||
for (size_t jdx = 0; jdx < length; ++jdx) {
|
||||
names[idx][jdx] = kDirNames[symbol & 0xfu];
|
||||
symbol >>= 4;
|
||||
}
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
// Adds |count| path separators to |input| after each |distance| symbols
|
||||
// starting from the beginning. At least one symbol is left at the end
|
||||
// for the file name.
|
||||
// AddSeparators("abc", 1, 3) -> a\b\c
|
||||
// AddSeparators("abc", 1, 0) -> abc
|
||||
// AddSeparators("abc", 2, 100) -> ab\c
|
||||
static std::string AddPathSeparators(const std::string& input, size_t distance,
|
||||
size_t count) {
|
||||
if (input.empty() || distance == 0 || count == 0) {
|
||||
return input;
|
||||
}
|
||||
count = std::min((input.size() - 1) / distance, count);
|
||||
std::string path;
|
||||
path.reserve(input.size() + count);
|
||||
std::string::const_iterator it_pos = input.begin();
|
||||
while (count > 0 && it_pos < input.end()) {
|
||||
path.append(it_pos, it_pos + distance);
|
||||
path.push_back(path::PathSeparator());
|
||||
it_pos += distance;
|
||||
--count;
|
||||
}
|
||||
if (it_pos < input.end()) {
|
||||
path.append(it_pos, input.end());
|
||||
}
|
||||
return path;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
DiskDataStore::DiskDataStore(unsigned int depth, std::string cache_root_dir,
|
||||
bool create_dirs, SystemClock* clock)
|
||||
: depth_(depth),
|
||||
root_dir_(std::move(cache_root_dir)),
|
||||
create_dirs_(create_dirs),
|
||||
clock_(clock) {
|
||||
assert(!root_dir_.empty());
|
||||
path::EnsureEndsWithPathSeparator(&root_dir_);
|
||||
}
|
||||
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> DiskDataStore::Create(
|
||||
unsigned int depth, std::string cache_root_dir, bool create_dirs,
|
||||
SystemClock* clock) {
|
||||
std::unique_ptr<DiskDataStore> store = absl::WrapUnique(
|
||||
new DiskDataStore(depth, std::move(cache_root_dir), create_dirs, clock));
|
||||
if (create_dirs) {
|
||||
RETURN_IF_ERROR(store->CreateDirHierarchy());
|
||||
}
|
||||
return store;
|
||||
}
|
||||
|
||||
DiskDataStore::~DiskDataStore() {}
|
||||
|
||||
absl::Status DiskDataStore::Put(const ContentIdProto& content_id,
|
||||
const void* data, size_t size) {
|
||||
std::string path = GetCacheFilePath(content_id);
|
||||
if (!create_dirs_) {
|
||||
RETURN_IF_ERROR(path::CreateDirRec(path::DirName(path)));
|
||||
}
|
||||
RETURN_IF_ERROR(path::WriteFile(path, data, size));
|
||||
UpdateModificationTime(path);
|
||||
size_.fetch_add(size, std::memory_order_relaxed);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::StatusOr<size_t> DiskDataStore::Get(const ContentIdProto& content_id,
|
||||
void* data, size_t offset,
|
||||
size_t size) {
|
||||
if (!size) return 0;
|
||||
assert(data);
|
||||
std::string path = GetCacheFilePath(content_id);
|
||||
size_t read_size;
|
||||
ASSIGN_OR_RETURN(read_size, path::ReadFile(path, data, offset, size),
|
||||
"Failed to read chunk %s of size %d at offset %d",
|
||||
ContentId::ToHexString(content_id), size, offset);
|
||||
UpdateModificationTime(path);
|
||||
return read_size;
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::Get(const ContentIdProto& content_id,
|
||||
Buffer* data) {
|
||||
assert(data);
|
||||
std::string path = GetCacheFilePath(content_id);
|
||||
size_t read_size = 0;
|
||||
size_t file_size = 0;
|
||||
|
||||
RETURN_IF_ERROR(path::FileSize(path, &file_size),
|
||||
"Failed to stat file size for '%s'", path);
|
||||
data->resize(file_size);
|
||||
ASSIGN_OR_RETURN(read_size, path::ReadFile(path, data->data(), 0, file_size),
|
||||
"Failed to read %s of size %d",
|
||||
ContentId::ToHexString(content_id), file_size);
|
||||
if (read_size != file_size) {
|
||||
return absl::DataLossError(
|
||||
absl::StrFormat("Only %u bytes out of %u are read for %s", read_size,
|
||||
file_size, ContentId::ToHexString(content_id)));
|
||||
}
|
||||
UpdateModificationTime(path);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
int64_t DiskDataStore::Capacity() const { return capacity_; }
|
||||
|
||||
double DiskDataStore::FillFactor() const { return fill_factor_; }
|
||||
|
||||
unsigned int DiskDataStore::Depth() const { return depth_; }
|
||||
|
||||
size_t DiskDataStore::Size() const { return size_; }
|
||||
|
||||
const std::string& DiskDataStore::RootDir() const { return root_dir_; }
|
||||
|
||||
void DiskDataStore::SetCapacity(int64_t capacity) { capacity_ = capacity; }
|
||||
|
||||
absl::Status DiskDataStore::SetFillFactor(double fill_factor) {
|
||||
if (fill_factor <= 0 || fill_factor > 1) {
|
||||
return absl::FailedPreconditionError(
|
||||
absl::StrFormat("Failed to set cache fill factor to %f.", fill_factor));
|
||||
}
|
||||
fill_factor_ = fill_factor;
|
||||
return Cleanup();
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::Wipe() {
|
||||
RETURN_IF_ERROR(path::RemoveDirRec(root_dir_),
|
||||
"RemoveDirRec() for '%s' failed", root_dir_);
|
||||
size_ = 0;
|
||||
if (create_dirs_) {
|
||||
RETURN_IF_ERROR(CreateDirHierarchy());
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::Prune(
|
||||
std::unordered_set<ContentIdProto> ids_to_keep) {
|
||||
CacheFilesWithSize files_with_size;
|
||||
ASSIGN_OR_RETURN(files_with_size, CollectCacheFiles(),
|
||||
"Failed to collect cache files");
|
||||
|
||||
// Delete the set of chunks not in |ids_to_keep|.
|
||||
std::vector<ContentIdProto> to_delete;
|
||||
for (const CacheFile& file : files_with_size.files) {
|
||||
// Don't touch files that don't match the chunk naming scheme
|
||||
// (e.g. user-added files).
|
||||
ContentIdProto id;
|
||||
if (!ParseCacheFilePath(std::move(file.path), &id)) continue;
|
||||
|
||||
if (ids_to_keep.find(id) == ids_to_keep.end()) {
|
||||
RETURN_IF_ERROR(Remove(id));
|
||||
size_.fetch_sub(file.size, std::memory_order_relaxed);
|
||||
} else {
|
||||
ids_to_keep.erase(id);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that all chunks in |ids_to_keep| are present in the cache.
|
||||
if (!ids_to_keep.empty()) {
|
||||
return absl::NotFoundError(absl::StrFormat(
|
||||
"%u chunks, e.g. '%s', not found in the store", ids_to_keep.size(),
|
||||
ContentId::ToHexString(*ids_to_keep.begin())));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::Remove(const ContentIdProto& content_id) {
|
||||
std::string path = GetCacheFilePath(content_id);
|
||||
return path::RemoveFile(path);
|
||||
}
|
||||
|
||||
bool DiskDataStore::Contains(const ContentIdProto& content_id) {
|
||||
return path::Exists(GetCacheFilePath(content_id));
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::Cleanup() {
|
||||
if (capacity_ < 0) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
size_t size_threshold = static_cast<size_t>(capacity_) * fill_factor_;
|
||||
if (size_initialized_.load() && size_ <= size_threshold) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
CacheFilesWithSize files_with_size;
|
||||
ASSIGN_OR_RETURN(files_with_size, CollectCacheFiles());
|
||||
LOG_DEBUG("Cache size before the cleanup: %u bytes", size_.load());
|
||||
std::vector<CacheFile>& files = files_with_size.files;
|
||||
// Sort in LRU order: least recently used files first.
|
||||
std::sort(files.begin(), files.end(),
|
||||
[](const CacheFile& file1, const CacheFile& file2) {
|
||||
// Also sort by path for deterministic results in tests.
|
||||
if (file1.mtime == file2.mtime) return file1.path < file2.path;
|
||||
return file1.mtime < file2.mtime;
|
||||
});
|
||||
size_t file_index = 0;
|
||||
const size_t num_of_files = files.size();
|
||||
while (size_ > size_threshold && file_index < num_of_files) {
|
||||
std::string path = path::Join(root_dir_, files[file_index].path);
|
||||
RETURN_IF_ERROR(path::RemoveFile(path));
|
||||
size_.fetch_sub(files[file_index].size, std::memory_order_relaxed);
|
||||
++file_index;
|
||||
if (interrupt_ && *interrupt_) {
|
||||
return absl::CancelledError("Cache cleanup has been cancelled");
|
||||
}
|
||||
}
|
||||
LOG_DEBUG("Cache size after the cleanup: %u bytes", size_.load());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::StatusOr<std::vector<ContentIdProto>> DiskDataStore::List() {
|
||||
CacheFilesWithSize files_with_size;
|
||||
ASSIGN_OR_RETURN(files_with_size, CollectCacheFiles(true),
|
||||
"Failed to collect cache files");
|
||||
|
||||
std::vector<ContentIdProto> ids;
|
||||
ids.reserve(files_with_size.files.size());
|
||||
for (const CacheFile& file : files_with_size.files) {
|
||||
ContentIdProto id;
|
||||
if (ParseCacheFilePath(std::move(file.path), &id))
|
||||
ids.push_back(std::move(id));
|
||||
}
|
||||
|
||||
return ids;
|
||||
}
|
||||
|
||||
absl::StatusOr<DiskDataStore::Statistics> DiskDataStore::CalculateStatistics()
|
||||
const {
|
||||
Statistics statistics;
|
||||
auto handler = [&](const std::string& dir, const std::string& filename,
|
||||
int64_t /*modified_time*/, uint64_t size,
|
||||
bool is_directory) -> absl::Status {
|
||||
if (!is_directory) {
|
||||
statistics.size += size;
|
||||
++statistics.number_of_chunks;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
};
|
||||
RETURN_IF_ERROR(path::SearchFiles(root_dir_, true, handler));
|
||||
return statistics;
|
||||
}
|
||||
|
||||
absl::StatusOr<DiskDataStore::CacheFilesWithSize>
|
||||
DiskDataStore::CollectCacheFiles(bool continue_on_interrupt) {
|
||||
CacheFilesWithSize cache_files;
|
||||
|
||||
if (!path::DirExists({root_dir_})) return cache_files;
|
||||
|
||||
auto handler = [&](const std::string& dir, const std::string& filename,
|
||||
int64_t modified_time, uint64_t size,
|
||||
bool is_directory) -> absl::Status {
|
||||
if (!is_directory) {
|
||||
cache_files.files.emplace_back();
|
||||
cache_files.files.back().path =
|
||||
path::Join(dir.substr(root_dir_.size()), filename);
|
||||
cache_files.files.back().mtime = modified_time;
|
||||
cache_files.files.back().size = size;
|
||||
cache_files.size += size;
|
||||
}
|
||||
if (!continue_on_interrupt && interrupt_ && *interrupt_) {
|
||||
return absl::CancelledError("Cache cleanup has been cancelled");
|
||||
}
|
||||
return absl::OkStatus();
|
||||
};
|
||||
|
||||
RETURN_IF_ERROR(path::SearchFiles(root_dir_, true, handler));
|
||||
size_ = cache_files.size;
|
||||
size_initialized_ = true;
|
||||
return cache_files;
|
||||
}
|
||||
|
||||
std::string DiskDataStore::GetCacheFilePath(
|
||||
const ContentIdProto& content_id) const {
|
||||
std::string file_name = AddPathSeparators(ContentId::ToHexString(content_id),
|
||||
kDirNameLength, depth_);
|
||||
return path::Join(root_dir_, file_name);
|
||||
}
|
||||
|
||||
bool DiskDataStore::ParseCacheFilePath(std::string path,
|
||||
ContentIdProto* content_id) const {
|
||||
// Remove path separators.
|
||||
if (depth_ > 0) {
|
||||
path.erase(std::remove_if(path.begin(), path.end(),
|
||||
[](char c) {
|
||||
return c == path::PathSeparator() ||
|
||||
c == path::OtherPathSeparator();
|
||||
}),
|
||||
path.end());
|
||||
}
|
||||
return ContentId::FromHexString(path, content_id);
|
||||
}
|
||||
|
||||
void DiskDataStore::UpdateModificationTime(const std::string& path) {
|
||||
// Don't fail if the time cannot be modified.
|
||||
// The time might be updated in parallel, so it is not critical.
|
||||
path::SetFileTime(path, std::chrono::system_clock::to_time_t(clock_->Now()))
|
||||
.IgnoreError();
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::CreateDirHierarchy() {
|
||||
if (dirs_.empty() && depth_ > 0) {
|
||||
dirs_ = GenerateDirNames(kDirNameLength);
|
||||
}
|
||||
RETURN_IF_ERROR(path::CreateDirRec(root_dir_));
|
||||
return CreateDirLevelRec(root_dir_, depth_);
|
||||
}
|
||||
|
||||
absl::Status DiskDataStore::CreateDirLevelRec(const std::string& parent,
|
||||
unsigned int depth) {
|
||||
if (depth == 0) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
for (const std::string& dir : dirs_) {
|
||||
std::string name = path::Join(parent, dir);
|
||||
RETURN_IF_ERROR(path::CreateDir(name));
|
||||
RETURN_IF_ERROR(CreateDirLevelRec(name, depth - 1),
|
||||
"CreateDirLevelRec() for %s failed at level %d:", name,
|
||||
depth - 1);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
} // namespace cdc_ft
|
||||
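A self-contained illustration (standard C++ only, mirroring the AddPathSeparators helper above rather than linking against it) of how a hex content id is fanned out into the cache directory hierarchy:

#include <algorithm>
#include <iostream>
#include <string>

// Inserts a path separator after every |distance| characters, at most |count|
// times, leaving at least one character for the file name.
std::string AddPathSeparators(const std::string& input, size_t distance,
                              size_t count, char sep = '/') {
  if (input.empty() || distance == 0 || count == 0) return input;
  count = std::min((input.size() - 1) / distance, count);
  std::string path;
  path.reserve(input.size() + count);
  auto it = input.begin();
  while (count > 0 && it < input.end()) {
    path.append(it, it + distance);
    path.push_back(sep);
    it += distance;
    --count;
  }
  if (it < input.end()) path.append(it, input.end());
  return path;
}

int main() {
  // With kDirNameLength == 2 and depth == 2, the chunk id "aabbccddee..."
  // is stored under the sub-directory aa/bb/.
  std::cout << AddPathSeparators("aabbccddee", 2, 2) << "\n";  // aa/bb/ccddee
  return 0;
}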
191
data_store/disk_data_store.h
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_DISK_DATA_STORE_H_
|
||||
#define DATA_STORE_DISK_DATA_STORE_H_
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "absl/status/status.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "common/buffer.h"
|
||||
#include "common/clock.h"
|
||||
#include "common/platform.h"
|
||||
#include "data_store/data_store_writer.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// File-based LRU cache to store data chunks on disk. The LRU strategy is based
|
||||
// on each file's mtime, which gets updated on each access.
|
||||
// Not thread-safe.
|
||||
class DiskDataStore : public DataStoreWriter {
|
||||
public:
|
||||
struct Statistics {
|
||||
size_t size = 0;
|
||||
size_t number_of_chunks = 0;
|
||||
};
|
||||
|
||||
static constexpr uint64_t kDefaultCapacity{150ull << 30}; // 150 GiB
|
||||
|
||||
// Creates and returns a DiskDataStore that generates the cache directory
|
||||
// hierarchy in |cache_root_dir| of |depth| at startup if |create_dirs| is
|
||||
// set.
|
||||
// Returns an error status if the cache directories cannot be created.
|
||||
// Uses |clock| as an internal clock for the file modification times.
|
||||
static absl::StatusOr<std::unique_ptr<DiskDataStore>> Create(
|
||||
unsigned int depth, std::string cache_root_dir, bool create_dirs,
|
||||
SystemClock* clock = DefaultSystemClock::GetInstance());
|
||||
|
||||
DiskDataStore(const DiskDataStore& other) = delete;
|
||||
DiskDataStore& operator=(const DiskDataStore& other) = delete;
|
||||
|
||||
~DiskDataStore();
|
||||
|
||||
// DataStoreReader:
|
||||
absl::StatusOr<size_t> Get(const ContentIdProto& content_id, void* data,
|
||||
size_t offset, size_t size) override;
|
||||
absl::Status Get(const ContentIdProto& content_id, Buffer* data) override;
|
||||
|
||||
// DataStoreWriter:
|
||||
absl::Status Put(const ContentIdProto& content_id, const void* data,
|
||||
size_t size) override;
|
||||
absl::Status Remove(const ContentIdProto& content_id) override;
|
||||
absl::Status Wipe() override;
|
||||
absl::Status Prune(std::unordered_set<ContentIdProto> ids_to_keep) override;
|
||||
bool Contains(const ContentIdProto& content_id) override;
|
||||
// Removes chunks in the LRU order if the cache size exceeds its capacity.
|
||||
// Cleans the cache up until its size drops below the cache capacity
|
||||
// limited by the fill factor (capacity * fill factor).
|
||||
absl::Status Cleanup() override;
|
||||
|
||||
// Returns a list of all contained content ids independent of |interrupt_|.
|
||||
absl::StatusOr<std::vector<ContentIdProto>> List();
|
||||
|
||||
// Returns the defined cache capacity in bytes.
|
||||
// If 0, the cache is disabled.
|
||||
// If < 0, the disk space is not limited and the whole disk can be used for
|
||||
// storing data in the cache.
|
||||
int64_t Capacity() const;
|
||||
|
||||
// Returns the fill factor that defines the maximum portion of the capacity,
|
||||
// which can be occupied by the cache after cleanup.
|
||||
double FillFactor() const;
|
||||
|
||||
// Returns the depth of the hierarchy of the cache directories.
|
||||
unsigned int Depth() const;
|
||||
|
||||
// Returns the current total |size_| of the stored data.
|
||||
size_t Size() const;
|
||||
|
||||
// Returns the path to the root cache directory.
|
||||
const std::string& RootDir() const;
|
||||
|
||||
// Sets the cache capacity in bytes.
|
||||
// No cleanup is performed.
|
||||
void SetCapacity(int64_t capacity);
|
||||
|
||||
// Sets the cache fill factor.
|
||||
// |factor| must be in the range (0, 1].
|
||||
absl::Status SetFillFactor(double factor);
|
||||
|
||||
// Calculates cache statistics including the total amount of disk space used
|
||||
// for storing chunks measured in bytes and the number of chunks.
|
||||
// Returns an error if the size could not be calculated.
|
||||
// This is an expensive operation.
|
||||
absl::StatusOr<Statistics> CalculateStatistics() const;
|
||||
|
||||
// The number of symbols in the cache's directory names.
|
||||
static constexpr int kDirNameLength = 2;
|
||||
|
||||
private:
|
||||
friend class DataProviderTest;
|
||||
|
||||
struct CacheFile {
|
||||
std::string path;
|
||||
int64_t mtime = 0;
|
||||
size_t size = 0;
|
||||
|
||||
void swap(CacheFile& other) {
|
||||
std::swap(path, other.path);
|
||||
std::swap(mtime, other.mtime);
|
||||
std::swap(size, other.size);
|
||||
}
|
||||
};
|
||||
|
||||
struct CacheFilesWithSize {
|
||||
size_t size = 0;
|
||||
std::vector<CacheFile> files;
|
||||
};
|
||||
|
||||
DiskDataStore(unsigned int depth, std::string cache_root_dir,
|
||||
bool create_dirs, SystemClock* clock);
|
||||
|
||||
// Returns a vector of CacheFile with their total size in bytes.
|
||||
// In addition, initializes |size_| if the method succeeds.
|
||||
// Returns an error status if an error occurred.
|
||||
// If |continue_on_interrupt| is false, the method is cancelled as soon as
|
||||
// |interrupt_| is set by a new read/write request.
|
||||
absl::StatusOr<CacheFilesWithSize> CollectCacheFiles(
|
||||
bool continue_on_interrupt = false);
|
||||
|
||||
// Returns the path to the file, which stores the data chunk for |content_id|.
|
||||
std::string GetCacheFilePath(const ContentIdProto& content_id) const;
|
||||
|
||||
// Parses the chunk file |path| into its content id if possible.
|
||||
// |path| is expected to look similar to "aa/bb/ccddeeff...".
|
||||
// Returns false if parsing fails.
|
||||
bool ParseCacheFilePath(std::string path, ContentIdProto* content_id) const;
|
||||
|
||||
// Updates modification time of |path|.
|
||||
void UpdateModificationTime(const std::string& path);
|
||||
|
||||
// Creates the cache directory hierarchy.
|
||||
absl::Status CreateDirHierarchy();
|
||||
|
||||
// Creates cache directories in the |parent| on the level |depth| recursively.
|
||||
absl::Status CreateDirLevelRec(const std::string& parent, unsigned int depth);
|
||||
|
||||
// When the cache is cleaned up, it is advantageous to free some extra space
|
||||
// for new chunks rather than only removing the chunks that push the cache
|
||||
// over its capacity.
|
||||
static constexpr double kDefaultFillFactor = 0.8;
|
||||
|
||||
const unsigned int depth_;
|
||||
std::string root_dir_;
|
||||
const bool create_dirs_;
|
||||
const SystemClock* clock_;
|
||||
|
||||
std::atomic<int64_t> capacity_{kDefaultCapacity};
|
||||
std::atomic<double> fill_factor_{kDefaultFillFactor};
|
||||
|
||||
// The total data size is updated at Put(), Prune(), Wipe(), and Cleanup().
|
||||
// It is not guaranteed to be correct between cleanups:
|
||||
// - Put() does not consider the size of the file metadata.
|
||||
// - it is wrong before the first Cleanup() if the cache already had data
|
||||
//   stored on disk from previous AS runs.
|
||||
std::atomic<size_t> size_{0};
|
||||
|
||||
// Indicates whether |size_| has already been initialized by Cleanup().
|
||||
// Without it, Cleanup() could erroneously be skipped if new data was written
|
||||
// before the first Cleanup() took place.
|
||||
std::atomic<bool> size_initialized_{false};
|
||||
|
||||
std::vector<std::string> dirs_;
|
||||
}; // class DiskDataStore
|
||||
|
||||
} // namespace cdc_ft
|
||||
#endif // DATA_STORE_DISK_DATA_STORE_H_
|
||||
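A hypothetical wiring sketch for the header above (not part of the commit; the cache path and capacity are made-up illustration values): create the cache, store one chunk, and let Cleanup() enforce the LRU budget.

#include <memory>

#include "common/status_macros.h"
#include "data_store/disk_data_store.h"
#include "manifest/content_id.h"

namespace cdc_ft {

absl::Status DiskCacheSketch(const void* data, size_t size) {
  // depth == 2 spreads chunks over <root>/aa/bb/... sub-directories.
  std::unique_ptr<DiskDataStore> cache;
  ASSIGN_OR_RETURN(cache, DiskDataStore::Create(/*depth=*/2, "/tmp/cdc_cache",
                                                /*create_dirs=*/false));
  cache->SetCapacity(1ull << 30);  // 1 GiB budget (hypothetical).
  RETURN_IF_ERROR(cache->Put(ContentId::FromArray(data, size), data, size));
  // Evicts least recently used chunks until size <= capacity * fill factor.
  return cache->Cleanup();
}

}  // namespace cdc_ft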
453
data_store/disk_data_store_test.cc
Normal file
@@ -0,0 +1,453 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/disk_data_store.h"
|
||||
|
||||
#include "common/path.h"
|
||||
#include "common/status_test_macros.h"
|
||||
#include "common/testing_clock.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
namespace {
|
||||
constexpr uint8_t kFirstData[] = {10, 20, 30, 40, 50, 60, 70, 80, 90};
|
||||
constexpr uint8_t kSecondData[] = {100, 101, 102, 103, 104, 105, 106};
|
||||
constexpr size_t kFirstDataSize = sizeof(kFirstData);
|
||||
constexpr size_t kSecondDataSize = sizeof(kSecondData);
|
||||
constexpr char kTestCacheDirName[] = ".cdc_ft_cache";
|
||||
|
||||
class DiskDataStoreTest : public ::testing::Test {
|
||||
public:
|
||||
DiskDataStoreTest() {
|
||||
first_content_id_ = ContentId::FromArray(kFirstData, kFirstDataSize);
|
||||
second_content_id_ = ContentId::FromArray(kSecondData, kSecondDataSize);
|
||||
}
|
||||
void SetUp() override {
|
||||
cache_dir_path_ = path::Join(path::GetTempDir(), kTestCacheDirName);
|
||||
EXPECT_OK(path::RemoveDirRec(cache_dir_path_));
|
||||
}
|
||||
void TearDown() override { EXPECT_OK(path::RemoveDirRec(cache_dir_path_)); }
|
||||
|
||||
std::unique_ptr<DiskDataStore> CreateCache(unsigned int depth,
|
||||
bool create_dirs = false) {
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> cache =
|
||||
DiskDataStore::Create(depth, cache_dir_path_, create_dirs, &clock_);
|
||||
EXPECT_OK(cache);
|
||||
return std::move(*cache);
|
||||
}
|
||||
|
||||
protected:
|
||||
ContentIdProto first_content_id_;
|
||||
ContentIdProto second_content_id_;
|
||||
TestingSystemClock clock_;
|
||||
std::string cache_dir_path_;
|
||||
};
|
||||
|
||||
TEST_F(DiskDataStoreTest, DiskDataStore) {
|
||||
auto cache = CreateCache(0);
|
||||
EXPECT_EQ(0u, cache->Size());
|
||||
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(0u, statistics->size);
|
||||
EXPECT_EQ(0u, statistics->number_of_chunks);
|
||||
|
||||
EXPECT_GT(cache->Capacity(), 0);
|
||||
EXPECT_GT(cache->FillFactor(), 0);
|
||||
EXPECT_LT(cache->FillFactor(), 1);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, PutGet) {
|
||||
auto cache = CreateCache(2);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_EQ(kFirstDataSize, cache->Size());
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kFirstDataSize, statistics->size);
|
||||
EXPECT_EQ(1u, statistics->number_of_chunks);
|
||||
EXPECT_TRUE(cache->Contains(first_content_id_));
|
||||
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
cache->Get(first_content_id_, &ret_data, 0, kFirstDataSize);
|
||||
EXPECT_EQ(kFirstDataSize, cache->Size());
|
||||
ASSERT_OK(bytes_read);
|
||||
ASSERT_EQ(kFirstDataSize, *bytes_read);
|
||||
EXPECT_TRUE(std::equal(std::begin(kFirstData), std::end(kFirstData),
|
||||
std::begin(ret_data)));
|
||||
statistics = cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kFirstDataSize, statistics->size);
|
||||
EXPECT_EQ(1u, statistics->number_of_chunks);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetBuffer) {
|
||||
auto cache = CreateCache(1);
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
|
||||
Buffer buffer;
|
||||
EXPECT_OK(cache->Get(first_content_id_, &buffer));
|
||||
Buffer exp_buffer({10, 20, 30, 40, 50, 60, 70, 80, 90});
|
||||
EXPECT_EQ(exp_buffer, buffer);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, Wipe) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_EQ(kFirstDataSize, cache->Size());
|
||||
EXPECT_OK(cache->Wipe());
|
||||
EXPECT_EQ(0u, cache->Size());
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(0u, statistics->size);
|
||||
EXPECT_EQ(0u, statistics->number_of_chunks);
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, PruneSucceeds) {
|
||||
auto cache = CreateCache(2);
|
||||
|
||||
ContentIdProto content_ids[4];
|
||||
for (size_t n = 0; n < std::size(content_ids); ++n) {
|
||||
content_ids[n] = ContentId::FromArray(&n, sizeof(n));
|
||||
EXPECT_OK(cache->Put(content_ids[n], &n, sizeof(n)));
|
||||
}
|
||||
|
||||
std::unordered_set<ContentIdProto> ids_to_keep = {content_ids[0],
|
||||
content_ids[2]};
|
||||
EXPECT_OK(cache->Prune(std::move(ids_to_keep)));
|
||||
EXPECT_TRUE(cache->Contains(content_ids[0]));
|
||||
EXPECT_TRUE(cache->Contains(content_ids[2]));
|
||||
EXPECT_EQ(2 * sizeof(size_t), cache->Size());
|
||||
|
||||
EXPECT_FALSE(cache->Contains(content_ids[1]));
|
||||
EXPECT_FALSE(cache->Contains(content_ids[3]));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, PruneFailsNotFound) {
|
||||
auto cache = CreateCache(2);
|
||||
|
||||
ContentIdProto content_ids[2];
|
||||
for (size_t n = 0; n < std::size(content_ids); ++n)
|
||||
content_ids[n] = ContentId::FromArray(&n, sizeof(n));
|
||||
EXPECT_OK(cache->Put(content_ids[0], nullptr, 0));
|
||||
|
||||
std::unordered_set<ContentIdProto> ids_to_keep = {content_ids[1]};
|
||||
EXPECT_TRUE(absl::IsNotFound(cache->Prune(std::move(ids_to_keep))));
|
||||
|
||||
EXPECT_FALSE(cache->Contains(content_ids[0]));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, SetCapacity) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
cache->SetCapacity(0);
|
||||
EXPECT_OK(cache->Cleanup());
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(0u, statistics->size);
|
||||
EXPECT_EQ(0u, statistics->number_of_chunks);
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, SetFillFactor) {
|
||||
auto cache = CreateCache(2);
|
||||
|
||||
EXPECT_OK(cache->SetFillFactor(0.1));
|
||||
EXPECT_NOT_OK(cache->SetFillFactor(0));
|
||||
EXPECT_NOT_OK(cache->SetFillFactor(100));
|
||||
EXPECT_OK(cache->SetFillFactor(1));
|
||||
EXPECT_EQ(1, cache->FillFactor());
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetNonExisting) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
absl::StatusOr<size_t> read_bytes =
|
||||
cache->Get(first_content_id_, &ret_data, 0, kFirstDataSize);
|
||||
EXPECT_TRUE(absl::IsNotFound(read_bytes.status()));
|
||||
|
||||
Buffer buffer;
|
||||
EXPECT_TRUE(absl::IsNotFound(cache->Get(first_content_id_, &buffer)));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, PutTwoRemoveOne) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_EQ(kFirstDataSize, cache->Size());
|
||||
clock_.Advance(1000);
|
||||
EXPECT_OK(cache->Put(second_content_id_, kSecondData, kSecondDataSize));
|
||||
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kFirstDataSize + kSecondDataSize, statistics->size);
|
||||
EXPECT_EQ(statistics->size, cache->Size());
|
||||
EXPECT_EQ(2u, statistics->number_of_chunks);
|
||||
|
||||
cache->SetCapacity(kFirstDataSize + 4);
|
||||
EXPECT_OK(cache->Cleanup());
|
||||
|
||||
statistics = cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kSecondDataSize, statistics->size);
|
||||
EXPECT_EQ(statistics->size, cache->Size());
|
||||
EXPECT_EQ(1u, statistics->number_of_chunks);
|
||||
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
EXPECT_TRUE(cache->Contains(second_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, PutTwoReadOldRemoveOne) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
clock_.Advance(1000);
|
||||
EXPECT_OK(cache->Put(second_content_id_, kSecondData, kSecondDataSize));
|
||||
clock_.Advance(1000);
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
EXPECT_OK(
|
||||
cache->Get(first_content_id_, &ret_data, 0, kFirstDataSize).status());
|
||||
|
||||
// second_content_id_ should be removed after the cleanup.
|
||||
cache->SetCapacity(kFirstDataSize + 4);
|
||||
EXPECT_OK(cache->Cleanup());
|
||||
|
||||
EXPECT_TRUE(cache->Contains(first_content_id_));
|
||||
EXPECT_FALSE(cache->Contains(second_content_id_));
|
||||
|
||||
uint8_t ret_data2[kFirstDataSize];
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
cache->Get(first_content_id_, &ret_data2, 0, kFirstDataSize);
|
||||
ASSERT_OK(bytes_read);
|
||||
ASSERT_EQ(kFirstDataSize, *bytes_read);
|
||||
EXPECT_TRUE(std::equal(std::begin(kFirstData), std::end(kFirstData),
|
||||
std::begin(ret_data2)));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetWithZeroLength) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
|
||||
uint8_t ret_data[1];
|
||||
absl::StatusOr<size_t> read_bytes =
|
||||
cache->Get(first_content_id_, &ret_data, 0, 0);
|
||||
ASSERT_OK(read_bytes);
|
||||
ASSERT_EQ(0u, *read_bytes);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetWithOffset) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
|
||||
size_t const offset = 3;
|
||||
size_t const len = kFirstDataSize - offset;
|
||||
uint8_t ret_data[len];
|
||||
absl::StatusOr<size_t> read_bytes =
|
||||
cache->Get(first_content_id_, &ret_data, offset, len);
|
||||
ASSERT_OK(read_bytes);
|
||||
ASSERT_EQ(len, *read_bytes);
|
||||
EXPECT_TRUE(std::equal(std::begin(kFirstData) + offset, std::end(kFirstData),
|
||||
std::begin(ret_data)));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetWithWrongOffset) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
|
||||
uint8_t ret_data[kFirstDataSize];
|
||||
absl::StatusOr<size_t> read_bytes =
|
||||
cache->Get(first_content_id_, &ret_data, 1000, kFirstDataSize);
|
||||
ASSERT_OK(read_bytes);
|
||||
ASSERT_EQ(0, *read_bytes);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, GetWithTooBigLength) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
|
||||
uint8_t ret_data[kFirstDataSize + 10];
|
||||
absl::StatusOr<size_t> read_bytes =
|
||||
cache->Get(first_content_id_, &ret_data, 0, kFirstDataSize + 10);
|
||||
ASSERT_OK(read_bytes);
|
||||
ASSERT_EQ(kFirstDataSize, *read_bytes);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, Remove) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_OK(cache->Remove(first_content_id_));
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
EXPECT_OK(cache->Remove(first_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, CreateCacheIfRootDirExists) {
|
||||
auto cache1 = CreateCache(1);
|
||||
EXPECT_OK(cache1->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_OK(cache1->Put(second_content_id_, kSecondData, kSecondDataSize));
|
||||
|
||||
auto cache2 = CreateCache(1);
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics1 =
|
||||
cache1->CalculateStatistics();
|
||||
ASSERT_OK(statistics1);
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics2 =
|
||||
cache2->CalculateStatistics();
|
||||
ASSERT_OK(statistics2);
|
||||
|
||||
EXPECT_EQ(statistics1->size, statistics2->size);
|
||||
EXPECT_EQ(statistics1->number_of_chunks, statistics2->number_of_chunks);
|
||||
EXPECT_EQ(cache2->Capacity(), cache1->Capacity());
|
||||
EXPECT_EQ(cache2->FillFactor(), cache1->FillFactor());
|
||||
EXPECT_EQ(cache2->Depth(), cache1->Depth());
|
||||
EXPECT_TRUE(cache2->Contains(first_content_id_));
|
||||
EXPECT_TRUE(cache2->Contains(second_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, CacheWithDirectories) {
|
||||
unsigned int depth = 1;
|
||||
unsigned int dir_count = 0;
|
||||
auto cache = CreateCache(depth, true);
|
||||
|
||||
EXPECT_EQ(depth, cache->Depth());
|
||||
|
||||
auto handler = [&dir_count](const std::string& /*dir*/,
|
||||
const std::string& /*filename*/,
|
||||
int64_t /*modified_time*/, uint64_t /*size*/,
|
||||
bool is_directory) -> absl::Status {
|
||||
if (is_directory) {
|
||||
++dir_count;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
};
|
||||
EXPECT_OK(path::SearchFiles(cache->RootDir(), true, handler));
|
||||
EXPECT_EQ(dir_count,
|
||||
std::pow(std::pow(16, DiskDataStore::kDirNameLength), depth));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, CacheWithDirectoriesOnDemand) {
|
||||
unsigned int depth = 4;
|
||||
unsigned int dir_count = 0;
|
||||
auto cache = CreateCache(depth, false);
|
||||
|
||||
EXPECT_EQ(depth, cache->Depth());
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
auto handler = [&dir_count](const std::string& /*dir*/,
|
||||
const std::string& /*filename*/,
|
||||
int64_t /*modified_time*/, uint64_t /*size*/,
|
||||
bool is_directory) -> absl::Status {
|
||||
if (is_directory) {
|
||||
++dir_count;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
};
|
||||
EXPECT_OK(path::SearchFiles(cache->RootDir(), true, handler));
|
||||
EXPECT_EQ(dir_count, 4u);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, OverwriteExistingEntry) {
|
||||
auto cache = CreateCache(0, true);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
EXPECT_OK(cache->Put(first_content_id_, kSecondData, kSecondDataSize));
|
||||
uint8_t ret_data[kSecondDataSize];
|
||||
absl::StatusOr<uint64_t> bytes_read =
|
||||
cache->Get(first_content_id_, &ret_data, 0, kSecondDataSize);
|
||||
ASSERT_OK(bytes_read);
|
||||
ASSERT_EQ(kSecondDataSize, *bytes_read);
|
||||
EXPECT_TRUE(std::equal(std::begin(kSecondData), std::end(kSecondData),
|
||||
std::begin(ret_data)));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, List) {
|
||||
auto cache = CreateCache(0, true);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, 1));
|
||||
EXPECT_OK(cache->Put(second_content_id_, kSecondData, 1));
|
||||
|
||||
absl::StatusOr<std::vector<ContentIdProto>> ids = cache->List();
|
||||
ASSERT_OK(ids);
|
||||
ASSERT_EQ(ids->size(), 2);
|
||||
|
||||
if (ids->at(0) == second_content_id_) std::swap(ids->at(0), ids->at(1));
|
||||
EXPECT_TRUE(ids->at(0) == first_content_id_);
|
||||
EXPECT_TRUE(ids->at(1) == second_content_id_);
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, InterruptCleanup) {
|
||||
auto cache = CreateCache(0);
|
||||
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
cache->SetCapacity(0);
|
||||
std::atomic<bool> interrupt{true};
|
||||
cache->RegisterInterrupt(&interrupt);
|
||||
EXPECT_TRUE(absl::IsCancelled(cache->Cleanup()));
|
||||
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kFirstDataSize, statistics->size);
|
||||
EXPECT_EQ(1u, statistics->number_of_chunks);
|
||||
EXPECT_TRUE(cache->Contains(first_content_id_));
|
||||
|
||||
// Resetting interrupt should enable Cleanup().
|
||||
interrupt = false;
|
||||
EXPECT_OK(cache->Cleanup());
|
||||
|
||||
statistics = cache->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(0u, statistics->size);
|
||||
EXPECT_EQ(0u, statistics->number_of_chunks);
|
||||
EXPECT_FALSE(cache->Contains(first_content_id_));
|
||||
}
|
||||
|
||||
TEST_F(DiskDataStoreTest, CleanupForPrefilledCacheSuccess) {
|
||||
auto cache = CreateCache(0);
|
||||
EXPECT_OK(cache->Put(first_content_id_, kFirstData, kFirstDataSize));
|
||||
clock_.Advance(1000);
|
||||
|
||||
absl::StatusOr<std::unique_ptr<DiskDataStore>> filled_cache =
|
||||
DiskDataStore::Create(0, cache_dir_path_, false, &clock_);
|
||||
EXPECT_OK(filled_cache);
|
||||
EXPECT_OK(
|
||||
(*filled_cache)->Put(second_content_id_, kSecondData, kSecondDataSize));
|
||||
(*filled_cache)->SetCapacity(kFirstDataSize + 4);
|
||||
EXPECT_OK((*filled_cache)->Cleanup());
|
||||
|
||||
absl::StatusOr<DiskDataStore::Statistics> statistics =
|
||||
(*filled_cache)->CalculateStatistics();
|
||||
ASSERT_OK(statistics);
|
||||
EXPECT_EQ(kSecondDataSize, statistics->size);
|
||||
EXPECT_EQ(1u, statistics->number_of_chunks);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
103
data_store/grpc_reader.cc
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/grpc_reader.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "cdc_fuse_fs/asset_stream_client.h"
|
||||
#include "common/status.h"
|
||||
#include "common/status_macros.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
GrpcReader::GrpcReader(std::shared_ptr<grpc::Channel> channel,
|
||||
bool enable_stats)
|
||||
: client_(std::make_unique<AssetStreamClient>(std::move(channel),
|
||||
enable_stats)) {}
|
||||
|
||||
GrpcReader::~GrpcReader() = default;
|
||||
|
||||
absl::Status GrpcReader::SendCachedContentIds(
|
||||
std::vector<ContentIdProto> content_ids) {
|
||||
return client_->SendCachedContentIds(std::move(content_ids));
|
||||
}
|
||||
|
||||
absl::StatusOr<size_t> GrpcReader::Get(const ContentIdProto& id, void* data,
|
||||
uint64_t size, uint64_t offset) {
|
||||
absl::StatusOr<std::string> result = client_->GetContent(id);
|
||||
if (!result.ok()) {
|
||||
return WrapStatus(result.status(), "Failed to stream data for id %s",
|
||||
ContentId::ToHexString(id));
|
||||
}
|
||||
if (offset >= result->size()) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t bytes_to_copy = std::min<uint64_t>(result->size() - offset, size);
|
||||
memcpy(data, result->data() + offset, bytes_to_copy);
|
||||
return bytes_to_copy;
|
||||
}
|
||||
|
||||
absl::Status GrpcReader::Get(ChunkTransferList* chunks) {
|
||||
RepeatedContentIdProto chunk_ids;
|
||||
for (const ChunkTransferTask& chunk : *chunks) {
|
||||
if (!chunk.done) *chunk_ids.Add() = chunk.id;
|
||||
}
|
||||
|
||||
const int chunk_id_count = chunk_ids.size();
|
||||
RepeatedStringProto chunk_data;
|
||||
ASSIGN_OR_RETURN(chunk_data, client_->GetContent(std::move(chunk_ids)),
|
||||
"Failed to stream data chunks [%s]",
|
||||
chunks->UndoneToHexString());
|
||||
|
||||
if (chunk_data.size() != chunk_id_count) {
|
||||
return MakeStatus(
|
||||
"Incomplete response received for chunks [%s], expected %u, got %u",
|
||||
chunks->UndoneToHexString(), chunk_id_count, chunk_data.size());
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for (ChunkTransferTask& chunk : *chunks) {
|
||||
if (chunk.done) continue;
|
||||
// Move the complete chunk data over to the chunks list.
|
||||
chunk.chunk_data = std::move(chunk_data[i++]);
|
||||
// Verify the chunk size.
|
||||
if (chunk.chunk_data.size() < chunk.offset + chunk.size) {
|
||||
return MakeStatus(
|
||||
"Truncated chunk '%s' received, expected %u + %u = %u bytes, got %u",
|
||||
ContentId::ToHexString(chunk.id), chunk.offset, chunk.size,
|
||||
chunk.offset + chunk.size, chunk.chunk_data.size());
|
||||
}
|
||||
// Copy the part of the chunk data to the target buffer.
|
||||
if (chunk.data) {
|
||||
memcpy(chunk.data, chunk.chunk_data.data() + chunk.offset, chunk.size);
|
||||
}
|
||||
chunk.done = true;
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status GrpcReader::Get(const ContentIdProto& id, Buffer* data) {
|
||||
absl::StatusOr<std::string> result = client_->GetContent(id);
|
||||
if (!result.ok()) {
|
||||
return WrapStatus(result.status(), "Failed to stream data for id %s",
|
||||
ContentId::ToHexString(id));
|
||||
}
|
||||
data->clear();
|
||||
data->append((*result).data(), (*result).size());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
57
data_store/grpc_reader.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_GRPC_READER_H_
|
||||
#define DATA_STORE_GRPC_READER_H_
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "data_store/data_store_reader.h"
|
||||
#include "grpcpp/channel.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
class AssetStreamClient;
|
||||
|
||||
// Implementation of a DataStoreReader that loads chunks through gRPC
|
||||
// exclusively. Does not have any local caching.
|
||||
class GrpcReader : public DataStoreReader {
|
||||
public:
|
||||
// |channel| is a grpc channel to connect to.
|
||||
// |enable_stats| determines whether additional statistics are sent.
|
||||
GrpcReader(std::shared_ptr<grpc::Channel> channel, bool enable_stats);
|
||||
virtual ~GrpcReader();
|
||||
|
||||
GrpcReader(const GrpcReader&) = delete;
|
||||
GrpcReader& operator=(const GrpcReader&) = delete;
|
||||
|
||||
// Sends the IDs of all cached chunks to the workstation for statistical
|
||||
// purposes.
|
||||
absl::Status SendCachedContentIds(std::vector<ContentIdProto> content_ids);
|
||||
|
||||
// DataStoreReader:
|
||||
absl::StatusOr<size_t> Get(const ContentIdProto& key, void* data,
|
||||
uint64_t size, uint64_t offset) override;
|
||||
absl::Status Get(ChunkTransferList* chunks) override;
|
||||
absl::Status Get(const ContentIdProto& key, Buffer* data) override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<AssetStreamClient> client_;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // DATA_STORE_GRPC_READER_H_
|
||||
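A hypothetical wiring sketch for GrpcReader (not part of the commit; the target address is a made-up placeholder): open a channel to the asset stream service and fetch one chunk into a Buffer.

#include <memory>

#include "absl/status/status.h"
#include "common/buffer.h"
#include "data_store/grpc_reader.h"
#include "grpcpp/create_channel.h"
#include "grpcpp/security/credentials.h"
#include "manifest/content_id.h"

namespace cdc_ft {

absl::Status FetchChunkSketch(const ContentIdProto& id, Buffer* data) {
  GrpcReader reader(grpc::CreateChannel("localhost:44432",
                                        grpc::InsecureChannelCredentials()),
                    /*enable_stats=*/false);
  // Streams the full chunk for |id| from the remote side into |data|.
  return reader.Get(id, data);
}

}  // namespace cdc_ft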
151
data_store/mem_data_store.cc
Normal file
@@ -0,0 +1,151 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/mem_data_store.h"
|
||||
|
||||
#include "common/status.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
MemDataStore::MemDataStore() = default;
|
||||
|
||||
MemDataStore::~MemDataStore() = default;
|
||||
|
||||
ContentIdProto MemDataStore::AddData(std::vector<char> data) {
|
||||
ContentIdProto id = ContentId::FromArray(data.data(), data.size());
|
||||
data_lookup_[id] = std::move(data);
|
||||
return id;
|
||||
}
|
||||
|
||||
ContentIdProto MemDataStore::AddProto(
|
||||
const google::protobuf::MessageLite& message) {
|
||||
std::vector<char> data;
|
||||
data.resize(message.ByteSizeLong());
|
||||
message.SerializeToArray(data.data(), static_cast<int>(data.size()));
|
||||
return AddData(std::move(data));
|
||||
}
|
||||
|
||||
absl::StatusOr<size_t> MemDataStore::Get(const ContentIdProto& id, void* data,
|
||||
size_t offset, size_t size) {
|
||||
auto it = data_lookup_.find(id);
|
||||
if (it == data_lookup_.end()) {
|
||||
return absl::NotFoundError(absl::StrFormat("Failed to find data id '%s'",
|
||||
ContentId::ToHexString(id)));
|
||||
}
|
||||
|
||||
const std::vector<char>& data_vec = it->second;
|
||||
if (offset >= data_vec.size()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t bytes_to_copy = std::min<uint64_t>(data_vec.size() - offset, size);
|
||||
memcpy(data, data_vec.data() + offset, bytes_to_copy);
|
||||
return bytes_to_copy;
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Get(const ContentIdProto& id, Buffer* data) {
|
||||
auto it = data_lookup_.find(id);
|
||||
if (it == data_lookup_.end()) {
|
||||
return absl::NotFoundError(absl::StrFormat("Failed to find data id '%s'",
|
||||
ContentId::ToHexString(id)));
|
||||
}
|
||||
|
||||
const std::vector<char>& data_vec = it->second;
|
||||
data->clear();
|
||||
data->append(data_vec.data(), data_vec.size());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Get(ChunkTransferList* chunks) {
|
||||
for (ChunkTransferTask& chunk : *chunks) {
|
||||
if (chunk.done) continue;
|
||||
auto it = data_lookup_.find(chunk.id);
|
||||
if (it == data_lookup_.end()) continue;
|
||||
// Copy the potentially prefetched string for caching.
|
||||
chunk.chunk_data = std::string(it->second.data(), it->second.size());
|
||||
if (!chunk.size) {
|
||||
chunk.done = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (chunk.offset >= chunk.chunk_data.size()) {
|
||||
return absl::OutOfRangeError(absl::StrFormat(
|
||||
"Chunk '%s': requested offset %u is larger or equal than size %u",
|
||||
ContentId::ToHexString(chunk.id), chunk.offset,
|
||||
chunk.chunk_data.size()));
|
||||
}
|
||||
uint64_t bytes_to_copy =
|
||||
std::min<uint64_t>(chunk.chunk_data.size() - chunk.offset, chunk.size);
|
||||
if (bytes_to_copy < chunk.size) {
|
||||
return absl::DataLossError(
|
||||
absl::StrFormat("Chunk '%s': requested size %u at offset %u is "
|
||||
"larger than chunk size %u",
|
||||
ContentId::ToHexString(chunk.id), chunk.size,
|
||||
chunk.offset, chunk.chunk_data.size()));
|
||||
}
|
||||
memcpy(chunk.data, chunk.chunk_data.data() + chunk.offset, bytes_to_copy);
|
||||
chunk.done = true;
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
bool MemDataStore::Contains(const ContentIdProto& content_id) {
|
||||
return data_lookup_.find(content_id) != data_lookup_.end();
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Put(const ContentIdProto& content_id,
|
||||
const void* data, size_t size) {
|
||||
data_lookup_[content_id] =
|
||||
std::vector<char>(reinterpret_cast<const char*>(data),
|
||||
reinterpret_cast<const char*>(data) + size);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Remove(const ContentIdProto& content_id) {
|
||||
data_lookup_.erase(content_id);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Wipe() {
|
||||
data_lookup_.clear();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MemDataStore::Prune(
|
||||
std::unordered_set<ContentIdProto> ids_to_keep) {
|
||||
// Find the set of chunks not in |ids_to_keep|.
|
||||
std::vector<ContentIdProto> to_delete;
|
||||
for (const auto& [id, _] : data_lookup_) {
|
||||
if (ids_to_keep.find(id) == ids_to_keep.end())
|
||||
to_delete.push_back(id);
|
||||
else
|
||||
ids_to_keep.erase(id);
|
||||
}
|
||||
|
||||
// Delete chunks not in |ids_to_keep|.
|
||||
for (const ContentIdProto& id : to_delete) {
|
||||
data_lookup_.erase(id);
|
||||
}
|
||||
|
||||
// Verify that all chunks in |ids_to_keep| are present in the store.
|
||||
if (!ids_to_keep.empty()) {
|
||||
return absl::NotFoundError(absl::StrFormat(
|
||||
"%u chunks, e.g. '%s', not found in the store", ids_to_keep.size(),
|
||||
ContentId::ToHexString(*ids_to_keep.begin())));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cdc_ft
|
||||
82
data_store/mem_data_store.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright 2022 Google LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STORE_MEM_DATA_STORE_H_
|
||||
#define DATA_STORE_MEM_DATA_STORE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_store/data_store_writer.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
// In-memory implementation of a DataStoreWriter. Data needs to be pre-
|
||||
// populated manually using AddData() and AddProto(). Useful for testing.
|
||||
class MemDataStore : public DataStoreWriter {
|
||||
public:
|
||||
using ChunkMap = std::unordered_map<ContentIdProto, std::vector<char>>;
|
||||
|
||||
MemDataStore();
|
||||
MemDataStore(const MemDataStore&) = delete;
|
||||
MemDataStore& operator=(const MemDataStore&) = delete;
|
||||
|
||||
virtual ~MemDataStore();
|
||||
|
||||
// TODO: Extract AddData into a helper function.
|
||||
// Adds |data| to the memory-backed storage and returns the id to it.
|
||||
ContentIdProto AddData(std::vector<char> data);
|
||||
|
||||
// TODO: Extract AddProto into a helper function.
|
||||
// Serializes |message|, adds it to the memory-backed storage, and returns the
|
||||
// id to it.
|
||||
ContentIdProto AddProto(const google::protobuf::MessageLite& message);
|
||||
|
||||
// Note: DO NOT MIX Add* and Get* methods in a multi-threaded environment!
|
||||
// Get* methods are thread-safe as they are read-only, but Add* methods write
|
||||
// to the data. They are not thread-safe.
|
||||
|
||||
// DataStoreReader:
|
||||
absl::StatusOr<size_t> Get(const ContentIdProto& id, void* data,
|
||||
size_t offset, size_t size) override;
|
||||
absl::Status Get(const ContentIdProto& content_id, Buffer* data) override;
|
||||
absl::Status Get(ChunkTransferList* chunks) override;
|
||||
|
||||
// DataStoreWriter:
|
||||
bool Contains(const ContentIdProto& content_id) override;
|
||||
|
||||
absl::Status Put(const ContentIdProto& content_id, const void* data,
|
||||
size_t size) override;
|
||||
|
||||
absl::Status Remove(const ContentIdProto& content_id) override;
|
||||
|
||||
absl::Status Wipe() override;
|
||||
|
||||
absl::Status Prune(std::unordered_set<ContentIdProto> ids_to_keep) override;
|
||||
|
||||
// Direct access to the chunks for testing.
|
||||
const ChunkMap& Chunks() const { return data_lookup_; }
|
||||
ChunkMap& Chunks() { return data_lookup_; }
|
||||
|
||||
private:
|
||||
// Maps content IDs to chunks.
|
||||
ChunkMap data_lookup_;
|
||||
};
|
||||
|
||||
} // namespace cdc_ft
|
||||
|
||||
#endif // DATA_STORE_MEM_DATA_STORE_H_
|
||||
188
data_store/mem_data_store_test.cc
Normal file
@@ -0,0 +1,188 @@
|
||||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "data_store/mem_data_store.h"
|
||||
|
||||
#include "common/status_test_macros.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "manifest/content_id.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
namespace {
|
||||
|
||||
TEST(MemDataStoreTest, GetWithMultipleIds) {
  std::vector<char> expected_data1 = {1, 3, 3, 7};
  std::vector<char> expected_data2 = {15, 0, 0, 13, 0};

  MemDataStore p;
  ContentIdProto id1 = p.AddData(expected_data1);
  ContentIdProto id2 = p.AddData(expected_data2);

  std::vector<char> data1;
  std::vector<char> data2;

  data1.resize(expected_data1.size());
  data2.resize(expected_data2.size());

  absl::StatusOr<uint64_t> bytes_read1 =
      p.Get(id1, data1.data(), 0, data1.size());
  absl::StatusOr<uint64_t> bytes_read2 =
      p.Get(id2, data2.data(), 0, data2.size());

  ASSERT_OK(bytes_read1);
  ASSERT_OK(bytes_read2);

  EXPECT_EQ(*bytes_read1, data1.size());
  EXPECT_EQ(*bytes_read2, data2.size());

  EXPECT_EQ(expected_data1, data1);
  EXPECT_EQ(expected_data2, data2);
}

TEST(MemDataStoreTest, GetWithRangeInsideOfData) {
  MemDataStore p;
  ContentIdProto id = p.AddData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});

  std::vector<char> data;
  data.resize(5);
  absl::StatusOr<uint64_t> bytes_read =
      p.Get(id, data.data(), /*offset=*/2, data.size());

  ASSERT_OK(bytes_read);
  EXPECT_EQ(*bytes_read, data.size());
  EXPECT_EQ(data, std::vector<char>({2, 3, 4, 5, 6}));
}

TEST(MemDataStoreTest, GetWithRangePartlyOutsideOfData) {
  MemDataStore p;
  ContentIdProto id = p.AddData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});

  std::vector<char> data;
  data.resize(5);
  absl::StatusOr<uint64_t> bytes_read =
      p.Get(id, data.data(), /*offset=*/7, data.size());

  ASSERT_OK(bytes_read);
  ASSERT_EQ(*bytes_read, 3);
  data.resize(3);
  EXPECT_EQ(data, std::vector<char>({7, 8, 9}));
}

TEST(MemDataStoreTest, GetWithRangeOutsideOfData) {
  MemDataStore p;
  ContentIdProto id = p.AddData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});

  std::vector<char> data;
  data.resize(5);
  absl::StatusOr<uint64_t> bytes_read =
      p.Get(id, data.data(), /*offset=*/12, data.size());

  ASSERT_OK(bytes_read);
  EXPECT_EQ(*bytes_read, 0);
}
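The three range tests above fix the clamping behavior of Get: a read is truncated at the end of the chunk, and a read that starts past the end returns zero bytes. A small sketch of that arithmetic, assuming the chunk bytes are held as a std::vector<char>; the helper name ClampedRead is illustrative and not part of the actual MemDataStore API.

#include <algorithm>  // std::min
#include <cstdint>    // uint64_t
#include <cstring>    // std::memcpy
#include <vector>

// Copies at most `size` bytes starting at `offset`, clamped to the chunk end.
uint64_t ClampedRead(const std::vector<char>& chunk, char* out,
                     uint64_t offset, uint64_t size) {
  if (offset >= chunk.size()) return 0;  // Start past the end: zero bytes read.
  uint64_t n = std::min<uint64_t>(size, chunk.size() - offset);
  std::memcpy(out, chunk.data() + offset, n);
  return n;
}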
TEST(MemDataStoreTest, GetWholeChunk) {
  std::vector<char> expected_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  Buffer expected_buffer;
  expected_buffer.append(expected_data.data(), expected_data.size());
  MemDataStore p;
  ContentIdProto id = p.AddData(std::move(expected_data));

  Buffer data;
  EXPECT_OK(p.Get(id, &data));
  EXPECT_EQ(data, expected_buffer);
}

TEST(MemDataStoreTest, GetProtoWithMultipleKeys) {
  AssetProto expected_proto1;
  AssetProto expected_proto2;

  expected_proto1.set_type(AssetProto::DIRECTORY);
  expected_proto2.set_type(AssetProto::FILE);

  expected_proto1.set_name("dir");
  expected_proto2.set_name("file");

  // Use a MemDataStore to get test data in.
  // Note that GetProto is implemented by DataStoreReader.
  MemDataStore p;
  ContentIdProto key1 = p.AddProto(expected_proto1);
  ContentIdProto key2 = p.AddProto(expected_proto2);

  AssetProto proto1;
  AssetProto proto2;

  EXPECT_OK(p.GetProto(key1, &proto1));
  EXPECT_OK(p.GetProto(key2, &proto2));

  EXPECT_EQ(expected_proto1.type(), proto1.type());
  EXPECT_EQ(expected_proto2.type(), proto2.type());

  EXPECT_EQ(expected_proto1.name(), proto1.name());
  EXPECT_EQ(expected_proto2.name(), proto2.name());
}
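As the comment in GetProtoWithMultipleKeys notes, GetProto is provided by the DataStoreReader base class. Conceptually it is a whole-chunk read followed by a protobuf parse. A hedged sketch of that shape follows; GetProtoSketch is an illustrative name, the Get(id, Buffer*) overload and Buffer's data()/size() accessors are assumed from their use in GetWholeChunk above, and the real DataStoreReader code may differ.

// Illustrative only: read the full chunk for content_id, then parse it.
absl::Status GetProtoSketch(DataStoreReader* reader,
                            const ContentIdProto& content_id,
                            google::protobuf::Message* proto) {
  Buffer data;
  absl::Status status = reader->Get(content_id, &data);
  if (!status.ok()) return status;
  if (!proto->ParseFromArray(data.data(), static_cast<int>(data.size()))) {
    return absl::InvalidArgumentError("chunk does not contain a valid proto");
  }
  return absl::OkStatus();
}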
TEST(MemDataStoreTest, PutGet) {
  std::vector<char> expected_data = {1, 3, 3, 7};
  ContentIdProto content_id =
      ContentId::FromArray(expected_data.data(), expected_data.size());

  MemDataStore p;
  ASSERT_OK(p.Put(content_id, expected_data.data(), expected_data.size()));
  ASSERT_TRUE(p.Contains(content_id));

  std::vector<char> data;
  data.resize(expected_data.size());
  absl::StatusOr<uint64_t> bytes_read =
      p.Get(content_id, data.data(), 0, data.size());

  ASSERT_OK(bytes_read);
  EXPECT_EQ(*bytes_read, data.size());
  EXPECT_EQ(expected_data, data);
}

TEST(MemDataStoreTest, PruneSucceeds) {
  MemDataStore p;
  ContentIdProto content_ids[4];
  for (size_t n = 0; n < std::size(content_ids); ++n) {
    content_ids[n] = ContentId::FromArray(&n, sizeof(n));
    EXPECT_OK(p.Put(content_ids[n], &n, sizeof(n)));
  }

  std::unordered_set<ContentIdProto> ids_to_keep = {content_ids[0],
                                                    content_ids[2]};
  EXPECT_OK(p.Prune(std::move(ids_to_keep)));

  EXPECT_TRUE(p.Contains(content_ids[0]));
  EXPECT_TRUE(p.Contains(content_ids[2]));

  EXPECT_FALSE(p.Contains(content_ids[1]));
  EXPECT_FALSE(p.Contains(content_ids[3]));
}

TEST(MemDataStoreTest, PruneFailsNotFound) {
  MemDataStore p;
  ContentIdProto content_ids[2];
  for (size_t n = 0; n < std::size(content_ids); ++n)
    content_ids[n] = ContentId::FromArray(&n, sizeof(n));
  EXPECT_OK(p.Put(content_ids[0], nullptr, 0));

  std::unordered_set<ContentIdProto> ids_to_keep = {content_ids[1]};
  EXPECT_TRUE(absl::IsNotFound(p.Prune(std::move(ids_to_keep))));

  EXPECT_FALSE(p.Contains(content_ids[0]));
}

}  // namespace
}  // namespace cdc_ft