Files
netris-cdc-file-transfer/data_store/data_store_reader.h
Christian Schneider 4326e972ac Releasing the former Stadia file transfer tools
The tools allow efficient and fast synchronization of large directory
trees from a Windows workstation to a Linux target machine.

cdc_rsync* support efficient copy of files by using content-defined
chunking (CDC) to identify chunks within files that can be reused.

asset_stream_manager + cdc_fuse_fs support efficient streaming of a
local directory to a remote virtual file system based on FUSE. It also
employs CDC to identify and reuse unchanged data chunks.
2022-11-03 10:39:10 +01:00

130 lines
5.2 KiB
C++

/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATA_STORE_DATA_STORE_READER_H_
#define DATA_STORE_DATA_STORE_READER_H_
#include <vector>
#include "absl/status/statusor.h"
#include "common/buffer.h"
#include "common/status.h"
#include "common/status_macros.h"
#include "manifest/content_id.h"
#include "manifest/manifest_proto_defs.h"
namespace cdc_ft {
// Describes which part of a chunk needs to copied into a given buffer.
struct ChunkTransferTask {
ChunkTransferTask() {}
ChunkTransferTask(ContentIdProto id, uint64_t offset, void* data,
uint64_t size)
: id(std::move(id)), offset(offset), data(data), size(size) {}
// Identifies the chunk.
ContentIdProto id;
// Relative offset into the chunk from where data should be copied.
uint64_t offset = 0;
// Data buffer into which the chunk is written. May be null for prefetching.
void* data = nullptr;
// Size of the |data| buffer. May be zero for prefetching.
uint64_t size = 0;
// If the storage layer fetches the complete chunk data, it can be moved into
// this string so that the data provider layer can cache the chunk.
std::string chunk_data;
// Indicates if the chunk was successfully copied into |data| or prefetched.
bool done = false;
};
// A std::vector of ChunkTransferTask elements.
class ChunkTransferList : public std::vector<ChunkTransferTask> {
public:
// Returns true if all tasks with a non-zero size have |done| set to true.
bool ReadDone() const;
// Returns true if all tasks have |done| set to true, including those only
// meant for prefetching (|size| == 0).
bool PrefetchDone() const;
// Returns a comma separated string of hex IDs of all chunks in this list. If
// the optional function |filter| is given, only those chunks are included for
// which |filter| returns true.
std::string ToHexString(
std::function<bool(const ChunkTransferTask&)> filter = nullptr) const;
// Same as ToHexString, but only includes tasks having |done| set to false.
std::string UndoneToHexString() const;
};
// DataStoreReader is an abstract interface to read from all data stores used
// for the file transfer, for example: a local cache, a data store, which
// receives data via a gRPC channel, etc.
class DataStoreReader {
public:
DataStoreReader() = default;
virtual ~DataStoreReader() = default;
DataStoreReader(const DataStoreReader&) = delete;
DataStoreReader& operator=(const DataStoreReader&) = delete;
// Suggests a data prefetch size based on the given |read_size|. The default
// implementation just returns |read_size|. Override this function to
// implement a prefetching strategy.
virtual size_t PrefetchSize(size_t read_size) const;
// Reads |size| bytes from the chunk specified by |content_id|, starting
// at the given |offset|, and writes the result into |data|.
// The return value is the number of read bytes.
// If the chunk is not found in the data store, returns NotFoundError.
virtual absl::StatusOr<size_t> Get(const ContentIdProto& content_id,
void* data, size_t offset,
size_t size) = 0;
// Reads all chunks from the given task list |chunks| that are not done yet,
// copies the data into the associated buffer, and marks the chunk as done. If
// the reader fetches the full chunk, the raw data may be moved to the task as
// well for caching.
//
// Returns success even if no chunk was found. Check |chunks->ReadDone()| or
// |chunks->PrefetchDone()| to verify all chunks were fetched. Returns any
// error other than absl::NotFoundError from the underlying implementation.
//
// The default implementation calls the single item `Get()` method for each
// task in |chunks|. Override this method in a sub-class for optimized batch
// processing.
virtual absl::Status Get(ChunkTransferList* chunks);
// Reads the complete data chunk specified by |content_id| and writes the
// result into |data|.
// If the chunk is not found in the data store, returns NotFoundError.
virtual absl::Status Get(const ContentIdProto& content_id, Buffer* data) = 0;
// Reads the complete chunk identified by |content_id| and parses it as the
// given protocol buffer.
absl::Status GetProto(const ContentIdProto& content_id,
google::protobuf::Message* proto);
// Reads the complete chunk identified by |content_id| and parses it as the
// given protocol buffer. Uses the given Buffer |buf| as intermediate
// storage.
absl::Status GetProto(const ContentIdProto& content_id, Buffer* buf,
google::protobuf::Message* proto);
}; // class DataStoreReader
} // namespace cdc_ft
#endif // DATA_STORE_DATA_STORE_READER_H_