Mirror of https://github.com/nestriness/cdc-file-transfer.git
Change fastcdc to a better and simpler algorithm. (#79)
This CL changes the chunking algorithm from "normalized chunking" to simple "regression chunking", and changes the hash criteria from 'hash&mask' to 'hash<=threshold'. These are all ideas taken from the testing and analysis done at https://github.com/dbaarda/rollsum-chunking/blob/master/RESULTS.rst

Regression chunking was introduced in https://www.usenix.org/system/files/conference/atc12/atc12-final293.pdf. The algorithm uses an arbitrary number of regressions with power-of-2 regression target lengths, which means a simple bitmask can be used for the regression hash criteria.

Regression chunking yields high deduplication rates even for lower max chunk sizes, so the cdc_stream max chunk size can be reduced from 1024K to 512K. This fixes potential latency spikes caused by large chunks.
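Condensed into a single standalone function, the boundary rule described above looks roughly like this. This is only a sketch — the real implementation is in fastcdc.h further down in this diff; FindBoundarySketch and ToyGear are made-up names, and ToyGear merely stands in for the library's table of random 64-bit values.

```
#include <cstddef>
#include <cstdint>
#include <limits>

// Stand-in for the random gear look-up table shipped with the library.
static uint64_t ToyGear(uint8_t b) { return (b + 1) * 0x9E3779B97F4A7C15ULL; }

// Sketch: 'hash <= threshold' chunking with power-of-2 regression targets.
size_t FindBoundarySketch(const uint8_t* data, size_t len, size_t min_size,
                          size_t avg_size, size_t max_size) {
  if (len <= min_size) return len;
  if (len > max_size) len = max_size;
  // The hash criteria fires with probability 1/(avg - min + 1) per byte.
  const uint64_t threshold =
      std::numeric_limits<uint64_t>::max() / (avg_size - min_size + 1);
  size_t rc_len = len;   // best regression point so far (defaults to the end)
  uint64_t rc_mask = 0;  // empty mask: any position qualifies as a candidate
  uint64_t hash = std::numeric_limits<uint64_t>::max();
  size_t i = min_size > 64 ? min_size - 64 : 0;  // warm up the rolling hash
  for (; i < min_size; ++i) hash = (hash << 1) + ToyGear(data[i]);
  for (; i < len; ++i) {
    if (!(hash & rc_mask)) {
      if (hash <= threshold) return i;  // target-length criteria satisfied
      // Keep this position as the regression candidate; later candidates must
      // have at least as many most-significant zero bits, i.e. meet the next
      // power-of-2 regression target length.
      rc_len = i;
      rc_mask = std::numeric_limits<uint64_t>::max();
      while (hash & rc_mask) rc_mask <<= 1;
    }
    hash = (hash << 1) + ToyGear(data[i]);
  }
  return (hash & rc_mask) ? rc_len : i;
}
```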
@@ -14,7 +14,7 @@ experimentation. See the file `indexer.h` for preprocessor macros that can be
 enabled, for example:

 ```
-bazel build -c opt --copt=-DCDC_GEAR_TABLE=1 //cdc_indexer
+bazel build -c opt --copt=-DCDC_GEAR_BITS=32 //cdc_indexer
 ```

 At the end of the operation, the indexer outputs a summary of the results such
@@ -25,7 +25,7 @@ as the following:
 Operation succeeded.

 Chunk size (min/avg/max): 128 KB / 256 KB / 1024 KB | Threads: 12
-gear_table: 64 bit | mask_s: 0x49249249249249 | mask_l: 0x1249249249
+gear_table: 64 bit | threshold: 0x7fffc0001fff
 Duration: 00:03
 Total files: 2
 Total chunks: 39203
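The threshold printed in this summary follows directly from the configured chunk sizes: for a 64-bit gear hash it is UINT64_MAX / (avg − min + 1). A quick check with the 128 KB / 256 KB sizes shown above (a hypothetical verification snippet, not code from the repository):

```
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t min_size = 128u << 10;  // 128 KB, as in the summary above
  const uint64_t avg_size = 256u << 10;  // 256 KB
  const uint64_t threshold = UINT64_MAX / (avg_size - min_size + 1);
  std::printf("threshold: 0x%llx\n",
              static_cast<unsigned long long>(threshold));
  // Prints "threshold: 0x7fffc0001fff", matching the summary line above.
  return 0;
}
```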
@@ -140,8 +140,7 @@ Indexer::Impl::Impl(const IndexerConfig& cfg,
   fastcdc::Config ccfg(cfg_.min_chunk_size, cfg_.avg_chunk_size,
                        cfg_.max_chunk_size);
   Indexer::Chunker chunker(ccfg, nullptr);
-  cfg_.mask_s = chunker.Stage(0).mask;
-  cfg_.mask_l = chunker.Stage(chunker.StagesCount() - 1).mask;
+  cfg_.threshold = chunker.Threshold();
   // Collect inputs.
   for (auto it = inputs.begin(); it != inputs.end(); ++it) {
     inputs_.push(*it);
@@ -368,8 +367,7 @@ IndexerConfig::IndexerConfig()
       max_chunk_size(0),
       max_chunk_size_step(0),
       num_threads(0),
-      mask_s(0),
-      mask_l(0) {}
+      threshold(0) {}

 Indexer::Indexer() : impl_(nullptr) {}

@@ -27,16 +27,10 @@
 #include "fastcdc/fastcdc.h"

 // Compile-time parameters for the FastCDC algorithm.
-#define CDC_GEAR_32BIT 1
-#define CDC_GEAR_64BIT 2
-#ifndef CDC_GEAR_TABLE
-#define CDC_GEAR_TABLE CDC_GEAR_64BIT
-#endif
-#ifndef CDC_MASK_STAGES
-#define CDC_MASK_STAGES 7
-#endif
-#ifndef CDC_MASK_BIT_LSHIFT_AMOUNT
-#define CDC_MASK_BIT_LSHIFT_AMOUNT 3
+#define CDC_GEAR_32BIT 32
+#define CDC_GEAR_64BIT 64
+#ifndef CDC_GEAR_BITS
+#define CDC_GEAR_BITS CDC_GEAR_64BIT
 #endif

 namespace cdc_ft {
@@ -66,23 +60,20 @@ struct IndexerConfig {
   uint32_t num_threads;
   // Which hash function to use.
   HashType hash_type;
-  // The masks will be populated by the indexer, setting them here has no
-  // effect. They are in this struct so that they can be conveniently accessed
-  // when printing the operation summary (and since they are derived from the
-  // configuration, they are technically part of it).
-  uint64_t mask_s;
-  uint64_t mask_l;
+  // The threshold will be populated by the indexer, setting it here has no
+  // effect. It is in this struct so that it can be conveniently accessed
+  // when printing the operation summary (and since it is derived from the
+  // configuration, it is technically part of it).
+  uint64_t threshold;
 };

 class Indexer {
  public:
   using hash_t = std::string;
-#if CDC_GEAR_TABLE == CDC_GEAR_32BIT
-  typedef fastcdc::Chunker32<CDC_MASK_STAGES, CDC_MASK_BIT_LSHIFT_AMOUNT>
-      Chunker;
-#elif CDC_GEAR_TABLE == CDC_GEAR_64BIT
-  typedef fastcdc::Chunker64<CDC_MASK_STAGES, CDC_MASK_BIT_LSHIFT_AMOUNT>
-      Chunker;
+#if CDC_GEAR_BITS == CDC_GEAR_32BIT
+  typedef fastcdc::Chunker32<> Chunker;
+#elif CDC_GEAR_BITS == CDC_GEAR_64BIT
+  typedef fastcdc::Chunker64<> Chunker;
 #else
 #error "Unknown gear table"
 #endif
@@ -64,9 +64,9 @@ namespace {

 const char* GearTable() {
   // The following macros are defined in indexer.h.
-#if CDC_GEAR_TABLE == CDC_GEAR_32BIT
+#if CDC_GEAR_BITS == CDC_GEAR_32BIT
   return "32 bit";
-#elif CDC_GEAR_TABLE == CDC_GEAR_64BIT
+#elif CDC_GEAR_BITS == CDC_GEAR_64BIT
   return "64 bit";
 #else
 #error "Unknown gear table"
@@ -165,9 +165,8 @@ void ShowSummary(const IndexerConfig& cfg, const Indexer::OpStats& stats,
             << HumanBytes(cfg.max_chunk_size)
             << " | Hash: " << HashTypeToString(cfg.hash_type)
             << " | Threads: " << cfg.num_threads << std::endl;
-  std::cout << "gear_table: " << GearTable() << " | mask_s: 0x" << std::hex
-            << cfg.mask_s << " | mask_l: 0x" << cfg.mask_l << std::dec
-            << std::endl;
+  std::cout << "gear_table: " << GearTable() << " | threshold: 0x" << std::hex
+            << cfg.threshold << std::dec << std::endl;
   std::cout << std::setw(title_w) << "Duration:" << std::setw(num_w)
             << HumanDuration(elapsed) << std::endl;
   std::cout << std::setw(title_w) << "Total files:" << std::setw(num_w)
@@ -279,11 +278,10 @@ absl::Status WriteResultsFile(const std::string& filepath,

   path::FileCloser closer(fout);

-  static constexpr int num_columns = 15;
+  static constexpr int num_columns = 14;
   static const char* columns[num_columns] = {
       "gear_table",
-      "mask_s",
-      "mask_l",
+      "threshold",
       "Min chunk size [KiB]",
       "Avg chunk size [KiB]",
       "Max chunk size [KiB]",
@@ -332,7 +330,7 @@ absl::Status WriteResultsFile(const std::string& filepath,
   // Write user-supplied description
   if (!description.empty()) std::fprintf(fout, "%s,", description.c_str());
   // Write chunking params.
-  std::fprintf(fout, "%s,0x%zx,0x%zx,", GearTable(), cfg.mask_s, cfg.mask_l);
+  std::fprintf(fout, "%s,0x%zx,", GearTable(), cfg.threshold);
   std::fprintf(fout, "%zu,%zu,%zu,", cfg.min_chunk_size >> 10,
                cfg.avg_chunk_size >> 10, cfg.max_chunk_size >> 10);
   // Write speed, files, chunks.
@@ -158,7 +158,7 @@ class MultiSessionTest : public ManifestTestBase {
     EXPECT_EQ(data->file_count, file_count);
     EXPECT_EQ(data->min_chunk_size, 128 << 10);
     EXPECT_EQ(data->avg_chunk_size, 256 << 10);
-    EXPECT_EQ(data->max_chunk_size, 1024 << 10);
+    EXPECT_EQ(data->max_chunk_size, 512 << 10);
   }

   metrics::ManifestUpdateData GetManifestUpdateData(
@@ -24,14 +24,12 @@
 #include <cstdio>
 #include <functional>
 #include <iostream>
+#include <limits>
 #include <vector>

 namespace cdc_ft {
 namespace fastcdc {

-static constexpr uint32_t default_mask_stages = 7;
-static constexpr uint32_t default_mask_lshift = 3;
-
 // Configures the chunk sizes that the ChunkerTmpl class produces. All sizes are
 // given in bytes.
 struct Config {
@@ -41,9 +39,12 @@ struct Config {
   // that this size can still be undercut for the last chunk after processing
   // the input data.
   size_t min_size;
-  // The average chunk size is the target size for chunks. Sizes will show a
-  // normal distribution around the average size, depending on the template
-  // parameters of the ChunkerTmpl class.
+  // The average chunk size is the target size for chunks, not including the
+  // effects of max_size regression. Before regression, sizes will show an
+  // offset exponential distribution decaying after min_size with the desired
+  // average size. Regression will "reflect-back" the exponential
+  // distribution past max_size, which reduces the actual average size and
+  // gives a very flat distribution when max_size is small.
   size_t avg_size;
   // The maximum size is the upper bound for generating chunks. This limit is
   // never exceeded. If a chunk boundary was not detected based on the content
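A rough consequence of the 'hash <= threshold' rule, assuming a uniformly distributed hash: every byte past min_size becomes a boundary with probability p = 1/(avg_size − min_size + 1), so the gap after min_size is geometrically distributed with mean 1/p and the average chunk lands near avg_size before max_size regression. A small illustration using the 128/256 KiB cdc_stream defaults (the numbers are an assumption for the example, not code from the repository):

```
#include <cstdio>

int main() {
  const double min_size = 128.0 * 1024;  // assumed example sizes
  const double avg_size = 256.0 * 1024;
  const double p = 1.0 / (avg_size - min_size + 1);  // per-byte boundary odds
  const double mean_chunk = min_size + 1.0 / p;      // min + mean geometric gap
  std::printf("p = %.3g per byte, mean chunk = %.0f bytes (~%.0f KiB)\n", p,
              mean_chunk, mean_chunk / 1024);
  return 0;
}
```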
@@ -57,53 +58,57 @@ using ChunkFoundHandler = std::function<void(const uint8_t* data, size_t len)>;
 // Implements a very fast content-defined chunking algorithm.
 //
 // FastCDC [1] identifies chunk boundaries based on a simple yet efficient
-// rolling hash. This library implements a modified version of this algorithm to
-// achieve better normalization of the chunk sizes around the target average
-// size. This behavior can be tweaked with several parameters.
+// "gear" rolling hash, a "normalized chunking" algorithm using a stepped
+// chunk probability with a pair spread-out bitmasks for the '!(hash&mask)'
+// "hash criteria".
+//
+// This library implements a modified version based on rollsum-chunking [2]
+// tests and analysis that showed simple "exponential chunking" gives better
+// deduplication, and a 'hash<=threshold' "hash criteria" works better for
+// the gear rollsum and can support arbitrary non-power-of-two sizes.
+//
+// For limiting block sizes it uses a modified version of "Regression
+// Chunking"[3] with an arbitrary number of regressions using power-of-2
+// target block lengths (not multiples of the target block length, which
+// doesn't have to be a power-of-2). This means we can use a bitmask for the
+// most significant bits for the regression hash criteria.
 //
 // The Config struct passed in during construction defines the minimum, average,
 // and maximum allowed chunk sizes. Those are runtime parameters.
 //
 // The template allows additional compile-time configuration:
-// - T, gear: an array of random numbers that serves as a look-up table to
+//
+// - T : The type used for the hash. Should be an unsigned integer type,
+// ideally uint32_t or uint64_t. The number of bits of this type determines
+// the "sliding window" size of the gear hash. A smaller type is likely to be
+// faster at the expense of reduced deduplication.
+//
+// - gear: an array of random numbers that serves as a look-up table to
 // modify be added to the rolling hash in each round based on the input data.
-// This library comes with two different tables, one of type uint32_t and one of
-// uint64_t. Both showed good results in our experiments, yet the 64-bit version
-// provided slightly better deduplication.
-// - mask_stages: the number of stages in which the requirements for identifying
-// a chunk boundary is gradually losened as the amount of data processed is
-// approaching the maximum chunk size. More stages result in a smoother normal
-// distribution of chunk sizes around the configured average size. Our
-// experiments showed good normalization with stages between 5 and 9.
-// - mask_lshift: defines how much the bits set in the mask that identifies the
-// chunk boundary are spread apart. Our experiments showed a better
-// deduplication rate with a small amount of lshift (between 2 and 4).
+// This library comes with two different tables, one of type uint32_t and one
+// of uint64_t. Both showed good results in our experiments, yet the 64-bit
+// version provided slightly better deduplication.
 //
 // [1] https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf.
+// [2] https://github.com/dbaarda/rollsum-chunking/blob/master/RESULTS.rst
+// [3] https://www.usenix.org/system/files/conference/atc12/atc12-final293.pdf
 //
 // TODO: Remove template parameters.
-template <typename T, const T gear[256],
-          uint32_t mask_stages = default_mask_stages,
-          uint32_t mask_lshift = default_mask_lshift>
+template <typename T, const T gear[256]>
 class ChunkerTmpl {
  public:
-  struct MaskStage {
-    size_t barrier;
-    uint64_t mask;
-  };
-
   // Constructor.
   ChunkerTmpl(const Config& cfg, ChunkFoundHandler handler)
       : cfg_(cfg), handler_(handler) {
-    static_assert(mask_stages > 0 && mask_stages <= 64,
-                  "mask_stages must be between 1 and 64");
-    static_assert(mask_lshift > 0 && mask_lshift <= 31,
-                  "mask_lshift must be between 1 and 31");
+    assert(cfg_.avg_size >= 1);
     assert(cfg_.min_size <= cfg_.avg_size);
     assert(cfg_.avg_size <= cfg_.max_size);

+    // Calculate the threshold the hash must be <= to for a 1/(avg-min+1)
+    // chance of a chunk boundary.
+    threshold_ =
+        std::numeric_limits<T>::max() / (cfg_.avg_size - cfg_.min_size + 1);
     data_.reserve(cfg_.max_size << 1);
-    InitStages();
   }

   // Slices the given data block into chunks and calls the specified handler
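The "sliding window" remark above (and the kHashBits warm-up used below) follows from the update rule hash = (hash << 1) + gear[byte]: once 64 further bytes have been processed, every earlier contribution has been shifted out of a 64-bit hash. A self-contained check, with a made-up stand-in for the gear table:

```
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Stand-in for the gear table; only its determinism matters here.
static uint64_t ToyGear(uint8_t b) { return (b + 1) * 0x9E3779B97F4A7C15ULL; }

static uint64_t GearHash(const uint8_t* data, size_t len) {
  uint64_t hash = UINT64_MAX;
  for (size_t i = 0; i < len; ++i) hash = (hash << 1) + ToyGear(data[i]);
  return hash;
}

int main() {
  uint8_t a[100], b[100];
  for (int i = 0; i < 100; ++i) a[i] = b[i] = static_cast<uint8_t>(i);
  for (int i = 0; i < 36; ++i) b[i] ^= 0xFF;  // differ only in the first bytes
  // The last 64 bytes agree, so the two 64-bit gear hashes agree as well.
  std::printf("hashes equal: %s\n",
              GearHash(a, 100) == GearHash(b, 100) ? "yes" : "no");
  return 0;
}
```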
@@ -145,92 +150,10 @@ class ChunkerTmpl {
   // be smaller than the specified minimum chunk size.
   void Finalize() { Process(nullptr, 0); }

-  // Returns the number of mask stages used for determening chunk boundaries.
-  uint32_t StagesCount() { return mask_stages; }
-
-  // Returns the mask stage with the given index.
-  const MaskStage& Stage(uint32_t i) {
-    assert(i < mask_stages);
-    return stages_[i];
-  }
+  // Returns the threshold for the hash <= threshold chunk boundary.
+  T Threshold() { return threshold_; }

  private:
-  // Returns approximately log_2 of the given size, slightly adjusted to better
-  // achieve the average chunk size.
-  static uint32_t Bits(size_t size) {
-    uint32_t bits = 0;
-    for (; size > 0; size >>= 1) bits++;
-    // Adjust number of bits to better hit the target chunk size (evaluated via
-    // experiments).
-    return bits > 3 ? bits - 3 : 1;
-  }
-
-  // Returns a bitmask with the given number of bits set to 1.
-  static uint64_t Mask(const uint32_t bits) {
-    assert(bits > 0 && bits < 64);
-    uint64_t mask = 0;
-
-    // Check which bit pattern we need to make the 1s fit into 64 bit:
-    // 10..10..10... vs. 110..110..110... vs. 1110..1110..1110...
-    uint64_t pattern = 1ull;
-    uint32_t actual_lshift = mask_lshift;
-    for (uint32_t num_ones = 1; num_ones <= 32; num_ones++) {
-      // Round up integer division: (bits + num_ones - 1) / num_ones
-      if (((bits + num_ones - 1) / num_ones) * actual_lshift < 64) {
-        // The number of rounds needed depends on the number of 1s in "pattern".
-        uint32_t num_shifts = bits / num_ones;
-        for (uint32_t j = 0; j < num_shifts; j++) {
-          mask = (mask << actual_lshift) | pattern;
-        }
-        // Append any missing 1s to the end.
-        for (uint32_t j = num_shifts * num_ones; j < bits; j++) {
-          mask = (mask << 1) | 1ull;
-        }
-        return mask;
-      }
-      // Switch to the next denser pattern (e.g. 100100... => 11001100...).
-      pattern = (pattern << 1) | 1ull;
-      actual_lshift++;
-    }
-    // If we came here it's likely an error.
-    assert(bits == 0 || mask != 0);
-    return mask;
-  }
-
-  void InitStages() {
-    constexpr uint32_t mask_stages_left = mask_stages / 2;
-    constexpr uint32_t mask_stages_right = mask_stages - mask_stages_left;
-    const uint32_t avg_bits = Bits(cfg_.avg_size);
-
-    // Minimum distance from the average size to the extremes.
-    size_t dist =
-        std::min(cfg_.avg_size - cfg_.min_size, cfg_.max_size - cfg_.avg_size);
-    int stg = 0;
-    // Decrease mask bits by one in each stage from (bits + n) downto (bits +
-    // 1), barriers at 1/2, 1/3, ... 1/(n+1) of dist.
-    for (uint32_t i = 0; i < mask_stages_left; i++) {
-      // Bitmasks require at least one bit set.
-      uint32_t bits =
-          avg_bits + mask_stages_left > i ? avg_bits + mask_stages_left - i : 1;
-      stages_[stg].mask = Mask(bits);
-      stages_[stg].barrier = cfg_.avg_size - dist / (i + 2);
-      stg++;
-    }
-    // Decrease mask bits by one in each stage from (bits) downto (bits - n),
-    // barriers at 1/(n+1), 1/n, ..., 1/2 of dist.
-    for (int i = mask_stages_right; i > 0; i--) {
-      // Bitmasks require at least one bit set.
-      uint32_t bits = avg_bits + i > mask_stages_right
-                          ? avg_bits + i - mask_stages_right
-                          : 1;
-      stages_[stg].mask = Mask(bits);
-      stages_[stg].barrier = cfg_.avg_size + dist / i;
-      stg++;
-    }
-    // Adjust the final barrier to the max. chunk size.
-    stages_[mask_stages - 1].barrier = cfg_.max_size;
-  }
-
   size_t FindChunkBoundary(const uint8_t* data, size_t len) {
     if (len <= cfg_.min_size) {
       return len;
@@ -239,30 +162,41 @@ class ChunkerTmpl {
       len = cfg_.max_size;
     }

+    // Initialize the regression length to len (the end) and the regression
+    // mask to an empty bitmask (match any hash).
+    size_t rc_len = len;
+    T rc_mask = 0;
+
     // Init hash to all 1's to avoid zero-length chunks with min_size=0.
-    uint64_t hash = UINT64_MAX;
-    // Skip the first min_size bytes, but "warm up" the rolling hash for 64
-    // rounds to make sure the 64-bit hash has gathered full "content history".
-    size_t i = cfg_.min_size > 64 ? cfg_.min_size - 64 : 0;
+    T hash = std::numeric_limits<T>::max();
+    // Skip the first min_size bytes, but "warm up" the rolling hash for enough
+    // rounds to make sure the hash has gathered full "content history".
+    size_t i = cfg_.min_size > kHashBits ? cfg_.min_size - kHashBits : 0;
     for (/*empty*/; i < cfg_.min_size; ++i) {
       hash = (hash << 1) + gear[data[i]];
     }
-    for (uint32_t stg = 0; stg < mask_stages && i < len; stg++) {
-      uint64_t mask = stages_[stg].mask;
-      size_t barrier = std::min(len, stages_[stg].barrier);
-      for (/*empty*/; i < barrier; ++i) {
-        if (!(hash & mask)) {
+    for (/*empty*/; i < len; ++i) {
+      if (!(hash & rc_mask)) {
+        if (hash <= threshold_) {
+          // This hash matches the target length hash criteria, return it.
           return i;
         }
+        // This is a better regression point. Set it as the new rc_len and
+        // update rc_mask to check as many MSBits as this hash would pass.
+        rc_len = i;
+        rc_mask = std::numeric_limits<T>::max();
+        while (hash & rc_mask) rc_mask <<= 1;
+      }
       hash = (hash << 1) + gear[data[i]];
     }
-    }
-    return i;
+    // Return best regression point we found or the end if it's better.
+    return (hash & rc_mask) ? rc_len : i;
   }

+  static constexpr size_t kHashBits = sizeof(T) * 8;
   const Config cfg_;
   const ChunkFoundHandler handler_;
-  MaskStage stages_[mask_stages];
+  T threshold_;
   std::vector<uint8_t> data_;
 };
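To make the regression criteria concrete: after a candidate position fails the threshold test, the while loop leaves rc_mask covering exactly that hash's leading zero bits, so a later position only replaces it if its hash has at least as many leading zeros — a 1-in-2^k event per byte, which is what yields the power-of-2 regression target lengths mentioned in the commit message. A small illustration with an arbitrary example hash:

```
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t hash = 0x0000123456789abcULL;  // example hash, 19 leading zeros
  uint64_t rc_mask = UINT64_MAX;
  while (hash & rc_mask) rc_mask <<= 1;  // same update as in FindChunkBoundary
  int bits = 0;
  for (uint64_t m = rc_mask; m != 0; m <<= 1) ++bits;  // count bits in the mask
  std::printf("rc_mask = 0x%016llx (top %d bits)\n",
              static_cast<unsigned long long>(rc_mask), bits);
  // Prints "rc_mask = 0xffffe00000000000 (top 19 bits)": a later hash passes
  // '!(hash & rc_mask)' only if its top 19 bits are zero, which a uniform hash
  // does roughly once every 2^19 bytes.
  return 0;
}
```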
@@ -404,16 +338,12 @@ static constexpr uint64_t gear64[256] = {
 };  // namespace internal

 // Chunker template with a 32-bit gear table.
-template <uint32_t mask_stages = default_mask_stages,
-          uint32_t mask_lshift = default_mask_lshift>
-using Chunker32 =
-    ChunkerTmpl<uint32_t, internal::gear32, mask_stages, mask_lshift>;
+template <const uint32_t gear[256] = internal::gear32>
+using Chunker32 = ChunkerTmpl<uint32_t, gear>;

 // Chunker template with a 64-bit gear table.
-template <uint32_t mask_stages = default_mask_stages,
-          uint32_t mask_lshift = default_mask_lshift>
-using Chunker64 =
-    ChunkerTmpl<uint64_t, internal::gear64, mask_stages, mask_lshift>;
+template <const uint64_t gear[256] = internal::gear64>
+using Chunker64 = ChunkerTmpl<uint64_t, gear>;

 // Default chunker class using params that are known to work well.
 using Chunker = Chunker64<>;
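A minimal usage sketch of the simplified chunker, based only on the declarations visible in this diff (Config, ChunkFoundHandler, Chunker64<>, Process, Finalize, Threshold); the 128/256/512-byte sizes match the new ValidateThreshold test below, and the sample data is arbitrary:

```
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

#include "fastcdc/fastcdc.h"

int main() {
  using namespace cdc_ft::fastcdc;
  Config cfg(128, 256, 512);  // min/avg/max chunk sizes in bytes
  // The handler receives each finished chunk (pointer + length).
  Chunker64<> chunker(cfg, [](const uint8_t* /*data*/, size_t len) {
    std::printf("chunk of %zu bytes\n", len);
  });
  std::vector<uint8_t> data(4096, 0x5A);
  chunker.Process(data.data(), data.size());
  chunker.Finalize();  // flush the trailing (possibly short) chunk
  // With these sizes the threshold is UINT64_MAX / 129 == 0x1fc07f01fc07f01.
  std::printf("threshold: 0x%llx\n",
              static_cast<unsigned long long>(chunker.Threshold()));
  return 0;
}
```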
@@ -25,160 +25,26 @@ namespace fastcdc {
 // - data like {1, 1, 1, ...} results in a continuously all-ones rolling hash,
 //   thus is never identified as a chunk boundary.
 static const uint64_t testgear64[256]{0, 1};  // 0, 1, 0, 0, 0, ...
-static constexpr uint32_t test_mask_stages = 5;
-static constexpr uint32_t test_mask_lshift = 1;

-template <uint32_t mask_stages = test_mask_stages,
-          uint32_t mask_lshift = test_mask_lshift>
-using TestChunker = ChunkerTmpl<uint64_t, testgear64, mask_stages, mask_lshift>;
-
-// Returns the number of bits set to 1 in the given mask.
-uint32_t BitCount(uint64_t mask) {
-  uint32_t count = 0;
-  for (; mask; mask >>= 1) {
-    count += mask & 1u;
-  }
-  return count;
-}
+template <const uint64_t gear[256] = testgear64>
+using TestChunker = Chunker64<gear>;

 class ChunkerTest : public ::testing::Test {
  public:
   ChunkerTest() {}
-
- protected:
-  template <uint32_t mask_stages>
-  static void ValidateStagesTmpl(const Config& cfg);
-
-  template <uint32_t mask_lshift>
-  static void ValidateLshiftTmpl(const Config& cfg);
 };

-template <uint32_t mask_stages>
-void ChunkerTest::ValidateStagesTmpl(const Config& cfg) {
-  TestChunker<mask_stages> chunker(cfg, nullptr);
-  EXPECT_EQ(chunker.StagesCount(), mask_stages);
-
-  for (uint32_t i = 1; i < chunker.StagesCount(); i++) {
-    auto prev_stg = chunker.Stage(i - 1);
-    auto stg = chunker.Stage(i);
-    EXPECT_LT(prev_stg.barrier, stg.barrier)
-        << "Stage " << i + 1 << " of " << mask_stages
-        << ": barriers should be at increasing positions";
-    if (prev_stg.mask > 1) {
-      EXPECT_EQ(BitCount(prev_stg.mask), BitCount(stg.mask) + 1)
-          << "Stage " << i + 1 << " of " << mask_stages
-          << ": number of bits in adjacent stages should differ by 1";
-    } else {
-      EXPECT_EQ(1, BitCount(stg.mask))
-          << "Stage " << i + 1 << " of " << mask_stages
-          << ": number of bits in last bitmasks should be 1";
-    }
-  }
-
-  EXPECT_EQ(chunker.Stage(mask_stages - 1).barrier, cfg.max_size)
-      << "final stage barrier must match the maximum chunk size";
-}
-
-// Tests that the stages to apply different bitmasks are initialized properly
-TEST_F(ChunkerTest, ValidateStages) {
+// Tests that the threshold for hash comparison is set correctly.
+TEST_F(ChunkerTest, ValidateThreshold) {
   // Sizes: 128/256/512 bytes
   Config cfg(128, 256, 512);
-  ValidateStagesTmpl<1>(cfg);
-  ValidateStagesTmpl<2>(cfg);
-  ValidateStagesTmpl<3>(cfg);
-  ValidateStagesTmpl<4>(cfg);
-  ValidateStagesTmpl<5>(cfg);
-  ValidateStagesTmpl<6>(cfg);
-  ValidateStagesTmpl<7>(cfg);
-  ValidateStagesTmpl<8>(cfg);
-
-  // Sizes: 128/256/512 KiB
-  cfg = Config(128 << 10, 256 << 10, 512 << 10);
-  ValidateStagesTmpl<1>(cfg);
-  ValidateStagesTmpl<2>(cfg);
-  ValidateStagesTmpl<3>(cfg);
-  ValidateStagesTmpl<4>(cfg);
-  ValidateStagesTmpl<5>(cfg);
-  ValidateStagesTmpl<6>(cfg);
-  ValidateStagesTmpl<7>(cfg);
-  ValidateStagesTmpl<8>(cfg);
-  ValidateStagesTmpl<16>(cfg);
-  ValidateStagesTmpl<32>(cfg);
-  ValidateStagesTmpl<64>(cfg);
-
-  // Sizes: 128/256/512 MiB
-  cfg = Config(128 << 20, 256 << 20, 512 << 20);
-  ValidateStagesTmpl<1>(cfg);
-  ValidateStagesTmpl<2>(cfg);
-  ValidateStagesTmpl<3>(cfg);
-  ValidateStagesTmpl<4>(cfg);
-  ValidateStagesTmpl<5>(cfg);
-  ValidateStagesTmpl<6>(cfg);
-  ValidateStagesTmpl<7>(cfg);
-  ValidateStagesTmpl<8>(cfg);
-  ValidateStagesTmpl<16>(cfg);
-  ValidateStagesTmpl<32>(cfg);
-  ValidateStagesTmpl<64>(cfg);
-
-  // Sizes: 0/512/1024 KiB
-  cfg = Config(0, 512 << 10, 1024 << 10);
-  ValidateStagesTmpl<1>(cfg);
-  ValidateStagesTmpl<2>(cfg);
-  ValidateStagesTmpl<3>(cfg);
-  ValidateStagesTmpl<4>(cfg);
-  ValidateStagesTmpl<5>(cfg);
-  ValidateStagesTmpl<6>(cfg);
-  ValidateStagesTmpl<7>(cfg);
-  ValidateStagesTmpl<8>(cfg);
-  ValidateStagesTmpl<16>(cfg);
-  ValidateStagesTmpl<32>(cfg);
-  ValidateStagesTmpl<64>(cfg);
-
-  // Sizes: 0/512/1024 MiB
-  cfg = Config(0, 512 << 20, 1024 << 20);
-  ValidateStagesTmpl<1>(cfg);
-  ValidateStagesTmpl<2>(cfg);
-  ValidateStagesTmpl<3>(cfg);
-  ValidateStagesTmpl<4>(cfg);
-  ValidateStagesTmpl<5>(cfg);
-  ValidateStagesTmpl<6>(cfg);
-  ValidateStagesTmpl<7>(cfg);
-  ValidateStagesTmpl<8>(cfg);
-  ValidateStagesTmpl<16>(cfg);
-  ValidateStagesTmpl<32>(cfg);
-  ValidateStagesTmpl<64>(cfg);
-}
-
-template <uint32_t mask_lshift>
-void ChunkerTest::ValidateLshiftTmpl(const Config& cfg) {
-  TestChunker<1, mask_lshift> chunker(cfg, nullptr);
-  uint64_t mask = chunker.Stage(0).mask;
-  uint64_t expected = BitCount(mask);
-  EXPECT_GE(expected, 1) << "no bits were set in the bit mask for lshift "
-                         << mask_lshift;
-  // Compare no. of all 1-bits to no. of 1-bits with the given shift amount.
-  uint32_t actual = 0;
-  for (; mask; mask >>= mask_lshift) {
-    actual += mask & 1u;
-  }
-  EXPECT_EQ(expected, actual)
-      << "number of bits set is different with lshift " << mask_lshift;
-}
-
-// Tests that the bitmasks for each stage honor the mask_lshift template
-// parameter correctly.
-TEST_F(ChunkerTest, ValidateLshift) {
-  Config cfg(32, 64, 128);
-  ValidateLshiftTmpl<1>(cfg);
-  ValidateLshiftTmpl<2>(cfg);
-  ValidateLshiftTmpl<3>(cfg);
-  ValidateLshiftTmpl<4>(cfg);
-  ValidateLshiftTmpl<5>(cfg);
+  TestChunker<> chunker(cfg, nullptr);
+  EXPECT_EQ(0x1fc07f01fc07f01, chunker.Threshold());
 }

 // Tests that the minimum chunk size is not undercut.
 TEST_F(ChunkerTest, MinChunkSize) {
-  Config cfg(32, 64, 128);
+  Config cfg(64, 96, 128);
   std::vector<size_t> chunk_sizes;
   TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
     chunk_sizes.push_back(len);
@@ -187,7 +53,7 @@ TEST_F(ChunkerTest, MinChunkSize) {
   std::vector<uint8_t> data(cfg.max_size, 0);
   chunker.Process(data.data(), data.size());
   chunker.Finalize();
-  EXPECT_EQ(chunk_sizes.size(), 4);
+  EXPECT_EQ(chunk_sizes.size(), 2);
   for (size_t size : chunk_sizes) {
     EXPECT_EQ(size, cfg.min_size);
   }
@@ -201,8 +201,11 @@ bool ManifestTestBase::InProgress(const ContentIdProto& manifest_id,

 void ManifestTestBase::ValidateChunkLookup(const std::string& rel_path,
                                            bool expect_contained) {
+  Buffer file;
+  EXPECT_OK(path::ReadFile(path::Join(cfg_.src_dir, rel_path), &file));
+
   uint64_t offset = 0;
-  auto handler = [&offset, &rel_path, file_chunks = &file_chunks_,
+  auto handler = [&file, &offset, &rel_path, file_chunks = &file_chunks_,
                   expect_contained](const void* data, size_t size) {
     ContentIdProto id = ContentId::FromArray(data, size);

@@ -214,8 +217,14 @@ void ManifestTestBase::ValidateChunkLookup(const std::string& rel_path,
               expect_contained);
     if (expect_contained) {
       EXPECT_EQ(lookup_path, rel_path);
-      EXPECT_EQ(lookup_offset, offset);
       EXPECT_EQ(lookup_size, size);
+
+      // The offset can be ambiguous since the file might contain duplicate
+      // data. Make sure that the actual data is the same.
+      EXPECT_LE(offset + size, file.size());
+      EXPECT_LE(lookup_offset + size, file.size());
+      EXPECT_EQ(memcmp(file.data() + offset, file.data() + lookup_offset, size),
+                0);
     }

     offset += size;
@@ -224,9 +233,7 @@ void ManifestTestBase::ValidateChunkLookup(const std::string& rel_path,
                       cfg_.max_chunk_size);
   fastcdc::Chunker chunker(cdc_cfg, handler);

-  Buffer b;
-  EXPECT_OK(path::ReadFile(path::Join(cfg_.src_dir, rel_path), &b));
-  chunker.Process(reinterpret_cast<uint8_t*>(b.data()), b.size());
+  chunker.Process(reinterpret_cast<uint8_t*>(file.data()), file.size());
   chunker.Finalize();
 }

@@ -51,7 +51,7 @@ struct UpdaterConfig {
   size_t avg_chunk_size = 256 << 10;

   // Maximum allowed chunk size.
-  size_t max_chunk_size = 1024 << 10;
+  size_t max_chunk_size = 512 << 10;

   // Size of the chunker thread pool. Defaults to the number of available CPUs.
   uint32_t num_threads = 0;
@@ -180,19 +180,19 @@ TEST_F(ManifestUpdaterTest, UpdateAll_PrunesUnreferencedChunks) {
   EXPECT_OK(updater.Update(
       MakeUpdateOps({"subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}),
       &file_chunks_, nullptr));
-  // 1 for manifest id, 1 for manifest, 5 indirect assets.
+  // 1 for manifest id, 1 for manifest, 6 indirect assets.
   // 2 additional chunks from the first Update() that are now unreferenced.
   // -1, because the indirect asset for "a.txt" is deduplicated
-  EXPECT_EQ(data_store_.Chunks().size(), 8)
+  EXPECT_EQ(data_store_.Chunks().size(), 9)
       << "Manifest: " << ContentId::ToHexString(updater.ManifestId())
       << std::endl
       << DumpDataStoreProtos();

   EXPECT_OK(updater.UpdateAll(&file_chunks_));
   EXPECT_OK(updater.UpdateAll(&file_chunks_));
-  // 1 for manifest id, 1 for manifest, 5 indirect assets.
+  // 1 for manifest id, 1 for manifest, 6 indirect assets.
   // Pruning has removed the 2 unreferenced ones.
-  EXPECT_EQ(data_store_.Chunks().size(), 7)
+  EXPECT_EQ(data_store_.Chunks().size(), 8)
       << "Manifest: " << ContentId::ToHexString(updater.ManifestId())
       << std::endl
       << DumpDataStoreProtos();
@@ -224,9 +224,9 @@ TEST_F(ManifestUpdaterTest, UpdateAll_RecoversFromMissingChunks) {
   }

   EXPECT_OK(updater.UpdateAll(&file_chunks_));
-  // 1 for manifest id, 1 for manifest, 5 indirect assets.
-  // There would be 8 chunks without the removal above, see UpdateAll_Prune.
-  EXPECT_EQ(data_store_.Chunks().size(), 7)
+  // 1 for manifest id, 1 for manifest, 6 indirect assets.
+  // There would be 9 chunks without the removal above, see UpdateAll_Prune.
+  EXPECT_EQ(data_store_.Chunks().size(), 8)
       << "Manifest: " << ContentId::ToHexString(updater.ManifestId())
       << std::endl
       << DumpDataStoreProtos();