mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-01-30 12:25:35 +02:00
The tools allow efficient and fast synchronization of large directory trees from a Windows workstation to a Linux target machine. cdc_rsync* support efficient copy of files by using content-defined chunking (CDC) to identify chunks within files that can be reused. asset_stream_manager + cdc_fuse_fs support efficient streaming of a local directory to a remote virtual file system based on FUSE. It also employs CDC to identify and reuse unchanged data chunks.
255 lines
8.0 KiB
C++
255 lines
8.0 KiB
C++
// Copyright 2022 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "fastcdc/fastcdc.h"
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
namespace cdc_ft {
|
|
namespace fastcdc {
|
|
|
|
// This gear function has the following properties:
|
|
// - data like {0, 0, 0, ...} results in a continuously zero rolling hash, thus
|
|
// is always identified as a chunk boundary.
|
|
// - data like {1, 1, 1, ...} results in a continuously all-ones rolling hash,
|
|
// thus is never identified as a chunk boundary.
|
|
static const uint64_t testgear64[256]{0, 1}; // 0, 1, 0, 0, 0, ...
|
|
static constexpr uint32_t test_mask_stages = 5;
|
|
static constexpr uint32_t test_mask_lshift = 1;
|
|
|
|
template <uint32_t mask_stages = test_mask_stages,
|
|
uint32_t mask_lshift = test_mask_lshift>
|
|
using TestChunker = ChunkerTmpl<uint64_t, testgear64, mask_stages, mask_lshift>;
|
|
|
|
// Returns the number of bits set to 1 in the given mask.
//
// Declared static so that this test-only helper has internal linkage, like the
// other file-local definitions in this file.
static uint32_t BitCount(uint64_t mask) {
  uint32_t count = 0;
  // `mask & (mask - 1)` clears the lowest set bit, so the loop body runs
  // exactly once per 1-bit.
  for (; mask != 0; mask &= mask - 1) {
    ++count;
  }
  return count;
}
|
|
|
|
class ChunkerTest : public ::testing::Test {
|
|
public:
|
|
ChunkerTest() {}
|
|
|
|
protected:
|
|
template <uint32_t mask_stages>
|
|
static void ValidateStagesTmpl(const Config& cfg);
|
|
|
|
template <uint32_t mask_lshift>
|
|
static void ValidateLshiftTmpl(const Config& cfg);
|
|
};
|
|
|
|
template <uint32_t mask_stages>
|
|
void ChunkerTest::ValidateStagesTmpl(const Config& cfg) {
|
|
TestChunker<mask_stages> chunker(cfg, nullptr);
|
|
EXPECT_EQ(chunker.StagesCount(), mask_stages);
|
|
|
|
for (uint32_t i = 1; i < chunker.StagesCount(); i++) {
|
|
auto prev_stg = chunker.Stage(i - 1);
|
|
auto stg = chunker.Stage(i);
|
|
EXPECT_LT(prev_stg.barrier, stg.barrier)
|
|
<< "Stage " << i + 1 << " of " << mask_stages
|
|
<< ": barriers should be at increasing positions";
|
|
if (prev_stg.mask > 1) {
|
|
EXPECT_EQ(BitCount(prev_stg.mask), BitCount(stg.mask) + 1)
|
|
<< "Stage " << i + 1 << " of " << mask_stages
|
|
<< ": number of bits in adjacent stages should differ by 1";
|
|
} else {
|
|
EXPECT_EQ(1, BitCount(stg.mask))
|
|
<< "Stage " << i + 1 << " of " << mask_stages
|
|
<< ": number of bits in last bitmasks should be 1";
|
|
}
|
|
}
|
|
|
|
EXPECT_EQ(chunker.Stage(mask_stages - 1).barrier, cfg.max_size)
|
|
<< "final stage barrier must match the maximum chunk size";
|
|
}
|
|
|
|
// Tests that the stages to apply different bitmasks are initialized properly
|
|
TEST_F(ChunkerTest, ValidateStages) {
|
|
// Sizes: 128/256/512 bytes
|
|
Config cfg(128, 256, 512);
|
|
ValidateStagesTmpl<1>(cfg);
|
|
ValidateStagesTmpl<2>(cfg);
|
|
ValidateStagesTmpl<3>(cfg);
|
|
ValidateStagesTmpl<4>(cfg);
|
|
ValidateStagesTmpl<5>(cfg);
|
|
ValidateStagesTmpl<6>(cfg);
|
|
ValidateStagesTmpl<7>(cfg);
|
|
ValidateStagesTmpl<8>(cfg);
|
|
|
|
// Sizes: 128/256/512 KiB
|
|
cfg = Config(128 << 10, 256 << 10, 512 << 10);
|
|
ValidateStagesTmpl<1>(cfg);
|
|
ValidateStagesTmpl<2>(cfg);
|
|
ValidateStagesTmpl<3>(cfg);
|
|
ValidateStagesTmpl<4>(cfg);
|
|
ValidateStagesTmpl<5>(cfg);
|
|
ValidateStagesTmpl<6>(cfg);
|
|
ValidateStagesTmpl<7>(cfg);
|
|
ValidateStagesTmpl<8>(cfg);
|
|
ValidateStagesTmpl<16>(cfg);
|
|
ValidateStagesTmpl<32>(cfg);
|
|
ValidateStagesTmpl<64>(cfg);
|
|
|
|
// Sizes: 128/256/512 MiB
|
|
cfg = Config(128 << 20, 256 << 20, 512 << 20);
|
|
ValidateStagesTmpl<1>(cfg);
|
|
ValidateStagesTmpl<2>(cfg);
|
|
ValidateStagesTmpl<3>(cfg);
|
|
ValidateStagesTmpl<4>(cfg);
|
|
ValidateStagesTmpl<5>(cfg);
|
|
ValidateStagesTmpl<6>(cfg);
|
|
ValidateStagesTmpl<7>(cfg);
|
|
ValidateStagesTmpl<8>(cfg);
|
|
ValidateStagesTmpl<16>(cfg);
|
|
ValidateStagesTmpl<32>(cfg);
|
|
ValidateStagesTmpl<64>(cfg);
|
|
|
|
// Sizes: 0/512/1024 KiB
|
|
cfg = Config(0, 512 << 10, 1024 << 10);
|
|
ValidateStagesTmpl<1>(cfg);
|
|
ValidateStagesTmpl<2>(cfg);
|
|
ValidateStagesTmpl<3>(cfg);
|
|
ValidateStagesTmpl<4>(cfg);
|
|
ValidateStagesTmpl<5>(cfg);
|
|
ValidateStagesTmpl<6>(cfg);
|
|
ValidateStagesTmpl<7>(cfg);
|
|
ValidateStagesTmpl<8>(cfg);
|
|
ValidateStagesTmpl<16>(cfg);
|
|
ValidateStagesTmpl<32>(cfg);
|
|
ValidateStagesTmpl<64>(cfg);
|
|
|
|
// Sizes: 0/512/1024 MiB
|
|
cfg = Config(0, 512 << 20, 1024 << 20);
|
|
ValidateStagesTmpl<1>(cfg);
|
|
ValidateStagesTmpl<2>(cfg);
|
|
ValidateStagesTmpl<3>(cfg);
|
|
ValidateStagesTmpl<4>(cfg);
|
|
ValidateStagesTmpl<5>(cfg);
|
|
ValidateStagesTmpl<6>(cfg);
|
|
ValidateStagesTmpl<7>(cfg);
|
|
ValidateStagesTmpl<8>(cfg);
|
|
ValidateStagesTmpl<16>(cfg);
|
|
ValidateStagesTmpl<32>(cfg);
|
|
ValidateStagesTmpl<64>(cfg);
|
|
}
|
|
|
|
template <uint32_t mask_lshift>
|
|
void ChunkerTest::ValidateLshiftTmpl(const Config& cfg) {
|
|
TestChunker<1, mask_lshift> chunker(cfg, nullptr);
|
|
uint64_t mask = chunker.Stage(0).mask;
|
|
uint64_t expected = BitCount(mask);
|
|
EXPECT_GE(expected, 1) << "no bits were set in the bit mask for lshift "
|
|
<< mask_lshift;
|
|
// Compare no. of all 1-bits to no. of 1-bits with the given shift amount.
|
|
uint32_t actual = 0;
|
|
for (; mask; mask >>= mask_lshift) {
|
|
actual += mask & 1u;
|
|
}
|
|
EXPECT_EQ(expected, actual)
|
|
<< "number of bits set is different with lshift " << mask_lshift;
|
|
}
|
|
|
|
// Tests that the bitmasks for each stage honor the mask_lshift template
|
|
// parameter correctly.
|
|
TEST_F(ChunkerTest, ValidateLshift) {
|
|
Config cfg(32, 64, 128);
|
|
ValidateLshiftTmpl<1>(cfg);
|
|
ValidateLshiftTmpl<2>(cfg);
|
|
ValidateLshiftTmpl<3>(cfg);
|
|
ValidateLshiftTmpl<4>(cfg);
|
|
ValidateLshiftTmpl<5>(cfg);
|
|
}
|
|
|
|
// Tests that the minimum chunk size is not undercut.
|
|
TEST_F(ChunkerTest, MinChunkSize) {
|
|
Config cfg(32, 64, 128);
|
|
std::vector<size_t> chunk_sizes;
|
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
|
chunk_sizes.push_back(len);
|
|
});
|
|
// All-zero data matches a chunk boundary everywhere.
|
|
std::vector<uint8_t> data(cfg.max_size, 0);
|
|
chunker.Process(data.data(), data.size());
|
|
chunker.Finalize();
|
|
EXPECT_EQ(chunk_sizes.size(), 4);
|
|
for (size_t size : chunk_sizes) {
|
|
EXPECT_EQ(size, cfg.min_size);
|
|
}
|
|
}
|
|
|
|
// Tests that maximum chunk size is not exceeded.
|
|
TEST_F(ChunkerTest, MaxChunkSize) {
|
|
Config cfg(32, 64, 128);
|
|
std::vector<size_t> chunk_sizes;
|
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
|
chunk_sizes.push_back(len);
|
|
});
|
|
// All-ones data never matches a chunk boundary.
|
|
std::vector<uint8_t> data(4 * cfg.max_size, 1);
|
|
chunker.Process(data.data(), data.size());
|
|
chunker.Finalize();
|
|
EXPECT_EQ(chunk_sizes.size(), 4);
|
|
for (size_t size : chunk_sizes) {
|
|
EXPECT_EQ(size, cfg.max_size);
|
|
}
|
|
}
|
|
|
|
// Tests that Finalize() returns the remaining data as a chunk.
|
|
TEST_F(ChunkerTest, FinalizeChunk) {
|
|
Config cfg(32, 64, 128);
|
|
std::vector<size_t> chunk_sizes;
|
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
|
chunk_sizes.push_back(len);
|
|
});
|
|
std::vector<uint8_t> data(1, 0);
|
|
chunker.Process(data.data(), data.size());
|
|
EXPECT_EQ(chunk_sizes.size(), 0);
|
|
chunker.Finalize();
|
|
EXPECT_EQ(chunk_sizes.size(), 1);
|
|
EXPECT_EQ(chunk_sizes[0], 1);
|
|
}
|
|
|
|
// Tests that Finalize() works when no data is left.
|
|
TEST_F(ChunkerTest, FinalizeEmptyChunk) {
|
|
Config cfg(32, 64, 128);
|
|
std::vector<size_t> chunk_sizes;
|
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
|
chunk_sizes.push_back(len);
|
|
});
|
|
std::vector<uint8_t> data(1, 0);
|
|
chunker.Process(data.data(), 0);
|
|
EXPECT_EQ(chunk_sizes.size(), 0);
|
|
chunker.Finalize();
|
|
EXPECT_EQ(chunk_sizes.size(), 0);
|
|
}
|
|
|
|
// Tests that Finalize() works when Process() was not called.
|
|
TEST_F(ChunkerTest, FinalizeWithoutProcess) {
|
|
Config cfg(32, 64, 128);
|
|
std::vector<size_t> chunk_sizes;
|
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
|
chunk_sizes.push_back(len);
|
|
});
|
|
chunker.Finalize();
|
|
EXPECT_EQ(chunk_sizes.size(), 0);
|
|
}
|
|
|
|
} // namespace fastcdc
|
|
} // namespace cdc_ft
|