mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-01-30 14:35:37 +02:00
* Fix #76: fastcdc chunk boundary off-by-one. The last byte included in the gear hash that identified the chunk boundary is now included in the chunk. This ensures chunks are still matched when the byte immediately after them is changed.
* Init gear hash to all 1's to prevent zero-length chunks with min_size=0. Also change the `MaxChunkSize` test to use min_size=0 to test that this works.
This commit is contained in:
@@ -239,7 +239,8 @@ class ChunkerTmpl {
|
|||||||
len = cfg_.max_size;
|
len = cfg_.max_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t hash = 0;
|
// Init hash to all 1's to avoid zero-length chunks with min_size=0.
|
||||||
|
uint64_t hash = (uint64_t)-1;
|
||||||
// Skip the first min_size bytes, but "warm up" the rolling hash for 64
|
// Skip the first min_size bytes, but "warm up" the rolling hash for 64
|
||||||
// rounds to make sure the 64-bit hash has gathered full "content history".
|
// rounds to make sure the 64-bit hash has gathered full "content history".
|
||||||
size_t i = cfg_.min_size > 64 ? cfg_.min_size - 64 : 0;
|
size_t i = cfg_.min_size > 64 ? cfg_.min_size - 64 : 0;
|
||||||
@@ -250,10 +251,10 @@ class ChunkerTmpl {
|
|||||||
uint64_t mask = stages_[stg].mask;
|
uint64_t mask = stages_[stg].mask;
|
||||||
size_t barrier = std::min(len, stages_[stg].barrier);
|
size_t barrier = std::min(len, stages_[stg].barrier);
|
||||||
for (/*empty*/; i < barrier; ++i) {
|
for (/*empty*/; i < barrier; ++i) {
|
||||||
hash = (hash << 1) + gear[data[i]];
|
|
||||||
if (!(hash & mask)) {
|
if (!(hash & mask)) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
hash = (hash << 1) + gear[data[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return i;
|
return i;
|
||||||
|
|||||||
@@ -195,7 +195,7 @@ TEST_F(ChunkerTest, MinChunkSize) {
|
|||||||
|
|
||||||
// Tests that maximum chunk size is not exceeded.
|
// Tests that maximum chunk size is not exceeded.
|
||||||
TEST_F(ChunkerTest, MaxChunkSize) {
|
TEST_F(ChunkerTest, MaxChunkSize) {
|
||||||
Config cfg(32, 64, 128);
|
Config cfg(0, 64, 128);
|
||||||
std::vector<size_t> chunk_sizes;
|
std::vector<size_t> chunk_sizes;
|
||||||
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
TestChunker<> chunker(cfg, [&](const uint8_t* /* data */, size_t len) {
|
||||||
chunk_sizes.push_back(len);
|
chunk_sizes.push_back(len);
|
||||||
|
|||||||
Reference in New Issue
Block a user