mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-01-30 14:25:36 +02:00
* Fix #76: fastcdc chunk boundary off-by-one. The last byte fed into the gear hash that identified the chunk boundary is now included in the chunk, so chunks are still matched when the byte immediately after them is changed.
* Init the gear hash to all 1s to prevent zero-length chunks with min_size=0. Also change the `MaxChunkSize` test to use min_size=0 to verify that this works.
This commit is contained in:
@@ -239,7 +239,8 @@ class ChunkerTmpl {
|
||||
len = cfg_.max_size;
|
||||
}
|
||||
|
||||
uint64_t hash = 0;
|
||||
// Init hash to all 1's to avoid zero-length chunks with min_size=0.
|
||||
uint64_t hash = (uint64_t)-1;
|
||||
// Skip the first min_size bytes, but "warm up" the rolling hash for 64
|
||||
// rounds to make sure the 64-bit hash has gathered full "content history".
|
||||
size_t i = cfg_.min_size > 64 ? cfg_.min_size - 64 : 0;
|
||||
@@ -250,10 +251,10 @@ class ChunkerTmpl {
|
||||
uint64_t mask = stages_[stg].mask;
|
||||
size_t barrier = std::min(len, stages_[stg].barrier);
|
||||
for (/*empty*/; i < barrier; ++i) {
|
||||
hash = (hash << 1) + gear[data[i]];
|
||||
if (!(hash & mask)) {
|
||||
return i;
|
||||
}
|
||||
hash = (hash << 1) + gear[data[i]];
|
||||
}
|
||||
}
|
||||
return i;
|
||||
|
||||
Reference in New Issue
Block a user