[cdc_rsync] Improve throughput for local copies (#74)

On Windows, fclose() seems to be very expensive for large files, where
closing a 1 GB file takes up to 5 seconds. This CL calls fclose() in
background threads. This tremendously improves local syncs, e.g.
copying a 4.5 GB data set of 300 files takes only 7 seconds instead
of 30 seconds.

Also increases the buffer size for copying from 16K to 128K (better
throughput for local copies), and adds a timestamp to debug and
verbose console logs (useful when comparing client and server logs).
This commit is contained in:
Lutz Justen
2023-01-31 16:33:03 +01:00
committed by GitHub
parent 1200b34316
commit 5a909bb443
9 changed files with 275 additions and 73 deletions

View File

@@ -45,6 +45,11 @@ void Threadpool::Shutdown() {
for (auto& worker : workers_) {
if (worker.joinable()) worker.join();
}
// Discard all completed tasks.
absl::MutexLock lock(&completed_tasks_mutex_);
std::queue<std::unique_ptr<Task>> empty;
std::swap(completed_tasks_, empty);
}
void Threadpool::QueueTask(std::unique_ptr<Task> task) {
@@ -77,6 +82,21 @@ std::unique_ptr<Task> Threadpool::GetCompletedTask() {
return task;
}
// Installs |cb| as the task-completed callback, replacing any previous one.
// The assignment happens under completed_tasks_mutex_, the same mutex the
// worker threads hold when they check on_task_completed_. When a callback is
// set, workers pass each finished task to it instead of pushing the task onto
// completed_tasks_.
void Threadpool::SetTaskCompletedCallback(TaskCompletedCallback cb) {
  absl::MutexLock completed_guard(&completed_tasks_mutex_);
  on_task_completed_ = std::move(cb);
}
// Blocks until at most |count| tasks are outstanding, the pool is shut down,
// or |timeout| elapses, whichever happens first.
//
// Returns true only if outstanding_task_count_ <= |count| when the wait ends.
// A wake-up caused solely by shutdown_, or an expired timeout, yields false.
bool Threadpool::WaitForQueuedTasksAtMost(size_t count,
                                          absl::Duration timeout) const {
  absl::MutexLock queue_guard(&task_queue_mutex_);

  // shutdown_ is part of the predicate so that waiters do not sit out the full
  // timeout once the pool is being shut down.
  auto drained_or_shutdown =
      [this, count]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(task_queue_mutex_) {
        return shutdown_ || outstanding_task_count_ <= count;
      };

  const bool predicate_held = task_queue_mutex_.AwaitWithTimeout(
      absl::Condition(&drained_or_shutdown), timeout);
  if (!predicate_held) {
    return false;
  }

  // The predicate may have held only because of shutdown_; re-check the count
  // so that the result reflects the task count alone.
  return outstanding_task_count_ <= count;
}
void Threadpool::ThreadWorkerMain() {
bool task_finished = false;
for (;;) {
@@ -85,7 +105,8 @@ void Threadpool::ThreadWorkerMain() {
absl::MutexLock lock(&task_queue_mutex_);
// Decrease task count here, so we don't have to lock again at the end of
// the loop.
// the loop. It is important to first push the task, then decrease this
// count. Otherwise, there's a race between Wait() and GetCompletedTask().
if (task_finished) {
assert(outstanding_task_count_ > 0);
--outstanding_task_count_;
@@ -104,17 +125,18 @@ void Threadpool::ThreadWorkerMain() {
}
// Run task, but make it cancellable.
task->ThreadRun([this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(
task_queue_mutex_) -> bool { return shutdown_; });
{
task->ThreadRun([this]() ABSL_LOCKS_EXCLUDED(task_queue_mutex_) -> bool {
absl::MutexLock lock(&task_queue_mutex_);
if (shutdown_) break;
}
return shutdown_;
});
// Push task to completed queue.
absl::MutexLock lock(&completed_tasks_mutex_);
completed_tasks_.push(std::move(task));
if (on_task_completed_) {
on_task_completed_(std::move(task));
} else {
completed_tasks_.push(std::move(task));
}
task_finished = true;
}
}