[cdc_rsync] Improve throughput for local copies (#74)

On Windows, fclose() seems to be very expensive for large files; closing
a 1 GB file can take up to 5 seconds. This CL calls fclose() in
background threads. This tremendously improves local syncs, e.g.
copying a 4.5 GB data set of 300 files takes only 7 seconds instead of
30 seconds.

Also increases the buffer size for copying from 16K to 128K (better
throughput for local copies), and adds a timestamp to debug and
verbose console logs (useful when comparing client and server logs).
This commit is contained in:
Lutz Justen
2023-01-31 16:33:03 +01:00
committed by GitHub
parent 1200b34316
commit 5a909bb443
9 changed files with 275 additions and 73 deletions

View File

@@ -158,6 +158,7 @@ cc_library(
deps = [
":clock",
":platform",
":stopwatch",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/synchronization",
],

View File

@@ -126,21 +126,22 @@ void ConsoleLog::WriteLogMessage(LogLevel level, const char* file, int line,
absl::MutexLock lock(&mutex_);
// Show leaner log messages in non-verbose mode.
bool show_file_func = GetLogLevel() <= LogLevel::kDebug;
bool show_time_file_func = GetLogLevel() <= LogLevel::kDebug;
FILE* stdfile = level >= LogLevel::kError ? stderr : stdout;
#if PLATFORM_WINDOWS
HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
SetConsoleTextAttribute(hConsole, GetConsoleColor(level));
if (show_file_func) {
fprintf(stdfile, "%s(%i): %s(): %s\n", file, line, func, message);
if (show_time_file_func) {
fprintf(stdfile, "%0.3f %s(%i): %s(): %s\n", stopwatch_.ElapsedSeconds(),
file, line, func, message);
} else {
fprintf(stdfile, "%s\n", message);
}
SetConsoleTextAttribute(hConsole, kLightGray);
#else
if (show_file_func) {
fprintf(stdfile, "%-7s %s(%i): %s(): %s\n", GetLogLevelString(level), file,
line, func, message);
if (show_time_file_func) {
fprintf(stdfile, "%-7s %0.3f %s(%i): %s(): %s\n", GetLogLevelString(level),
stopwatch_.ElapsedSeconds(), file, line, func, message);
} else {
fprintf(stdfile, "%-7s %s\n", GetLogLevelString(level), message);
}

View File

@@ -22,6 +22,7 @@
#include "absl/strings/str_format.h"
#include "absl/synchronization/mutex.h"
#include "common/clock.h"
#include "common/stopwatch.h"
namespace cdc_ft {
@@ -120,6 +121,7 @@ class ConsoleLog : public Log {
ABSL_LOCKS_EXCLUDED(mutex_);
private:
Stopwatch stopwatch_;
absl::Mutex mutex_;
};

View File

@@ -45,6 +45,11 @@ void Threadpool::Shutdown() {
for (auto& worker : workers_) {
if (worker.joinable()) worker.join();
}
// Discard all completed tasks.
absl::MutexLock lock(&completed_tasks_mutex_);
std::queue<std::unique_ptr<Task>> empty;
std::swap(completed_tasks_, empty);
}
void Threadpool::QueueTask(std::unique_ptr<Task> task) {
@@ -77,6 +82,21 @@ std::unique_ptr<Task> Threadpool::GetCompletedTask() {
return task;
}
// Installs |cb| as the handler that worker threads invoke for each finished
// task. The stored callback is guarded by |completed_tasks_mutex_|. When no
// callback is stored, finished tasks go to the completed-task queue instead.
void Threadpool::SetTaskCompletedCallback(TaskCompletedCallback cb) {
  absl::MutexLock completed_guard(&completed_tasks_mutex_);
  on_task_completed_ = std::move(cb);
}
// Blocks until at most |count| tasks are outstanding, the pool is shut down,
// or |timeout| elapses. Returns true only if at most |count| tasks are
// outstanding when the wait ends.
bool Threadpool::WaitForQueuedTasksAtMost(size_t count,
                                          absl::Duration timeout) const {
  absl::MutexLock queue_guard(&task_queue_mutex_);

  // Wake-up predicate: either enough tasks have drained or the pool is going
  // away. Evaluated by the mutex with |task_queue_mutex_| held.
  auto drained_or_shutdown =
      [this, count]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(task_queue_mutex_) {
        return shutdown_ || outstanding_task_count_ <= count;
      };

  const bool woke_before_timeout = task_queue_mutex_.AwaitWithTimeout(
      absl::Condition(&drained_or_shutdown), timeout);
  if (!woke_before_timeout) {
    return false;
  }

  // A shutdown also ends the wait, so re-check the actual task count.
  return outstanding_task_count_ <= count;
}
void Threadpool::ThreadWorkerMain() {
bool task_finished = false;
for (;;) {
@@ -85,7 +105,8 @@ void Threadpool::ThreadWorkerMain() {
absl::MutexLock lock(&task_queue_mutex_);
// Decrease task count here, so we don't have to lock again at the end of
// the loop.
// the loop. It is important to first push the task, then decrease this
// count. Otherwise, there's a race between Wait() and GetCompletedTask().
if (task_finished) {
assert(outstanding_task_count_ > 0);
--outstanding_task_count_;
@@ -104,17 +125,18 @@ void Threadpool::ThreadWorkerMain() {
}
// Run task, but make it cancellable.
task->ThreadRun([this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(
task_queue_mutex_) -> bool { return shutdown_; });
{
task->ThreadRun([this]() ABSL_LOCKS_EXCLUDED(task_queue_mutex_) -> bool {
absl::MutexLock lock(&task_queue_mutex_);
if (shutdown_) break;
}
return shutdown_;
});
// Push task to completed queue.
absl::MutexLock lock(&completed_tasks_mutex_);
completed_tasks_.push(std::move(task));
if (on_task_completed_) {
on_task_completed_(std::move(task));
} else {
completed_tasks_.push(std::move(task));
}
task_finished = true;
}
}

View File

@@ -18,7 +18,6 @@
#define COMMON_THREADPOOL_H_
#include <atomic>
#include <condition_variable>
#include <functional>
#include <memory>
#include <queue>
@@ -57,7 +56,8 @@ class Threadpool {
void QueueTask(std::unique_ptr<Task> task)
ABSL_LOCKS_EXCLUDED(task_queue_mutex_);
// If available, returns the next completed task.
// Returns the next completed task if available, or nullptr if all tasks are
// either queued or in progress.
// For a single worker thread (|num_threads| == 1), tasks are completed in
// FIFO order. This is no longer the case for multiple threads
// (|num_threads| > 1). Tasks that got queued later might complete first.
@@ -71,6 +71,14 @@ class Threadpool {
std::unique_ptr<Task> GetCompletedTask()
ABSL_LOCKS_EXCLUDED(completed_tasks_mutex_);
// Signature of the handler that takes ownership of a completed task.
using TaskCompletedCallback = std::function<void(std::unique_ptr<Task>)>;
// Set a callback that is called immediately in a background thread when a
// task is completed. The task will not be put onto the completed queue, so
// if this callback is set, do not call (Try)GetCompletedTask.
// NOTE(review): the worker appears to invoke the callback while holding
// |completed_tasks_mutex_|, so the callback presumably must not call
// SetTaskCompletedCallback() or GetCompletedTask() itself - confirm.
void SetTaskCompletedCallback(TaskCompletedCallback cb)
ABSL_LOCKS_EXCLUDED(completed_tasks_mutex_);
// Returns the total number of worker threads in the pool.
size_t NumThreads() const { return workers_.size(); }
@@ -80,6 +88,14 @@ class Threadpool {
return outstanding_task_count_;
}
// Blocks until at most |count| tasks are outstanding (queued or in progress),
// until |timeout| is exceeded, or until Shutdown() is called, whichever comes
// first. Returns true if at most |count| tasks are outstanding when the wait
// ends; returns false on timeout, or if Shutdown() ended the wait while more
// than |count| tasks were still outstanding.
// The implementation acquires |task_queue_mutex_|, so the caller must not
// hold it.
bool WaitForQueuedTasksAtMost(
    size_t count, absl::Duration timeout = absl::InfiniteDuration()) const
    ABSL_LOCKS_EXCLUDED(task_queue_mutex_);
private:
// Background thread worker method. Picks tasks and runs them.
void ThreadWorkerMain()
@@ -94,6 +110,8 @@ class Threadpool {
absl::Mutex completed_tasks_mutex_;
std::queue<std::unique_ptr<Task>> completed_tasks_
ABSL_GUARDED_BY(completed_tasks_mutex_);
TaskCompletedCallback on_task_completed_
ABSL_GUARDED_BY(completed_tasks_mutex_);
std::vector<std::thread> workers_;
};

View File

@@ -151,5 +151,37 @@ TEST_F(ThreadpoolTest, GetCompletedTask) {
EXPECT_EQ(completed_task.get(), task);
}
// Verifies that a registered completion callback runs for a finished task and
// that the task does not show up in the completed-task queue.
TEST_F(ThreadpoolTest, SetTaskCompletedCallback) {
  Semaphore callback_done(0);
  std::atomic_bool callback_ran{false};

  Threadpool pool(1);
  pool.SetTaskCompletedCallback([&](std::unique_ptr<Task> /*task*/) {
    callback_ran = true;
    callback_done.Signal();
  });

  auto noop = [](Task::IsCancelledPredicate) { /* empty */ };
  pool.QueueTask(std::make_unique<TestTask>(noop));

  // Block until the worker thread has executed the callback.
  callback_done.Wait();
  EXPECT_TRUE(callback_ran);
  // The task was handed to the callback, so nothing is queued as completed.
  EXPECT_FALSE(pool.TryGetCompletedTask());
}
// Verifies that WaitForQueuedTasksAtMost() times out while too many tasks are
// outstanding and succeeds once enough of them have finished.
TEST_F(ThreadpoolTest, WaitForQueuedTasksAtMost) {
  Semaphore release_task(0);
  auto blocking_task = [&release_task](Task::IsCancelledPredicate) {
    release_task.Wait();
  };

  // One worker, two blocking tasks: one runs, the other stays queued.
  Threadpool pool(1);
  for (int i = 0; i < 2; ++i) {
    pool.QueueTask(std::make_unique<TestTask>(blocking_task));
  }

  // Neither task can finish yet, so the wait must time out.
  EXPECT_FALSE(pool.WaitForQueuedTasksAtMost(1, absl::Milliseconds(10)));

  // Let the first task finish; exactly one task remains outstanding.
  release_task.Signal();
  EXPECT_TRUE(pool.WaitForQueuedTasksAtMost(1, absl::Milliseconds(5000)));
  EXPECT_EQ(pool.NumQueuedTasks(), 1);

  // Unblock the remaining task so the pool can shut down cleanly.
  release_task.Signal();
}
} // namespace
} // namespace cdc_ft