mirror of
https://github.com/nestriness/cdc-file-transfer.git
synced 2026-05-01 16:43:08 +03:00
[cdc_rsync] Improve throughput for local copies (#74)
On Windows, fclose() seems to be very expensive for large files: closing a 1 GB file can take up to 5 seconds. This CL calls fclose() in background threads. This tremendously improves local syncs, e.g. copying a 4.5 GB data set of 300 files takes only 7 seconds instead of 30 seconds. It also increases the buffer size for copying from 16K to 128K (better throughput for local copies) and adds a timestamp to debug and verbose console logs (useful when comparing client and server logs).
This commit is contained in:
@@ -158,6 +158,7 @@ cc_library(
|
||||
deps = [
|
||||
":clock",
|
||||
":platform",
|
||||
":stopwatch",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
|
||||
@@ -126,21 +126,22 @@ void ConsoleLog::WriteLogMessage(LogLevel level, const char* file, int line,
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
// Show leaner log messages in non-verbose mode.
|
||||
bool show_file_func = GetLogLevel() <= LogLevel::kDebug;
|
||||
bool show_time_file_func = GetLogLevel() <= LogLevel::kDebug;
|
||||
FILE* stdfile = level >= LogLevel::kError ? stderr : stdout;
|
||||
#if PLATFORM_WINDOWS
|
||||
HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
SetConsoleTextAttribute(hConsole, GetConsoleColor(level));
|
||||
if (show_file_func) {
|
||||
fprintf(stdfile, "%s(%i): %s(): %s\n", file, line, func, message);
|
||||
if (show_time_file_func) {
|
||||
fprintf(stdfile, "%0.3f %s(%i): %s(): %s\n", stopwatch_.ElapsedSeconds(),
|
||||
file, line, func, message);
|
||||
} else {
|
||||
fprintf(stdfile, "%s\n", message);
|
||||
}
|
||||
SetConsoleTextAttribute(hConsole, kLightGray);
|
||||
#else
|
||||
if (show_file_func) {
|
||||
fprintf(stdfile, "%-7s %s(%i): %s(): %s\n", GetLogLevelString(level), file,
|
||||
line, func, message);
|
||||
if (show_time_file_func) {
|
||||
fprintf(stdfile, "%-7s %0.3f %s(%i): %s(): %s\n", GetLogLevelString(level),
|
||||
stopwatch_.ElapsedSeconds(), file, line, func, message);
|
||||
} else {
|
||||
fprintf(stdfile, "%-7s %s\n", GetLogLevelString(level), message);
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "common/clock.h"
|
||||
#include "common/stopwatch.h"
|
||||
|
||||
namespace cdc_ft {
|
||||
|
||||
@@ -120,6 +121,7 @@ class ConsoleLog : public Log {
|
||||
ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
private:
|
||||
Stopwatch stopwatch_;
|
||||
absl::Mutex mutex_;
|
||||
};
|
||||
|
||||
|
||||
@@ -45,6 +45,11 @@ void Threadpool::Shutdown() {
|
||||
for (auto& worker : workers_) {
|
||||
if (worker.joinable()) worker.join();
|
||||
}
|
||||
|
||||
// Discard all completed tasks.
|
||||
absl::MutexLock lock(&completed_tasks_mutex_);
|
||||
std::queue<std::unique_ptr<Task>> empty;
|
||||
std::swap(completed_tasks_, empty);
|
||||
}
|
||||
|
||||
void Threadpool::QueueTask(std::unique_ptr<Task> task) {
|
||||
@@ -77,6 +82,21 @@ std::unique_ptr<Task> Threadpool::GetCompletedTask() {
|
||||
return task;
|
||||
}
|
||||
|
||||
// Replaces the stored task-completed callback with |cb|. The assignment is
// made while holding completed_tasks_mutex_.
void Threadpool::SetTaskCompletedCallback(TaskCompletedCallback cb) {
  absl::MutexLock guard(&completed_tasks_mutex_);
  on_task_completed_ = std::move(cb);
}
|
||||
|
||||
bool Threadpool::WaitForQueuedTasksAtMost(size_t count,
|
||||
absl::Duration timeout) const {
|
||||
absl::MutexLock lock(&task_queue_mutex_);
|
||||
auto cond = [this, count]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(task_queue_mutex_) {
|
||||
return shutdown_ || outstanding_task_count_ <= count;
|
||||
};
|
||||
return task_queue_mutex_.AwaitWithTimeout(absl::Condition(&cond), timeout) &&
|
||||
outstanding_task_count_ <= count;
|
||||
}
|
||||
|
||||
void Threadpool::ThreadWorkerMain() {
|
||||
bool task_finished = false;
|
||||
for (;;) {
|
||||
@@ -85,7 +105,8 @@ void Threadpool::ThreadWorkerMain() {
|
||||
absl::MutexLock lock(&task_queue_mutex_);
|
||||
|
||||
// Decrease task count here, so we don't have to lock again at the end of
|
||||
// the loop.
|
||||
// the loop. It is important to first push the task, then decrease this
|
||||
// count. Otherwise, there's a race between Wait() and GetCompletedTask().
|
||||
if (task_finished) {
|
||||
assert(outstanding_task_count_ > 0);
|
||||
--outstanding_task_count_;
|
||||
@@ -104,17 +125,18 @@ void Threadpool::ThreadWorkerMain() {
|
||||
}
|
||||
|
||||
// Run task, but make it cancellable.
|
||||
task->ThreadRun([this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(
|
||||
task_queue_mutex_) -> bool { return shutdown_; });
|
||||
|
||||
{
|
||||
task->ThreadRun([this]() ABSL_LOCKS_EXCLUDED(task_queue_mutex_) -> bool {
|
||||
absl::MutexLock lock(&task_queue_mutex_);
|
||||
if (shutdown_) break;
|
||||
}
|
||||
return shutdown_;
|
||||
});
|
||||
|
||||
// Push task to completed queue.
|
||||
absl::MutexLock lock(&completed_tasks_mutex_);
|
||||
completed_tasks_.push(std::move(task));
|
||||
if (on_task_completed_) {
|
||||
on_task_completed_(std::move(task));
|
||||
} else {
|
||||
completed_tasks_.push(std::move(task));
|
||||
}
|
||||
task_finished = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#define COMMON_THREADPOOL_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
@@ -57,7 +56,8 @@ class Threadpool {
|
||||
void QueueTask(std::unique_ptr<Task> task)
|
||||
ABSL_LOCKS_EXCLUDED(task_queue_mutex_);
|
||||
|
||||
// If available, returns the next completed task.
|
||||
// Returns the next completed task if available, or nullptr if all are either
|
||||
// queued or in progress.
|
||||
// For a single worker thread (|num_threads| == 1), tasks are completed in
|
||||
// FIFO order. This is no longer the case for multiple threads
|
||||
// (|num_threads| > 1). Tasks that got queued later might complete first.
|
||||
@@ -71,6 +71,14 @@ class Threadpool {
|
||||
std::unique_ptr<Task> GetCompletedTask()
|
||||
ABSL_LOCKS_EXCLUDED(completed_tasks_mutex_);
|
||||
|
||||
using TaskCompletedCallback = std::function<void(std::unique_ptr<Task>)>;
|
||||
|
||||
// Set a callback that is called immediately in a background thread when a
|
||||
// task is completed. The task will not be put onto the completed queue, so
|
||||
// if this callback is set, do not call (Try)GetCompletedTask.
|
||||
void SetTaskCompletedCallback(TaskCompletedCallback cb)
|
||||
ABSL_LOCKS_EXCLUDED(completed_tasks_mutex_);
|
||||
|
||||
// Returns the total number of worker threads in the pool.
|
||||
size_t NumThreads() const { return workers_.size(); }
|
||||
|
||||
@@ -80,6 +88,14 @@ class Threadpool {
|
||||
return outstanding_task_count_;
|
||||
}
|
||||
|
||||
// Block until the number of queued tasks drops below or equal to |count|, or
|
||||
// until the timeout is exceeded, or until Shutdown() is called, whatever
|
||||
// comes sooner. Returns true if less than or equal to |count| tasks are
|
||||
// queued.
|
||||
bool WaitForQueuedTasksAtMost(
|
||||
size_t count, absl::Duration timeout = absl::InfiniteDuration()) const
|
||||
ABSL_LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
private:
|
||||
// Background thread worker method. Picks tasks and runs them.
|
||||
void ThreadWorkerMain()
|
||||
@@ -94,6 +110,8 @@ class Threadpool {
|
||||
absl::Mutex completed_tasks_mutex_;
|
||||
std::queue<std::unique_ptr<Task>> completed_tasks_
|
||||
ABSL_GUARDED_BY(completed_tasks_mutex_);
|
||||
TaskCompletedCallback on_task_completed_
|
||||
ABSL_GUARDED_BY(completed_tasks_mutex_);
|
||||
|
||||
std::vector<std::thread> workers_;
|
||||
};
|
||||
|
||||
@@ -151,5 +151,37 @@ TEST_F(ThreadpoolTest, GetCompletedTask) {
|
||||
EXPECT_EQ(completed_task.get(), task);
|
||||
}
|
||||
|
||||
// Checks that a callback installed with SetTaskCompletedCallback() runs for a
// queued task and that TryGetCompletedTask() then yields nothing.
TEST_F(ThreadpoolTest, SetTaskCompletedCallback) {
  Semaphore callback_ran(0);
  Threadpool pool(1);
  std::atomic_bool callback_seen = false;

  pool.SetTaskCompletedCallback(
      [&callback_ran, &callback_seen](std::unique_ptr<Task> /*completed*/) {
        callback_seen = true;
        callback_ran.Signal();
      });

  // The task itself does nothing; only its completion matters.
  auto noop = [](Task::IsCancelledPredicate) { /* empty */ };
  pool.QueueTask(std::make_unique<TestTask>(noop));

  // Block until the callback has signalled from the worker thread.
  callback_ran.Wait();
  EXPECT_TRUE(callback_seen);
  EXPECT_FALSE(pool.TryGetCompletedTask());
}
|
||||
|
||||
// Queues two tasks that block on a semaphore and checks that
// WaitForQueuedTasksAtMost(1) times out while both are outstanding and
// succeeds once one of them has been released.
TEST_F(ThreadpoolTest, WaitForQueuedTasksAtMost) {
  Semaphore release_task(0);
  auto blocking_task = [&release_task](Task::IsCancelledPredicate) {
    release_task.Wait();
  };

  Threadpool pool(1);
  for (int i = 0; i < 2; ++i) {
    pool.QueueTask(std::make_unique<TestTask>(blocking_task));
  }

  // Nothing has been released yet, so the short wait must fail.
  const bool drained_early =
      pool.WaitForQueuedTasksAtMost(1, absl::Milliseconds(10));
  EXPECT_FALSE(drained_early);

  // Release one task; the wait should now succeed.
  release_task.Signal();
  const bool drained_after_release =
      pool.WaitForQueuedTasksAtMost(1, absl::Milliseconds(5000));
  EXPECT_TRUE(drained_after_release);
  EXPECT_EQ(pool.NumQueuedTasks(), 1);

  // Release the remaining task.
  release_task.Signal();
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cdc_ft
|
||||
|
||||
Reference in New Issue
Block a user