Files
netris-cdc-file-transfer/cdc_rsync/progress_tracker.cc
Christian Schneider 4326e972ac Releasing the former Stadia file transfer tools
The tools allow efficient and fast synchronization of large directory
trees from a Windows workstation to a Linux target machine.

cdc_rsync* support efficient copy of files by using content-defined
chunking (CDC) to identify chunks within files that can be reused.

asset_stream_manager + cdc_fuse_fs support efficient streaming of a
local directory to a remote virtual file system based on FUSE. It also
employs CDC to identify and reuse unchanged data chunks.
2022-11-03 10:39:10 +01:00

550 lines
17 KiB
C++

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cdc_rsync/progress_tracker.h"
#include <algorithm>
#include <cassert>
#include "absl/strings/str_format.h"
#include "common/util.h"
#include "json/json.h"
namespace cdc_ft {
namespace {
// Weight of server-side signature bytes in progress figures: signature bytes
// are divided by this factor before being added to diffed bytes, i.e. they
// count 1/40. The ratio is an estimate that assumes about 800 MB/sec for
// signature computation versus about 20 MB/sec for diffing; the real speeds
// are not known.
constexpr int kSigFactor{40};
// Appends spaces to |str| until it is |size| characters long. A string that
// already has |size| or more characters is left untouched.
void PaddRight(std::string* str, size_t size) {
  const size_t current_len = str->size();
  if (current_len >= size) {
    return;
  }
  str->append(size - current_len, ' ');
}
// Returns |filepath| cut down to exactly |max_len| characters by keeping its
// tail and replacing the dropped front part with "...". A path that already
// fits is returned unchanged. If |max_len| is smaller than 3 there is no room
// for the ellipsis, and the path is returned unchanged as well.
// TODO: Improve this, e.g. by replacing directories in the middle by "..".
// TODO: Also make sure it plays nicely with UTF-8 code points.
std::string ShortenFilePath(const std::string& filepath, size_t max_len) {
  constexpr size_t kEllipsisLen = 3;
  if (max_len < kEllipsisLen || filepath.size() <= max_len) {
    return filepath;
  }
  // Keep the last |max_len| - 3 characters so that the result, including the
  // ellipsis, is |max_len| characters long.
  const size_t tail_len = max_len - kEllipsisLen;
  return "..." + filepath.substr(filepath.size() - tail_len);
}
// Unit suffixes in ascending order; neighbouring units differ by a factor of
// 1024. The table is immutable.
// Formatting might be messed up for >9999 ZB. Please don't sync large files!
constexpr const char* kSizeUnits[] = {"B",  "KB", "MB", "GB",
                                      "TB", "PB", "EB", "ZB"};

// Divides |size| by 1024 as long as it has more than 4 digits and returns the
// corresponding unit, e.g. 10240 -> 10KB. |size| is modified in place; the
// division truncates. Scaling stops at the last entry of |kSizeUnits|.
const char* FormatIntBytes(uint64_t* size) {
  // Unsigned index to match the unsigned element count.
  size_t unit_idx = 0;
  while (*size > 9999 && unit_idx + 1 < std::size(kSizeUnits)) {
    ++unit_idx;
    *size /= 1024;
  }
  return kSizeUnits[unit_idx];
}

// Divides |size| by 1024 as long as it is larger than 999.9 and returns the
// corresponding unit, e.g. 1536 -> 1.5KB. |size| is modified in place.
// Scaling stops at the last entry of |kSizeUnits|.
const char* FormatDoubleBytes(double* size) {
  // Unsigned index to match the unsigned element count.
  size_t unit_idx = 0;
  while (*size > 999.9 && unit_idx + 1 < std::size(kSizeUnits)) {
    ++unit_idx;
    *size /= 1024.0;
  }
  return kSizeUnits[unit_idx];
}
// Renders |sec| seconds as hh:mm:ss if that is one hour or more, and as mm:ss
// otherwise. The fractional part of |sec| is dropped.
std::string FormatTime(double sec) {
  const int whole_sec = static_cast<int>(sec);
  const int hours = whole_sec / 3600;
  const int minutes = (whole_sec % 3600) / 60;
  const int seconds = whole_sec % 60;
  return hours > 0
             ? absl::StrFormat("%02i:%02i:%02i", hours, minutes, seconds)
             : absl::StrFormat("%02i:%02i", minutes, seconds);
}
// Computes |curr| / |total| as a double and limits the result to [0,1]. A
// |total| of 0 is replaced by 1 so that the division is always defined.
double GetProgress(uint64_t curr, uint64_t total) {
  const uint64_t denominator = std::max<uint64_t>(1, total);
  const double ratio = static_cast<double>(curr) / denominator;
  if (ratio < 0.0) {
    return 0.0;
  }
  if (ratio > 1.0) {
    return 1.0;
  }
  return ratio;
}
} // namespace
// Constructs the printer; |quiet| and |is_tty| are handed unchanged to the
// ProgressPrinter base class.
ConsoleProgressPrinter::ConsoleProgressPrinter(bool quiet, bool is_tty)
    : ProgressPrinter(quiet, is_tty) {
}
// Writes |text| to stdout, unless the printer is quiet.
// - On a TTY, |text| is first padded with spaces to |output_width|
//   (presumably so that remnants of a longer previous line get overwritten),
//   and the line ends with '\n' if |newline| is set and with '\r' otherwise.
// - On anything else, the line always ends with '\n' and stdout is flushed
//   after every call.
void ConsoleProgressPrinter::Print(std::string text, bool newline,
                                   int output_width) {
  if (quiet()) {
    return;
  }

  const bool tty = is_tty();
  if (tty) {
    PaddRight(&text, output_width);
  }

  // Only a TTY line that is not final is terminated by a carriage return.
  const char terminator = (tty && !newline) ? '\r' : '\n';
  printf("%s%c", text.c_str(), terminator);

  if (!tty) {
    fflush(stdout);
  }
}
// Creates a tracker that reports through |printer|.
// - |printer| is dereferenced here, so it must not be null.
// - |verbosity| == 0 restricts the output to total progress only.
// - |json| selects JSON output for progress lines instead of plain text.
// - |fixed_output_width| > 0 overrides the console width (see
//   GetOutputWidth()).
// - |clock| is handed to all four internal timers.
// Output is throttled to one update every 0.1 seconds if |printer| reports a
// TTY and to one update per second otherwise.
ProgressTracker::ProgressTracker(ProgressPrinter* printer, int verbosity,
                                 bool json, int fixed_output_width,
                                 SteadyClock* clock)
    : printer_(printer),
      only_total_progress_(verbosity == 0),
      json_(json),
      fixed_output_width_(fixed_output_width),
      // Read the constructor argument rather than |printer_|: members are
      // initialized in declaration order, so this stays correct no matter
      // where |printer_| is declared relative to |display_delay_sec_|.
      display_delay_sec_(printer->is_tty() ? 0.1 : 1.0),
      total_timer_(clock),
      file_timer_(clock),
      sig_timer_(clock),
      print_timer_(clock) {}
// Defaulted destructor, defined out of line in this translation unit.
ProgressTracker::~ProgressTracker() = default;
void ProgressTracker::StartFindFiles() {
assert(state_ == State::kIdle);
state_ = State::kSearch;
files_found_ = 0;
}
// Counts one more file found by the search and refreshes the output. Only
// valid in the search state.
void ProgressTracker::ReportFileFound() {
  assert(state_ == State::kSearch);
  files_found_ += 1;
  UpdateOutput(/*finished=*/false);
}
// Counts one more folder found by the search and refreshes the output. Only
// valid in the search state.
void ProgressTracker::ReportDirFound() {
  assert(state_ == State::kSearch);
  dirs_found_ += 1;
  UpdateOutput(/*finished=*/false);
}
// Prints a four-line summary of what the sync is going to do (missing,
// changed, matching and extraneous files/folders) and stores the totals that
// later progress calculations are based on.
// - |whole_file_arg|, |checksum_arg| and |delete_arg| only pick the wording of
//   the summary lines.
// - |total_missing_bytes| becomes the number of bytes to copy,
//   |total_changed_client_bytes| the number of bytes to diff and
//   |total_changed_server_bytes| the number of signature bytes.
// - |num_extraneous_files| and |num_extraneous_dirs| become the deletion
//   totals.
void ProgressTracker::ReportFileStats(
    uint32_t num_missing_files, uint32_t num_extraneous_files,
    uint32_t num_matching_files, uint32_t num_changed_files,
    uint64_t total_missing_bytes, uint64_t total_changed_client_bytes,
    uint64_t total_changed_server_bytes, uint32_t num_missing_dirs,
    uint32_t num_extraneous_dirs, uint32_t num_matching_dirs,
    bool whole_file_arg, bool checksum_arg, bool delete_arg) {
  const char* missing_fmt =
      "%6u file(s) and %u folder(s) are not present on the instance and will "
      "be copied.";

  const char* changed_fmt = "%6u file(s) changed and will be updated.";
  if (whole_file_arg) {
    changed_fmt =
        "%6u file(s) changed and will be copied due to -W/--whole-file.";
  }

  const char* matching_fmt =
      "%6u file(s) and %u folder(s) match and do not have to be updated.";
  if (checksum_arg && whole_file_arg) {
    matching_fmt =
        "%6u file(s) and %u folder(s) have matching modified time and size, "
        "but will be copied due to -c/--checksum and -W/--whole-file.";
  } else if (checksum_arg) {
    matching_fmt =
        "%6u file(s) and %u folder(s) have matching modified time and size, "
        "but will be synced due to -c/--checksum.";
  }

  const char* extraneous_fmt =
      "%6u file(s) and %u folder(s) on the instance do not exist on this "
      "machine.";
  if (delete_arg) {
    extraneous_fmt =
        "%6u file(s) and %u folder(s) on the instance do not exist on this "
        "machine and will be deleted due to --delete.";
  }

  Print(absl::StrFormat(missing_fmt, num_missing_files, num_missing_dirs),
        true);
  Print(absl::StrFormat(changed_fmt, num_changed_files), true);
  Print(absl::StrFormat(matching_fmt, num_matching_files, num_matching_dirs),
        true);
  Print(absl::StrFormat(extraneous_fmt, num_extraneous_files,
                        num_extraneous_dirs),
        true);

  total_bytes_to_copy_ = total_missing_bytes;
  total_bytes_to_diff_ = total_changed_client_bytes;
  total_sig_bytes_ = total_changed_server_bytes;
  total_files_to_delete_ = num_extraneous_files;
  total_dirs_to_delete_ = num_extraneous_dirs;
}
// Begins tracking a whole-file copy of |filepath|, which is |filesize| bytes
// large. Must be called while idle. Restarts the per-file timer and clears
// the per-file counters.
void ProgressTracker::StartCopy(const std::string& filepath,
                                uint64_t filesize) {
  assert(state_ == State::kIdle);
  state_ = State::kCopy;

  // Describe the new file.
  curr_filepath_ = filepath;
  curr_filesize_ = filesize;

  // Start from scratch for this file.
  curr_bytes_copied_ = 0;
  sig_time_sec_ = 0;
  file_timer_.Reset();
}
// Records that |num_bytes_copied| more bytes of the current file were copied,
// both for the file and for the overall total, and refreshes the output.
// Only valid in the copy state.
void ProgressTracker::ReportCopyProgress(uint64_t num_bytes_copied) {
  assert(state_ == State::kCopy);
  total_bytes_copied_ += num_bytes_copied;
  curr_bytes_copied_ += num_bytes_copied;
  UpdateOutput(/*finished=*/false);
}
// Begins tracking a delta sync of |filepath|. |client_size| is the size of the
// file to diff, |server_size| the size of the file signatures are computed
// for. Must be called while idle. The tracker starts out in the diff state;
// ReportSyncProgress() switches between diff and signature state as needed.
void ProgressTracker::StartSync(const std::string& filepath,
                                uint64_t client_size, uint64_t server_size) {
  assert(state_ == State::kIdle);
  state_ = State::kSyncDiff;

  // Describe the new file.
  curr_filepath_ = filepath;
  curr_filesize_ = client_size;
  server_filesize_ = server_size;

  // Start from scratch for this file.
  curr_bytes_diffed_ = 0;
  curr_sig_bytes_read_ = 0;
  sig_time_sec_ = 0;
  file_timer_.Reset();
  sig_timer_.Reset();
}
void ProgressTracker::ReportSyncProgress(size_t num_client_bytes_processed,
size_t num_server_bytes_processed) {
assert(state_ == State::kSyncSig || state_ == State::kSyncDiff);
// If diffing is blocked on getting more server chunks, switch to kSyncSig,
// which effectively changes the output from "Dxxx%" to "Sxxx%" and removes
// the ETA.
State new_state = state_;
if (num_client_bytes_processed > 0) {
new_state = State::kSyncDiff;
} else if (num_server_bytes_processed > 0) {
new_state = State::kSyncSig;
}
// Measure time exclusively spent in server-side signature computation. This
// is later taken into account to get a better speed estimation for diffing.
if (state_ == State::kSyncDiff && new_state == State::kSyncSig) {
sig_timer_.Reset();
} else if (state_ == State::kSyncSig && new_state == State::kSyncDiff) {
sig_time_sec_ += sig_timer_.ElapsedSeconds();
}
state_ = new_state;
curr_bytes_diffed_ += num_client_bytes_processed;
total_bytes_diffed_ += num_client_bytes_processed;
curr_sig_bytes_read_ += num_server_bytes_processed;
total_sig_bytes_read_ += num_server_bytes_processed;
UpdateOutput(false);
}
void ProgressTracker::StartDeleteFiles() {
assert(state_ == State::kIdle);
state_ = State::kDelete;
files_deleted_ = 0;
}
// Records that the file |filepath| was deleted: makes it the current path,
// bumps the deleted-files counter and refreshes the output.
void ProgressTracker::ReportFileDeleted(const std::string& filepath) {
  files_deleted_ += 1;
  curr_filepath_ = filepath;
  UpdateOutput(/*finished=*/false);
}
// Records that the folder |filepath| was deleted: makes it the current path,
// bumps the deleted-folders counter and refreshes the output.
void ProgressTracker::ReportDirDeleted(const std::string& filepath) {
  dirs_deleted_ += 1;
  curr_filepath_ = filepath;
  UpdateOutput(/*finished=*/false);
}
// Ends the current operation: prints the final output for it and returns to
// the idle state. Must not be called while idle.
void ProgressTracker::Finish() {
  assert(state_ != State::kIdle);
  UpdateOutput(/*finished=*/true);

  const bool was_searching = state_ == State::kSearch;
  state_ = State::kIdle;
  if (was_searching) {
    // Total time does not count file search time.
    total_timer_.Reset();
  }
}
void ProgressTracker::UpdateOutput(bool finished) {
if (printer_->quiet()) {
return;
}
if (state_ == State::kDelete) {
if (total_files_to_delete_ + total_dirs_to_delete_ == 0 ||
(!only_total_progress_ && finished)) {
// No need to print here, it would result in "0/0" or duplicate lines.
return;
}
if (only_total_progress_) {
if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) {
return;
}
print_timer_.Reset();
Print(absl::StrFormat("%u/%u file(s) and %u/%u folder(s) deleted.",
files_deleted_, total_files_to_delete_,
dirs_deleted_, total_dirs_to_delete_),
finished);
return;
}
std::string txt =
absl::StrFormat("deleted %u / %u", files_deleted_ + dirs_deleted_,
total_files_to_delete_ + total_dirs_to_delete_);
int width = GetOutputWidth();
int file_width = std::max(12, width - static_cast<int>(txt.size()));
std::string short_path = ShortenFilePath(curr_filepath_, file_width);
PaddRight(&short_path, file_width);
printer_->Print(short_path + txt, true, width);
return;
}
if (only_total_progress_ && state_ != State::kSearch) {
finished &= GetTotalProgress() == 1;
if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) {
return;
}
print_timer_.Reset();
if (json_) {
double total_progress, total_sec, total_eta_sec;
GetTotalProgressStats(&total_progress, &total_sec, &total_eta_sec);
Json::Value val;
val["total_progress"] = total_progress;
val["total_duration"] = total_sec;
val["total_eta"] = total_eta_sec;
PrintJson(val, finished);
} else {
Print(GetTotalProgressText(), finished);
}
return;
}
// Always print if finished (to make sure to get the line feed).
if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) {
return;
}
print_timer_.Reset();
switch (state_) {
case State::kSearch: {
Print(absl::StrFormat("%u file(s) and %u folder(s) found", files_found_,
dirs_found_),
finished);
break;
}
case State::kCopy: {
// file C 50% 12345MB 123.4MB/s 00:10 ETA 50% TOT 05:00 ETA
double progress = GetProgress(curr_bytes_copied_, curr_filesize_);
OutputFileProgress(OutputType::kCopy, progress, finished);
break;
}
case State::kSyncSig: {
// file S 50% 12345MB ---.-MB/s 00:10 ETA 50% TOT 05:00 ETA
double progress =
GetProgress(curr_bytes_diffed_ + curr_sig_bytes_read_ / kSigFactor,
curr_filesize_ + server_filesize_ / kSigFactor);
OutputFileProgress(OutputType::kSig, progress, finished);
break;
}
case State::kSyncDiff: {
// file D 50% 12345MB 123.4MB/s 00:10 ETA 50% TOT 05:00 ETA
double progress =
GetProgress(curr_bytes_diffed_ + curr_sig_bytes_read_ / kSigFactor,
curr_filesize_ + server_filesize_ / kSigFactor);
OutputFileProgress(OutputType::kDiff, progress, finished);
break;
}
case State::kDelete:
// Should have been handled above.
assert(false);
case State::kIdle:
break;
}
}
// Prints one progress line for the current file, either as JSON or as text.
// |type| selects the kind of operation, |progress| is the per-file progress
// in [0,1], and |finished| marks the last line for this file. The text form
// consists of the shortened file path followed by operation, percentage,
// file size, speed, time or ETA, and the total progress.
void ProgressTracker::OutputFileProgress(OutputType type, double progress,
                                         bool finished) const {
  const bool is_copy = type == OutputType::kCopy;
  const bool is_sig = type == OutputType::kSig;
  const bool is_diff = type == OutputType::kDiff;

  // Just in case some calculation wasn't 100% right.
  if (finished) {
    progress = 1.0;
  }

  const double file_sec = file_timer_.ElapsedSeconds();
  const double file_eta_sec = file_sec / std::max(1e-3, progress) - file_sec;

  // Don't bother trying to estimate transfer speed for signature.
  double filespeed = 0;
  if (is_copy) {
    filespeed = curr_filesize_ * progress / std::max(1e-3, file_sec);
  } else if (is_diff) {
    // Take the sig time into account to estimate the effective transfer speed,
    // using the actual diffing progress, not the combined diffing + signing
    // progress.
    const double diff_progress =
        GetProgress(curr_bytes_diffed_, curr_filesize_);
    const double diff_sec = file_sec - sig_time_sec_;
    const double final_file_sec =
        diff_sec / std::max(1e-3, diff_progress) + sig_time_sec_;
    filespeed = curr_filesize_ / std::max(1e-3, final_file_sec);
  }

  if (json_) {
    const char* op = "Diff";
    if (is_copy) {
      op = "Copy";
    } else if (is_sig) {
      op = "Sign";
    }

    double total_progress, total_sec, total_eta_sec;
    GetTotalProgressStats(&total_progress, &total_sec, &total_eta_sec);

    Json::Value val;
    val["file"] = curr_filepath_;
    val["operation"] = op;
    val["size"] = curr_filesize_;
    val["bytes_per_second"] = filespeed;
    val["duration"] = file_sec;
    val["eta"] = file_eta_sec;
    val["total_progress"] = total_progress;
    val["total_duration"] = total_sec;
    val["total_eta"] = total_eta_sec;
    PrintJson(val, finished);
    return;
  }

  char ch = 'D';
  if (is_copy) {
    ch = 'C';
  } else if (is_sig) {
    ch = 'S';
  }

  const int progress_percent = static_cast<int>(progress * 100);

  uint64_t filesize = curr_filesize_;
  const char* filesize_unit = FormatIntBytes(&filesize);

  // Don't bother trying to estimate transfer speed for signature.
  const char* filespeed_unit = "B";
  std::string filespeed_str = "---.-";
  if (!is_sig) {
    filespeed_unit = FormatDoubleBytes(&filespeed);
    filespeed_str = absl::StrFormat("%5.1f", filespeed);
  }

  // While in progress, the time shown is an ETA; afterwards it is the time
  // the file took.
  const bool in_progress = progress < 1.0;
  const std::string filetime_str =
      FormatTime(in_progress ? file_eta_sec : file_sec);
  const char* fileeta_str = in_progress ? "ETA" : " ";

  // file S 50% 12345MB ---.-MB/s --:-- ETA 50% TOT 005:00 ETA
  const std::string total_text = GetTotalProgressText();
  const std::string txt = absl::StrFormat(
      " %c%3i%% %5i%-2s "
      "%s%-2s/s %s %3s "
      "%s",
      ch, progress_percent, filesize, filesize_unit, filespeed_str.c_str(),
      filespeed_unit, filetime_str.c_str(), fileeta_str, total_text.c_str());

  // The file path gets whatever room is left, but at least 12 characters.
  const int width = GetOutputWidth();
  const int file_width = std::max(12, width - static_cast<int>(txt.size()));
  std::string short_path = ShortenFilePath(curr_filepath_, file_width);
  PaddRight(&short_path, file_width);
  printer_->Print(short_path + txt, finished, width);
}
// Hands |text| to the printer together with the current output width, unless
// the printer is quiet. |finished| is passed on as the printer's newline
// flag.
void ProgressTracker::Print(std::string text, bool finished) const {
  if (printer_->quiet()) {
    return;
  }
  const int width = GetOutputWidth();
  printer_->Print(std::move(text), finished, width);
}
// Serializes |val| with Json::FastWriter, strips one trailing line feed if
// there is one, and hands the result to the printer with an output width of
// 0. Does nothing if the printer is quiet. |finished| is passed on as the
// printer's newline flag.
void ProgressTracker::PrintJson(const Json::Value& val, bool finished) const {
  if (printer_->quiet()) {
    return;
  }

  Json::FastWriter writer;
  std::string line = writer.write(val);
  const bool has_trailing_lf = !line.empty() && line.back() == '\n';
  if (has_trailing_lf) {
    line.pop_back();
  }
  printer_->Print(line, finished, 0);
}
// Returns the overall progress in [0,1]: bytes copied and diffed so far plus
// the weighted signature bytes read, relative to the respective totals.
// Signature bytes are divided by |kSigFactor| on both sides.
double ProgressTracker::GetTotalProgress() const {
  const uint64_t bytes_done = total_bytes_copied_ + total_bytes_diffed_ +
                              total_sig_bytes_read_ / kSigFactor;
  const uint64_t bytes_total = total_bytes_to_copy_ + total_bytes_to_diff_ +
                               total_sig_bytes_ / kSigFactor;
  return GetProgress(bytes_done, bytes_total);
}
// Fills in the overall statistics:
// - |total_progress|: overall progress in [0,1].
// - |total_sec|: seconds elapsed on the total timer.
// - |total_eta_sec|: estimated remaining seconds, extrapolated linearly from
//   the elapsed time. The progress used as divisor is at least 0.001.
void ProgressTracker::GetTotalProgressStats(double* total_progress,
                                            double* total_sec,
                                            double* total_eta_sec) const {
  const double progress = GetTotalProgress();
  const double elapsed_sec = total_timer_.ElapsedSeconds();
  const double projected_sec = elapsed_sec / std::max(1e-3, progress);

  *total_progress = progress;
  *total_sec = elapsed_sec;
  *total_eta_sec = projected_sec - elapsed_sec;
}
// Returns the total progress as text: the percentage, the marker "TOT" and a
// time. While the total progress is below 100%, the time is the estimated
// remaining time and is followed by "ETA"; afterwards it is the elapsed time.
std::string ProgressTracker::GetTotalProgressText() const {
  double progress = 0;
  double elapsed_sec = 0;
  double eta_sec = 0;
  GetTotalProgressStats(&progress, &elapsed_sec, &eta_sec);

  const int percent = static_cast<int>(progress * 100);
  const bool in_progress = progress < 1.0;
  const std::string time_str = FormatTime(in_progress ? eta_sec : elapsed_sec);
  const char* eta_label = in_progress ? "ETA" : " ";

  return absl::StrFormat("%3i%% TOT %s %s", percent, time_str.c_str(),
                         eta_label);
}
// Returns the width that output lines are formatted for: the fixed width
// passed to the constructor if it is positive, else the current console
// width.
int ProgressTracker::GetOutputWidth() const {
  if (fixed_output_width_ > 0) {
    return fixed_output_width_;
  }
  return Util::GetConsoleWidth();
}
} // namespace cdc_ft