From 4326e972ac81aebd335921d1319b3db037d6888f Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Fri, 7 Oct 2022 10:47:04 +0200
Subject: [PATCH] Releasing the former Stadia file transfer tools

The tools allow efficient and fast synchronization of large directory trees
from a Windows workstation to a Linux target machine.

The cdc_rsync* tools support efficient copying of files by using
content-defined chunking (CDC) to identify chunks within files that can be
reused.

asset_stream_manager + cdc_fuse_fs support efficient streaming of a local
directory to a remote virtual file system based on FUSE. They also employ CDC
to identify and reuse unchanged data chunks.
---
 .bazelrc | 70 +
 .clang-format | 29 +
 .gitignore | 13 +
 .gitmodules | 12 +
 LICENSE | 202 +++
 NMakeBazelProject.targets | 60 +
 README.md | 14 +
 WORKSPACE | 99 ++
 absl_helper/BUILD | 12 +
 absl_helper/jedec_size_flag.cc | 93 +
 absl_helper/jedec_size_flag.h | 61 +
 all_files.vcxitems | 274 +++
 all_files.vcxitems.user | 6 +
 asset_stream_manager/.gitignore | 3 +
 asset_stream_manager/BUILD | 186 ++
 asset_stream_manager/asset_stream_config.cc | 184 ++
 asset_stream_manager/asset_stream_config.h | 107 ++
 .../asset_stream_manager.vcxproj | 90 +
 .../asset_stream_manager.vcxproj.filters | 2 +
 asset_stream_manager/asset_stream_server.cc | 41 +
 asset_stream_manager/asset_stream_server.h | 91 +
 .../background_service_impl.cc | 56 +
 .../background_service_impl.h | 68 +
 asset_stream_manager/cdc_fuse_manager.cc | 225 +++
 asset_stream_manager/cdc_fuse_manager.h | 98 ++
 .../grpc_asset_stream_server.cc | 305 ++++
 .../grpc_asset_stream_server.h | 69 +
 ...ocal_assets_stream_manager_service_impl.cc | 259 +++
 ...local_assets_stream_manager_service_impl.h | 90 +
 asset_stream_manager/main.cc | 182 ++
 asset_stream_manager/metrics_recorder.cc | 69 +
 asset_stream_manager/metrics_recorder.h | 77 +
 asset_stream_manager/metrics_recorder_test.cc | 131 ++
 asset_stream_manager/multi_session.cc | 699 ++++++++
 asset_stream_manager/multi_session.h | 266 +++
 asset_stream_manager/multi_session_test.cc | 488 ++++++
 asset_stream_manager/session.cc | 131 ++
 asset_stream_manager/session.h | 90 +
 asset_stream_manager/session_config.h | 63 +
 .../session_management_server.cc | 76 +
 .../session_management_server.h | 63 +
 asset_stream_manager/session_manager.cc | 193 ++
 asset_stream_manager/session_manager.h | 100 ++
 .../testdata/multi_session/non_empty/a.txt | 1 +
 .../multi_session/non_empty/subdir/b.txt | 1 +
 .../multi_session/non_empty/subdir/c.txt | 1 +
 .../multi_session/non_empty/subdir/d.txt | 1 +
 asset_stream_manager/testdata/root.txt | 0
 .../testing_asset_stream_server.cc | 50 +
 .../testing_asset_stream_server.h | 60 +
 cdc_fuse_fs/.gitignore | 3 +
 cdc_fuse_fs/BUILD | 136 ++
 cdc_fuse_fs/asset.cc | 520 ++++++
 cdc_fuse_fs/asset.h | 182 ++
 cdc_fuse_fs/asset_stream_client.cc | 112 ++
 cdc_fuse_fs/asset_stream_client.h | 62 +
 cdc_fuse_fs/asset_test.cc | 820 +++++++++
 cdc_fuse_fs/cdc_fuse_fs.cc | 1553 +++++++++++++++++
 cdc_fuse_fs/cdc_fuse_fs.h | 86 +
 cdc_fuse_fs/cdc_fuse_fs.vcxproj | 61 +
 cdc_fuse_fs/cdc_fuse_fs.vcxproj.filters | 2 +
 cdc_fuse_fs/cdc_fuse_fs_test.cc | 1146 ++++++++++++
 cdc_fuse_fs/config_stream_client.cc | 122 ++
 cdc_fuse_fs/config_stream_client.h | 67 +
 cdc_fuse_fs/constants.h | 33 +
 cdc_fuse_fs/main.cc | 202 +++
 cdc_fuse_fs/mock_libfuse.cc | 111 ++
 cdc_fuse_fs/mock_libfuse.h | 123 ++
 cdc_indexer/BUILD | 35 +
 cdc_indexer/README.md | 72 +
 cdc_indexer/indexer.cc | 434 +++++
 cdc_indexer/indexer.h | 145 ++
 cdc_indexer/main.cc | 435 +++++
 cdc_rsync/.gitignore | 4 +
cdc_rsync/BUILD | 191 ++ cdc_rsync/README.md | 5 + cdc_rsync/base/BUILD | 92 + cdc_rsync/base/cdc_interface.cc | 670 +++++++ cdc_rsync/base/cdc_interface.h | 73 + cdc_rsync/base/cdc_interface_test.cc | 118 ++ cdc_rsync/base/fake_socket.cc | 70 + cdc_rsync/base/fake_socket.h | 57 + cdc_rsync/base/message_pump.cc | 473 +++++ cdc_rsync/base/message_pump.h | 275 +++ cdc_rsync/base/message_pump_test.cc | 272 +++ cdc_rsync/base/server_exit_code.h | 63 + cdc_rsync/base/socket.h | 45 + .../base/testdata/cdc_interface/new_file.txt | 1 + .../base/testdata/cdc_interface/old_file.txt | 1 + cdc_rsync/base/testdata/root.txt | 0 cdc_rsync/cdc_rsync.cc | 125 ++ cdc_rsync/cdc_rsync.h | 107 ++ cdc_rsync/cdc_rsync_client.cc | 789 +++++++++ cdc_rsync/cdc_rsync_client.h | 132 ++ cdc_rsync/client_file_info.h | 43 + cdc_rsync/client_socket.cc | 174 ++ cdc_rsync/client_socket.h | 53 + cdc_rsync/cpp.hint | 2 + cdc_rsync/dllmain.cc | 29 + cdc_rsync/error_messages.h | 54 + cdc_rsync/file_finder_and_sender.cc | 248 +++ cdc_rsync/file_finder_and_sender.h | 90 + cdc_rsync/file_finder_and_sender_test.cc | 408 +++++ cdc_rsync/parallel_file_opener.cc | 122 ++ cdc_rsync/parallel_file_opener.h | 74 + cdc_rsync/parallel_file_opener_test.cc | 78 + cdc_rsync/progress_tracker.cc | 549 ++++++ cdc_rsync/progress_tracker.h | 244 +++ cdc_rsync/progress_tracker_test.cc | 491 ++++++ cdc_rsync/protos/BUILD | 14 + cdc_rsync/protos/messages.proto | 193 ++ .../testdata/file_finder_and_sender/a.txt | 1 + .../testdata/file_finder_and_sender/b.txt | 1 + .../testdata/file_finder_and_sender/c.txt | 1 + .../file_finder_and_sender/subdir/d.txt | 1 + .../file_finder_and_sender/subdir/e.txt | 1 + .../testdata/parallel_file_opener/file1.txt | 1 + .../testdata/parallel_file_opener/file2.txt | 1 + .../testdata/parallel_file_opener/file3.txt | 1 + cdc_rsync/testdata/root.txt | 0 cdc_rsync/zstd_stream.cc | 182 ++ cdc_rsync/zstd_stream.h | 65 + cdc_rsync/zstd_stream_test.cc | 72 + cdc_rsync_cli/.gitignore | 3 + cdc_rsync_cli/BUILD | 44 + cdc_rsync_cli/cdc_rsync_cli.vcxproj | 87 + cdc_rsync_cli/cdc_rsync_cli.vcxproj.filters | 2 + cdc_rsync_cli/main.cc | 72 + cdc_rsync_cli/params.cc | 442 +++++ cdc_rsync_cli/params.h | 54 + cdc_rsync_cli/params_test.cc | 512 ++++++ .../testdata/params/empty_source_files.txt | 3 + .../testdata/params/exclude_files.txt | 2 + .../testdata/params/include_files.txt | 1 + .../testdata/params/source_files.txt | 6 + cdc_rsync_cli/testdata/root.txt | 0 cdc_rsync_server/.gitignore | 4 + cdc_rsync_server/BUILD | 158 ++ cdc_rsync_server/cdc_rsync_server.cc | 758 ++++++++ cdc_rsync_server/cdc_rsync_server.h | 121 ++ cdc_rsync_server/cdc_rsync_server.vcxproj | 59 + .../cdc_rsync_server.vcxproj.filters | 2 + cdc_rsync_server/file_deleter_and_sender.cc | 111 ++ cdc_rsync_server/file_deleter_and_sender.h | 65 + .../file_deleter_and_sender_test.cc | 263 +++ cdc_rsync_server/file_diff_generator.cc | 287 +++ cdc_rsync_server/file_diff_generator.h | 75 + cdc_rsync_server/file_diff_generator_test.cc | 619 +++++++ cdc_rsync_server/file_finder.cc | 96 + cdc_rsync_server/file_finder.h | 53 + cdc_rsync_server/file_finder_test.cc | 121 ++ cdc_rsync_server/file_info.h | 83 + cdc_rsync_server/main.cc | 105 ++ cdc_rsync_server/server_socket.cc | 214 +++ cdc_rsync_server/server_socket.h | 62 + .../file_diff_generator/base_dir/a.txt | 1 + .../file_diff_generator/base_dir/b.txt | 1 + .../file_diff_generator/base_dir/c.txt | 1 + .../file_diff_generator/base_dir/e.txt | 1 + .../file_diff_generator/base_dir/f.txt | 1 + 
.../file_diff_generator/base_dir/g.txt | 1 + .../file_diff_generator/copy_dest/e.txt | 1 + .../file_diff_generator/copy_dest/f.txt | 1 + .../file_diff_generator/copy_dest/g.txt | 1 + .../file_diff_generator/copy_dest/h.txt | 1 + .../file_diff_generator/copy_dest/i.txt | 1 + .../file_diff_generator/copy_dest/j.txt | 1 + .../testdata/file_finder/base_dir/a.txt | 1 + .../testdata/file_finder/base_dir/b.txt | 1 + .../testdata/file_finder/base_dir/dir1/c.txt | 1 + .../testdata/file_finder/base_dir/dir2/d.txt | 1 + .../testdata/file_finder/copy_dest/a.txt | 1 + .../testdata/file_finder/copy_dest/dir1/c.txt | 1 + .../testdata/file_finder/copy_dest/dir1/f.txt | 1 + .../testdata/file_finder/copy_dest/dir3/d.txt | 1 + .../testdata/file_finder/copy_dest/e.txt | 1 + cdc_rsync_server/testdata/root.txt | 0 cdc_rsync_server/unzstd_stream.cc | 89 + cdc_rsync_server/unzstd_stream.h | 50 + common/BUILD | 560 ++++++ common/buffer.cc | 88 + common/buffer.h | 87 + common/buffer_test.cc | 167 ++ common/clock.cc | 64 + common/clock.h | 61 + common/dir_iter.cc | 287 +++ common/dir_iter.h | 145 ++ common/dir_iter_test.cc | 267 +++ common/errno_mapping.cc | 169 ++ common/errno_mapping.h | 34 + common/errno_mapping_test.cc | 54 + common/file_watcher_win.cc | 629 +++++++ common/file_watcher_win.h | 111 ++ common/file_watcher_win_test.cc | 609 +++++++ common/gamelet_component.cc | 92 + common/gamelet_component.h | 70 + common/gamelet_component_test.cc | 124 ++ common/grpc_status.h | 55 + common/log.cc | 170 ++ common/log.h | 144 ++ common/log_test.cc | 134 ++ common/path.cc | 1190 +++++++++++++ common/path.h | 417 +++++ common/path_filter.cc | 80 + common/path_filter.h | 72 + common/path_filter_test.cc | 130 ++ common/path_test.cc | 1521 ++++++++++++++++ common/platform.h | 30 + common/port_manager.h | 113 ++ common/port_manager_test.cc | 256 +++ common/port_manager_win.cc | 302 ++++ common/process.h | 145 ++ common/process_test.cc | 342 ++++ common/process_win.cc | 848 +++++++++ common/remote_util.cc | 229 +++ common/remote_util.h | 123 ++ common/remote_util_test.cc | 106 ++ common/scoped_handle_win.cc | 77 + common/scoped_handle_win.h | 61 + common/sdk_util.cc | 93 + common/sdk_util.h | 96 + common/sdk_util_test.cc | 115 ++ common/semaphore.cc | 35 + common/semaphore.h | 52 + common/semaphore_test.cc | 122 ++ common/stats_collector.cc | 158 ++ common/stats_collector.h | 54 + common/status.cc | 54 + common/status.h | 101 ++ common/status_macros.h | 42 + common/status_test_macros.h | 82 + common/stopwatch.cc | 35 + common/stopwatch.h | 52 + common/stopwatch_test.cc | 47 + common/stub_process.cc | 158 ++ common/stub_process.h | 78 + common/test_main.cc | 53 + common/test_main.h | 32 + common/testdata/dir_iter/a/aa/aaa1.txt | 0 common/testdata/dir_iter/a/aa/aaa2.txt | 0 common/testdata/dir_iter/a/aa1.txt | 0 common/testdata/dir_iter/a/aa2.txt | 0 common/testdata/dir_iter/a/ab/aab1.txt | 0 common/testdata/dir_iter/a/ab/aab2.txt | 0 common/testdata/dir_iter/b/ba/bba1.txt | 0 common/testdata/dir_iter/b/ba/bba2.txt | 0 common/testdata/dir_iter/b/bb/bbb1.txt | 0 common/testdata/dir_iter/b/bb/bbb2.txt | 0 common/testdata/dir_iter/c/c1.txt | 0 common/testdata/dir_iter/c/c2.txt | 0 common/testdata/dir_iter/d/d1.txt | 0 common/testdata/dir_iter/d/d2.txt | 0 common/testdata/dir_iter/root.txt | 0 .../gamelet_component/other/cdc_rsync_server | 1 + .../gamelet_component/valid/cdc_rsync_server | 1 + common/testdata/root.txt | 0 common/testing_clock.cc | 44 + common/testing_clock.h | 56 + common/thread_safe_map.h | 52 + 
common/thread_safe_map_test.cc | 59 + common/threadpool.cc | 122 ++ common/threadpool.h | 103 ++ common/threadpool_test.cc | 155 ++ common/url.cc | 59 + common/url.h | 42 + common/url_test.cc | 52 + common/util.cc | 298 ++++ common/util.h | 137 ++ common/util_test.cc | 288 +++ data_store/BUILD | 128 ++ data_store/data_provider.cc | 364 ++++ data_store/data_provider.h | 157 ++ data_store/data_provider_test.cc | 370 ++++ data_store/data_store_reader.cc | 98 ++ data_store/data_store_reader.h | 129 ++ data_store/data_store_writer.cc | 44 + data_store/data_store_writer.h | 83 + data_store/disk_data_store.cc | 362 ++++ data_store/disk_data_store.h | 191 ++ data_store/disk_data_store_test.cc | 453 +++++ data_store/grpc_reader.cc | 103 ++ data_store/grpc_reader.h | 57 + data_store/mem_data_store.cc | 151 ++ data_store/mem_data_store.h | 82 + data_store/mem_data_store_test.cc | 188 ++ docs/code-of-conduct.md | 93 + docs/contributing.md | 28 + fastcdc/.gitignore | 6 + fastcdc/BUILD | 24 + fastcdc/fastcdc.h | 421 +++++ fastcdc/fastcdc_test.cc | 254 +++ file_transfer.sln | 84 + manifest.natvis | 17 + manifest/BUILD | 220 +++ manifest/asset_builder.cc | 115 ++ manifest/asset_builder.h | 151 ++ manifest/content_id.cc | 99 ++ manifest/content_id.h | 90 + manifest/content_id_test.cc | 79 + manifest/fake_manifest_builder.cc | 182 ++ manifest/fake_manifest_builder.h | 73 + manifest/fake_manifest_builder_test.cc | 120 ++ manifest/file_chunk_map.cc | 253 +++ manifest/file_chunk_map.h | 206 +++ manifest/file_chunk_map_test.cc | 252 +++ manifest/manifest_builder.cc | 740 ++++++++ manifest/manifest_builder.h | 296 ++++ manifest/manifest_builder_test.cc | 1138 ++++++++++++ manifest/manifest_iterator.cc | 163 ++ manifest/manifest_iterator.h | 94 + manifest/manifest_printer.cc | 59 + manifest/manifest_printer.h | 42 + manifest/manifest_proto_defs.h | 52 + manifest/manifest_test_base.cc | 239 +++ manifest/manifest_test_base.h | 155 ++ manifest/manifest_updater.cc | 816 +++++++++ manifest/manifest_updater.h | 268 +++ manifest/manifest_updater_test.cc | 655 +++++++ manifest/stats_printer.cc | 278 +++ manifest/stats_printer.h | 135 ++ .../manifest_updater/executables/game.elf | Bin 0 -> 50 bytes .../manifest_updater/executables/normal.txt | 1 + .../manifest_updater/executables/script.sh | 17 + .../manifest_updater/executables/win.exe | Bin 0 -> 1024 bytes .../testdata/manifest_updater/non_empty/a.txt | 1 + .../manifest_updater/non_empty/subdir/b.txt | 1 + .../manifest_updater/non_empty/subdir/c.txt | 1 + .../manifest_updater/non_empty/subdir/d.txt | 1 + manifest/testdata/root.txt | 0 metrics/BUILD | 49 + metrics/enums.h | 63 + metrics/messages.cc | 101 ++ metrics/messages.h | 157 ++ metrics/messages_test.cc | 222 +++ metrics/metrics.cc | 26 + metrics/metrics.h | 37 + proto/BUILD | 86 + proto/asset_stream_service.proto | 75 + proto/background_service.proto | 44 + proto/local_assets_stream_manager.proto | 59 + proto/manifest.proto | 151 ++ protobuf.natvis | 50 + rm_bazel_out_dir.bat | 34 + tests_asset_streaming_30/.gitignore | 2 + tests_asset_streaming_30/BUILD | 48 + .../tests_asset_streaming_30.vcxproj | 71 + tests_cdc_rsync/.gitignore | 2 + tests_cdc_rsync/BUILD | 44 + tests_cdc_rsync/tests_cdc_rsync.vcxproj | 72 + tests_common/.gitignore | 2 + tests_common/BUILD | 45 + tests_common/tests_common.vcxproj | 71 + third_party/absl | 1 + third_party/blake3/BUILD.bazel | 49 + third_party/dirent/BUILD.bazel | 15 + third_party/fuse/BUILD | 134 ++ third_party/fuse/config.h.linux | 92 + .../fuse/disable_symbol_versioning.patch 
| 14 + third_party/googletest | 1 + third_party/grpc | 1 + third_party/protobuf | 1 + third_party/zstd/BUILD.bazel | 99 ++ tools/BUILD | 8 + tools/windows_cc_library.bzl | 91 + 364 files changed, 49410 insertions(+) create mode 100644 .bazelrc create mode 100644 .clang-format create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 LICENSE create mode 100644 NMakeBazelProject.targets create mode 100644 README.md create mode 100644 WORKSPACE create mode 100644 absl_helper/BUILD create mode 100644 absl_helper/jedec_size_flag.cc create mode 100644 absl_helper/jedec_size_flag.h create mode 100644 all_files.vcxitems create mode 100644 all_files.vcxitems.user create mode 100644 asset_stream_manager/.gitignore create mode 100644 asset_stream_manager/BUILD create mode 100644 asset_stream_manager/asset_stream_config.cc create mode 100644 asset_stream_manager/asset_stream_config.h create mode 100644 asset_stream_manager/asset_stream_manager.vcxproj create mode 100644 asset_stream_manager/asset_stream_manager.vcxproj.filters create mode 100644 asset_stream_manager/asset_stream_server.cc create mode 100644 asset_stream_manager/asset_stream_server.h create mode 100644 asset_stream_manager/background_service_impl.cc create mode 100644 asset_stream_manager/background_service_impl.h create mode 100644 asset_stream_manager/cdc_fuse_manager.cc create mode 100644 asset_stream_manager/cdc_fuse_manager.h create mode 100644 asset_stream_manager/grpc_asset_stream_server.cc create mode 100644 asset_stream_manager/grpc_asset_stream_server.h create mode 100644 asset_stream_manager/local_assets_stream_manager_service_impl.cc create mode 100644 asset_stream_manager/local_assets_stream_manager_service_impl.h create mode 100644 asset_stream_manager/main.cc create mode 100644 asset_stream_manager/metrics_recorder.cc create mode 100644 asset_stream_manager/metrics_recorder.h create mode 100644 asset_stream_manager/metrics_recorder_test.cc create mode 100644 asset_stream_manager/multi_session.cc create mode 100644 asset_stream_manager/multi_session.h create mode 100644 asset_stream_manager/multi_session_test.cc create mode 100644 asset_stream_manager/session.cc create mode 100644 asset_stream_manager/session.h create mode 100644 asset_stream_manager/session_config.h create mode 100644 asset_stream_manager/session_management_server.cc create mode 100644 asset_stream_manager/session_management_server.h create mode 100644 asset_stream_manager/session_manager.cc create mode 100644 asset_stream_manager/session_manager.h create mode 100644 asset_stream_manager/testdata/multi_session/non_empty/a.txt create mode 100644 asset_stream_manager/testdata/multi_session/non_empty/subdir/b.txt create mode 100644 asset_stream_manager/testdata/multi_session/non_empty/subdir/c.txt create mode 100644 asset_stream_manager/testdata/multi_session/non_empty/subdir/d.txt create mode 100644 asset_stream_manager/testdata/root.txt create mode 100644 asset_stream_manager/testing_asset_stream_server.cc create mode 100644 asset_stream_manager/testing_asset_stream_server.h create mode 100644 cdc_fuse_fs/.gitignore create mode 100644 cdc_fuse_fs/BUILD create mode 100644 cdc_fuse_fs/asset.cc create mode 100644 cdc_fuse_fs/asset.h create mode 100644 cdc_fuse_fs/asset_stream_client.cc create mode 100644 cdc_fuse_fs/asset_stream_client.h create mode 100644 cdc_fuse_fs/asset_test.cc create mode 100644 cdc_fuse_fs/cdc_fuse_fs.cc create mode 100644 cdc_fuse_fs/cdc_fuse_fs.h create mode 100644 cdc_fuse_fs/cdc_fuse_fs.vcxproj create mode 
100644 cdc_fuse_fs/cdc_fuse_fs.vcxproj.filters create mode 100644 cdc_fuse_fs/cdc_fuse_fs_test.cc create mode 100644 cdc_fuse_fs/config_stream_client.cc create mode 100644 cdc_fuse_fs/config_stream_client.h create mode 100644 cdc_fuse_fs/constants.h create mode 100644 cdc_fuse_fs/main.cc create mode 100644 cdc_fuse_fs/mock_libfuse.cc create mode 100644 cdc_fuse_fs/mock_libfuse.h create mode 100644 cdc_indexer/BUILD create mode 100644 cdc_indexer/README.md create mode 100644 cdc_indexer/indexer.cc create mode 100644 cdc_indexer/indexer.h create mode 100644 cdc_indexer/main.cc create mode 100644 cdc_rsync/.gitignore create mode 100644 cdc_rsync/BUILD create mode 100644 cdc_rsync/README.md create mode 100644 cdc_rsync/base/BUILD create mode 100644 cdc_rsync/base/cdc_interface.cc create mode 100644 cdc_rsync/base/cdc_interface.h create mode 100644 cdc_rsync/base/cdc_interface_test.cc create mode 100644 cdc_rsync/base/fake_socket.cc create mode 100644 cdc_rsync/base/fake_socket.h create mode 100644 cdc_rsync/base/message_pump.cc create mode 100644 cdc_rsync/base/message_pump.h create mode 100644 cdc_rsync/base/message_pump_test.cc create mode 100644 cdc_rsync/base/server_exit_code.h create mode 100644 cdc_rsync/base/socket.h create mode 100644 cdc_rsync/base/testdata/cdc_interface/new_file.txt create mode 100644 cdc_rsync/base/testdata/cdc_interface/old_file.txt create mode 100644 cdc_rsync/base/testdata/root.txt create mode 100644 cdc_rsync/cdc_rsync.cc create mode 100644 cdc_rsync/cdc_rsync.h create mode 100644 cdc_rsync/cdc_rsync_client.cc create mode 100644 cdc_rsync/cdc_rsync_client.h create mode 100644 cdc_rsync/client_file_info.h create mode 100644 cdc_rsync/client_socket.cc create mode 100644 cdc_rsync/client_socket.h create mode 100644 cdc_rsync/cpp.hint create mode 100644 cdc_rsync/dllmain.cc create mode 100644 cdc_rsync/error_messages.h create mode 100644 cdc_rsync/file_finder_and_sender.cc create mode 100644 cdc_rsync/file_finder_and_sender.h create mode 100644 cdc_rsync/file_finder_and_sender_test.cc create mode 100644 cdc_rsync/parallel_file_opener.cc create mode 100644 cdc_rsync/parallel_file_opener.h create mode 100644 cdc_rsync/parallel_file_opener_test.cc create mode 100644 cdc_rsync/progress_tracker.cc create mode 100644 cdc_rsync/progress_tracker.h create mode 100644 cdc_rsync/progress_tracker_test.cc create mode 100644 cdc_rsync/protos/BUILD create mode 100644 cdc_rsync/protos/messages.proto create mode 100644 cdc_rsync/testdata/file_finder_and_sender/a.txt create mode 100644 cdc_rsync/testdata/file_finder_and_sender/b.txt create mode 100644 cdc_rsync/testdata/file_finder_and_sender/c.txt create mode 100644 cdc_rsync/testdata/file_finder_and_sender/subdir/d.txt create mode 100644 cdc_rsync/testdata/file_finder_and_sender/subdir/e.txt create mode 100644 cdc_rsync/testdata/parallel_file_opener/file1.txt create mode 100644 cdc_rsync/testdata/parallel_file_opener/file2.txt create mode 100644 cdc_rsync/testdata/parallel_file_opener/file3.txt create mode 100644 cdc_rsync/testdata/root.txt create mode 100644 cdc_rsync/zstd_stream.cc create mode 100644 cdc_rsync/zstd_stream.h create mode 100644 cdc_rsync/zstd_stream_test.cc create mode 100644 cdc_rsync_cli/.gitignore create mode 100644 cdc_rsync_cli/BUILD create mode 100644 cdc_rsync_cli/cdc_rsync_cli.vcxproj create mode 100644 cdc_rsync_cli/cdc_rsync_cli.vcxproj.filters create mode 100644 cdc_rsync_cli/main.cc create mode 100644 cdc_rsync_cli/params.cc create mode 100644 cdc_rsync_cli/params.h create mode 100644 
cdc_rsync_cli/params_test.cc create mode 100644 cdc_rsync_cli/testdata/params/empty_source_files.txt create mode 100644 cdc_rsync_cli/testdata/params/exclude_files.txt create mode 100644 cdc_rsync_cli/testdata/params/include_files.txt create mode 100644 cdc_rsync_cli/testdata/params/source_files.txt create mode 100644 cdc_rsync_cli/testdata/root.txt create mode 100644 cdc_rsync_server/.gitignore create mode 100644 cdc_rsync_server/BUILD create mode 100644 cdc_rsync_server/cdc_rsync_server.cc create mode 100644 cdc_rsync_server/cdc_rsync_server.h create mode 100644 cdc_rsync_server/cdc_rsync_server.vcxproj create mode 100644 cdc_rsync_server/cdc_rsync_server.vcxproj.filters create mode 100644 cdc_rsync_server/file_deleter_and_sender.cc create mode 100644 cdc_rsync_server/file_deleter_and_sender.h create mode 100644 cdc_rsync_server/file_deleter_and_sender_test.cc create mode 100644 cdc_rsync_server/file_diff_generator.cc create mode 100644 cdc_rsync_server/file_diff_generator.h create mode 100644 cdc_rsync_server/file_diff_generator_test.cc create mode 100644 cdc_rsync_server/file_finder.cc create mode 100644 cdc_rsync_server/file_finder.h create mode 100644 cdc_rsync_server/file_finder_test.cc create mode 100644 cdc_rsync_server/file_info.h create mode 100644 cdc_rsync_server/main.cc create mode 100644 cdc_rsync_server/server_socket.cc create mode 100644 cdc_rsync_server/server_socket.h create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/a.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/b.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/c.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/e.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/f.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/base_dir/g.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/e.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/f.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/g.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/h.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/i.txt create mode 100644 cdc_rsync_server/testdata/file_diff_generator/copy_dest/j.txt create mode 100644 cdc_rsync_server/testdata/file_finder/base_dir/a.txt create mode 100644 cdc_rsync_server/testdata/file_finder/base_dir/b.txt create mode 100644 cdc_rsync_server/testdata/file_finder/base_dir/dir1/c.txt create mode 100644 cdc_rsync_server/testdata/file_finder/base_dir/dir2/d.txt create mode 100644 cdc_rsync_server/testdata/file_finder/copy_dest/a.txt create mode 100644 cdc_rsync_server/testdata/file_finder/copy_dest/dir1/c.txt create mode 100644 cdc_rsync_server/testdata/file_finder/copy_dest/dir1/f.txt create mode 100644 cdc_rsync_server/testdata/file_finder/copy_dest/dir3/d.txt create mode 100644 cdc_rsync_server/testdata/file_finder/copy_dest/e.txt create mode 100644 cdc_rsync_server/testdata/root.txt create mode 100644 cdc_rsync_server/unzstd_stream.cc create mode 100644 cdc_rsync_server/unzstd_stream.h create mode 100644 common/BUILD create mode 100644 common/buffer.cc create mode 100644 common/buffer.h create mode 100644 common/buffer_test.cc create mode 100644 common/clock.cc create mode 100644 common/clock.h create mode 100644 common/dir_iter.cc create mode 100644 common/dir_iter.h create mode 100644 common/dir_iter_test.cc create 
mode 100644 common/errno_mapping.cc create mode 100644 common/errno_mapping.h create mode 100644 common/errno_mapping_test.cc create mode 100644 common/file_watcher_win.cc create mode 100644 common/file_watcher_win.h create mode 100644 common/file_watcher_win_test.cc create mode 100644 common/gamelet_component.cc create mode 100644 common/gamelet_component.h create mode 100644 common/gamelet_component_test.cc create mode 100644 common/grpc_status.h create mode 100644 common/log.cc create mode 100644 common/log.h create mode 100644 common/log_test.cc create mode 100644 common/path.cc create mode 100644 common/path.h create mode 100644 common/path_filter.cc create mode 100644 common/path_filter.h create mode 100644 common/path_filter_test.cc create mode 100644 common/path_test.cc create mode 100644 common/platform.h create mode 100644 common/port_manager.h create mode 100644 common/port_manager_test.cc create mode 100644 common/port_manager_win.cc create mode 100644 common/process.h create mode 100644 common/process_test.cc create mode 100644 common/process_win.cc create mode 100644 common/remote_util.cc create mode 100644 common/remote_util.h create mode 100644 common/remote_util_test.cc create mode 100644 common/scoped_handle_win.cc create mode 100644 common/scoped_handle_win.h create mode 100644 common/sdk_util.cc create mode 100644 common/sdk_util.h create mode 100644 common/sdk_util_test.cc create mode 100644 common/semaphore.cc create mode 100644 common/semaphore.h create mode 100644 common/semaphore_test.cc create mode 100644 common/stats_collector.cc create mode 100644 common/stats_collector.h create mode 100644 common/status.cc create mode 100644 common/status.h create mode 100644 common/status_macros.h create mode 100644 common/status_test_macros.h create mode 100644 common/stopwatch.cc create mode 100644 common/stopwatch.h create mode 100644 common/stopwatch_test.cc create mode 100644 common/stub_process.cc create mode 100644 common/stub_process.h create mode 100644 common/test_main.cc create mode 100644 common/test_main.h create mode 100644 common/testdata/dir_iter/a/aa/aaa1.txt create mode 100644 common/testdata/dir_iter/a/aa/aaa2.txt create mode 100644 common/testdata/dir_iter/a/aa1.txt create mode 100644 common/testdata/dir_iter/a/aa2.txt create mode 100644 common/testdata/dir_iter/a/ab/aab1.txt create mode 100644 common/testdata/dir_iter/a/ab/aab2.txt create mode 100644 common/testdata/dir_iter/b/ba/bba1.txt create mode 100644 common/testdata/dir_iter/b/ba/bba2.txt create mode 100644 common/testdata/dir_iter/b/bb/bbb1.txt create mode 100644 common/testdata/dir_iter/b/bb/bbb2.txt create mode 100644 common/testdata/dir_iter/c/c1.txt create mode 100644 common/testdata/dir_iter/c/c2.txt create mode 100644 common/testdata/dir_iter/d/d1.txt create mode 100644 common/testdata/dir_iter/d/d2.txt create mode 100644 common/testdata/dir_iter/root.txt create mode 100644 common/testdata/gamelet_component/other/cdc_rsync_server create mode 100644 common/testdata/gamelet_component/valid/cdc_rsync_server create mode 100644 common/testdata/root.txt create mode 100644 common/testing_clock.cc create mode 100644 common/testing_clock.h create mode 100644 common/thread_safe_map.h create mode 100644 common/thread_safe_map_test.cc create mode 100644 common/threadpool.cc create mode 100644 common/threadpool.h create mode 100644 common/threadpool_test.cc create mode 100644 common/url.cc create mode 100644 common/url.h create mode 100644 common/url_test.cc create mode 100644 common/util.cc create mode 
100644 common/util.h create mode 100644 common/util_test.cc create mode 100644 data_store/BUILD create mode 100644 data_store/data_provider.cc create mode 100644 data_store/data_provider.h create mode 100644 data_store/data_provider_test.cc create mode 100644 data_store/data_store_reader.cc create mode 100644 data_store/data_store_reader.h create mode 100644 data_store/data_store_writer.cc create mode 100644 data_store/data_store_writer.h create mode 100644 data_store/disk_data_store.cc create mode 100644 data_store/disk_data_store.h create mode 100644 data_store/disk_data_store_test.cc create mode 100644 data_store/grpc_reader.cc create mode 100644 data_store/grpc_reader.h create mode 100644 data_store/mem_data_store.cc create mode 100644 data_store/mem_data_store.h create mode 100644 data_store/mem_data_store_test.cc create mode 100644 docs/code-of-conduct.md create mode 100644 docs/contributing.md create mode 100644 fastcdc/.gitignore create mode 100644 fastcdc/BUILD create mode 100644 fastcdc/fastcdc.h create mode 100644 fastcdc/fastcdc_test.cc create mode 100644 file_transfer.sln create mode 100644 manifest.natvis create mode 100644 manifest/BUILD create mode 100644 manifest/asset_builder.cc create mode 100644 manifest/asset_builder.h create mode 100644 manifest/content_id.cc create mode 100644 manifest/content_id.h create mode 100644 manifest/content_id_test.cc create mode 100644 manifest/fake_manifest_builder.cc create mode 100644 manifest/fake_manifest_builder.h create mode 100644 manifest/fake_manifest_builder_test.cc create mode 100644 manifest/file_chunk_map.cc create mode 100644 manifest/file_chunk_map.h create mode 100644 manifest/file_chunk_map_test.cc create mode 100644 manifest/manifest_builder.cc create mode 100644 manifest/manifest_builder.h create mode 100644 manifest/manifest_builder_test.cc create mode 100644 manifest/manifest_iterator.cc create mode 100644 manifest/manifest_iterator.h create mode 100644 manifest/manifest_printer.cc create mode 100644 manifest/manifest_printer.h create mode 100644 manifest/manifest_proto_defs.h create mode 100644 manifest/manifest_test_base.cc create mode 100644 manifest/manifest_test_base.h create mode 100644 manifest/manifest_updater.cc create mode 100644 manifest/manifest_updater.h create mode 100644 manifest/manifest_updater_test.cc create mode 100644 manifest/stats_printer.cc create mode 100644 manifest/stats_printer.h create mode 100644 manifest/testdata/manifest_updater/executables/game.elf create mode 100644 manifest/testdata/manifest_updater/executables/normal.txt create mode 100644 manifest/testdata/manifest_updater/executables/script.sh create mode 100644 manifest/testdata/manifest_updater/executables/win.exe create mode 100644 manifest/testdata/manifest_updater/non_empty/a.txt create mode 100644 manifest/testdata/manifest_updater/non_empty/subdir/b.txt create mode 100644 manifest/testdata/manifest_updater/non_empty/subdir/c.txt create mode 100644 manifest/testdata/manifest_updater/non_empty/subdir/d.txt create mode 100644 manifest/testdata/root.txt create mode 100644 metrics/BUILD create mode 100644 metrics/enums.h create mode 100644 metrics/messages.cc create mode 100644 metrics/messages.h create mode 100644 metrics/messages_test.cc create mode 100644 metrics/metrics.cc create mode 100644 metrics/metrics.h create mode 100644 proto/BUILD create mode 100644 proto/asset_stream_service.proto create mode 100644 proto/background_service.proto create mode 100644 proto/local_assets_stream_manager.proto create mode 100644 
proto/manifest.proto create mode 100644 protobuf.natvis create mode 100644 rm_bazel_out_dir.bat create mode 100644 tests_asset_streaming_30/.gitignore create mode 100644 tests_asset_streaming_30/BUILD create mode 100644 tests_asset_streaming_30/tests_asset_streaming_30.vcxproj create mode 100644 tests_cdc_rsync/.gitignore create mode 100644 tests_cdc_rsync/BUILD create mode 100644 tests_cdc_rsync/tests_cdc_rsync.vcxproj create mode 100644 tests_common/.gitignore create mode 100644 tests_common/BUILD create mode 100644 tests_common/tests_common.vcxproj create mode 160000 third_party/absl create mode 100644 third_party/blake3/BUILD.bazel create mode 100644 third_party/dirent/BUILD.bazel create mode 100644 third_party/fuse/BUILD create mode 100644 third_party/fuse/config.h.linux create mode 100644 third_party/fuse/disable_symbol_versioning.patch create mode 160000 third_party/googletest create mode 160000 third_party/grpc create mode 160000 third_party/protobuf create mode 100644 third_party/zstd/BUILD.bazel create mode 100644 tools/BUILD create mode 100644 tools/windows_cc_library.bzl diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..ad7ede3 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,70 @@ +build:windows --cxxopt=/std:c++17 +build:windows --host_cxxopt=/std:c++17 +build:windows --linkopt="/OPT:REF" +build:windows --linkopt="/OPT:ICF" +build:windows --linkopt="/LTCG" + +build:windows --copt="/DUNICODE" +build:windows --copt="/D_UNICODE" +build:windows --copt="/W4" # Warning level 4 +build:windows --copt="/WX" # Treat warnings as errors +build:windows --copt="/Zc:forScope" # for loop initializer goes out of scope after loop +build:windows --copt="/EHsc" # Catches C++ exceptions only; assuming functions with `extern "C"` linkage never throw +build:windows --copt="/Zc:rvalueCast" # Enforce type conversion rules +build:windows --copt="/Zc:strictStrings" # Disable string literal type conversion + +# Warnings occuring in //third_party/grpc/... +build:windows --copt="/wd4018" # signed/unsigned mismatch +build:windows --copt="/wd4090" # different 'const' qualifiers +build:windows --copt="/wd4100" # unreferenced formal parameter +build:windows --copt="/wd4101" # unreferenced local variable +build:windows --copt="/wd4116" # unnamed type definition in parentheses +build:windows --copt="/wd4127" # conditional expression is constant +build:windows --copt="/wd4131" # old-style declarator +build:windows --copt="/wd4146" # unary minus operator applied to unsigned type +build:windows --copt="/wd4200" # nonstandard extension used: zero-sized array in struct/union +build:windows --copt="/wd4201" # nonstandard extension used: nameless struct/union +build:windows --copt="/wd4206" # nonstandard extension used: translation unit is empty +build:windows --copt="/wd4267" # conversion from 'size_t' to 'type', possible loss of data +build:windows --copt="/wd4244" # implicit narrowing conversion +build:windows --copt="/wd4245" # conversion from 'int' to 'uint32_t', signed/unsigned mismatch +build:windows --copt="/wd4310" # cast truncates constant value +build:windows --copt="/wd4312" # reinterpret_cast': conversion from ... to ... 
of greater size +build:windows --copt="/wd4324" # structure was padded due to alignment specifier +build:windows --copt="/wd4334" # result of 32-bit shift implicitly converted to 64 bits +build:windows --copt="/wd4389" # signed/unsigned mismatch +build:windows --copt="/wd4456" # declaration of 'var' hides previous local declaration +build:windows --copt="/wd4457" # declaration of 'var' hides function parameter +build:windows --copt="/wd4458" # declaration hides class member +build:windows --copt="/wd4459" # declaration of 'var' hides global declaration +build:windows --copt="/wd4646" # function declared with 'noreturn' has non-void return type +build:windows --copt="/wd4700" # uninitialized local variable used +build:windows --copt="/wd4701" # potentially uninitialized local variable used +build:windows --copt="/wd4702" # unreachable code +build:windows --copt="/wd4703" # potentially uninitialized local pointer variable used +build:windows --copt="/wd4706" # assignment within conditional expression +build:windows --copt="/wd4715" # not all control paths return a value +build:windows --copt="/wd4805" # unsafe mix of type 'int' and type 'bool' in operation +build:windows --copt="/wd4815" # zero-sized array in stack object will have no elements +build:windows --copt="/wd4834" # discarding return value of function with 'nodiscard' attribute +# Additional warnings occuring in //third_party/protobuf +build:windows --copt="/wd4125" # decimal digit terminates octal escape sequence +# Additional warnings occuring in other third_party libraries +build:windows --copt="/wd4005" # macro redefinition +# Additional warnings occuring in upb (opt-mode) +build:windows --copt="/wd4189" # local variable is initialized but not referenced + +# googletest uses this define. 
+build:windows --define absl=1 + +# Linux +build:linux --cxxopt=-std=c++17 +build:linux --host_cxxopt=-std=c++17 +build:linux --define absl=1 + +# Additional warnings occuring in //third_party/protobuf +build:linux --copt="-Wno-stringop-overflow" +# Additional warnings occuring in boringssl +build:linux --copt="-Wno-array-bounds" + +try-import %workspace%/user.bazelrc diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..0745d4b --- /dev/null +++ b/.clang-format @@ -0,0 +1,29 @@ +--- +Language: Proto +BasedOnStyle: Google + +--- +Language: Cpp +BasedOnStyle: Google + +DerivePointerAlignment: false +PointerAlignment: Left +SortIncludes: true +--- +Language: JavaScript +BasedOnStyle: Google + +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false + +--- +Language: TextProto +BasedOnStyle: Google + +--- +Language: CSharp +BasedOnStyle: Microsoft +ColumnLimit: 100 +NamespaceIndentation: All +BreakBeforeTernaryOperators: true \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..afa1b26 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.vs +.vscode +*.log +bin +dependencies +*.cflags +*.config +*.creator* +*.cxxflags +*.files +*.includes +.qtc_clangd +bazel-* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..3d18c16 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,12 @@ +[submodule "third_party/absl"] + path = third_party/absl + url = https://github.com/abseil/abseil-cpp.git +[submodule "third_party/protobuf"] + path = third_party/protobuf + url = https://github.com/google/protobuf.git +[submodule "third_party/googletest"] + path = third_party/googletest + url = https://github.com/google/googletest.git +[submodule "third_party/grpc"] + path = third_party/grpc + url = https://github.com/grpc/grpc.git diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/NMakeBazelProject.targets b/NMakeBazelProject.targets new file mode 100644 index 0000000..e194096 --- /dev/null +++ b/NMakeBazelProject.targets @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + windows + ggp_windows + x64-windows + k8 + dbg + opt + + | sed -r "s/^([^:\(]+[:\(][[:digit:]]+(,[[:digit:]]+)?[:\)])/$(BazelSourcePathPrefix)\\1/" + + 2>&1 $(BazelSedCommand) + + --config=$(BazelPlatform) --workspace_status_command="exit 0" --bes_backend= + $(BazelArgs) --linkopt=-Wl,--strip-all + + $(BazelArgs) --distinct_host_configuration=false + + $(BazelArgs) --copt=/GL + + $(BazelArgs) --copt=-fdata-sections --copt=-ffunction-sections --linkopt=-Wl,--gc-sections + + cmd.exe /Q /C $(SolutionDir)rm_bazel_out_dir.bat && + + && attrib -r $(OutDir)* + + $(GGP_SDK_PATH)BaseSDK\LLVM\10.0.1\include\c++\v1;$(GGP_SDK_PATH)BaseSDK\LLVM\10.0.1\lib\clang\10.0.1\include;$(GGP_SDK_PATH)sysroot\usr\include\x86_64-linux-gnu;$(GGP_SDK_PATH)sysroot\usr\include;$(BazelIncludePaths) + + + + + $(SolutionDir)..\..\bazel-out\$(BazelPlatformDir)-$(BazelCompilationMode)\bin;$(BazelIncludePaths) + $(RmBazelOutDir) bazel build --compilation_mode=$(BazelCompilationMode) $(BazelArgs) $(BazelTargets) $(BazelSedCommand) $(MakeRW) + $(RmBazelOutDir) bazel clean + $(RmBazelOutDir) bazel clean && bazel build --compilation_mode=$(BazelCompilationMode) $(BazelArgs) $(BazelTargets) $(BazelSedCommand) $(MakeRW) + $(OutDir)$(BazelOutputFile) + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a0a3239 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# CDC File Transfer + +This repository contains tools for synching and streaming files. They are based +on Content Defined Chunking (CDC), in particular +[FastCDC](https://www.usenix.org/conference/atc16/technical-sessions/presentation/xia), +to split up files into chunks. + +## CDC RSync +Tool to sync files to a remote machine, similar to the standard Linux +[rsync](https://linux.die.net/man/1/rsync). It supports fast compression and +uses a higher performing remote diffing approach based on CDC. + +## Asset Streaming +Tool to stream assets from a Windows machine to a Linux device. 
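
A minimal sketch of the chunking idea the README refers to, assuming a gear-style rolling hash and a power-of-two average chunk size; the names below (example::MakeGearTable, example::Chunk) and the min/avg/max parameters are illustrative only and are not the API of fastcdc/fastcdc.h:

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

namespace example {

// 256 pseudo-random 64-bit values, one per byte value (the "gear" table).
inline std::vector<uint64_t> MakeGearTable() {
  std::mt19937_64 rng(42);  // Fixed seed keeps chunk boundaries deterministic.
  std::vector<uint64_t> gear(256);
  for (uint64_t& g : gear) g = rng();
  return gear;
}

// Splits |data| into content-defined chunks and returns their sizes.
// |avg_size| must be a power of two; a boundary is declared where the low
// bits of the rolling hash are all zero, which happens about once every
// avg_size bytes.
inline std::vector<size_t> Chunk(const std::vector<uint8_t>& data,
                                 size_t min_size, size_t avg_size,
                                 size_t max_size) {
  static const std::vector<uint64_t> gear = MakeGearTable();
  const uint64_t mask = avg_size - 1;
  std::vector<size_t> sizes;
  size_t start = 0;
  while (start < data.size()) {
    uint64_t hash = 0;
    size_t len = 0;
    const size_t remaining = data.size() - start;
    const size_t limit = remaining < max_size ? remaining : max_size;
    while (len < limit) {
      hash = (hash << 1) + gear[data[start + len]];
      ++len;
      // Only cut after min_size bytes so chunks do not get too small.
      if (len >= min_size && (hash & mask) == 0) break;
    }
    sizes.push_back(len);  // Reaches max_size (or end of data) if no cut found.
    start += len;
  }
  return sizes;
}

}  // namespace example

Because a boundary depends only on the bytes immediately before it, an edit in
one place shifts at most the chunks around that edit (for hypothetical sizes
such as min 32 KiB / avg 64 KiB / max 128 KiB, not necessarily the defaults of
these tools); the remaining chunks keep their content hashes and can be reused
by the remote diffing in cdc_rsync and by the asset streaming chunk cache.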
\ No newline at end of file diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..aa3519f --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,99 @@ +workspace(name = "cdc_file_transfer") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "bazel_skylib", + sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728", + urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz"], +) + +http_archive( + name = "rules_pkg", + sha256 = "451e08a4d78988c06fa3f9306ec813b836b1d076d0f055595444ba4ff22b867f", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.7.1/rules_pkg-0.7.1.tar.gz", + "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.1/rules_pkg-0.7.1.tar.gz", + ], +) + +load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") + +rules_pkg_dependencies() + +http_archive( + name = "com_googlesource_code_re2", + sha256 = "f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f", + strip_prefix = "re2-2022-06-01", + url = "https://github.com/google/re2/archive/refs/tags/2022-06-01.tar.gz", +) + +http_archive( + name = "com_github_zstd", + build_file = "@//third_party/zstd:BUILD.bazel", + sha256 = "f7de13462f7a82c29ab865820149e778cbfe01087b3a55b5332707abf9db4a6e", + strip_prefix = "zstd-1.5.2", + url = "https://github.com/facebook/zstd/archive/refs/tags/v1.5.2.tar.gz", +) + +http_archive( + name = "com_github_blake3", + build_file = "@//third_party/blake3:BUILD.bazel", + sha256 = "112becf0983b5c83efff07f20b458f2dbcdbd768fd46502e7ddd831b83550109", + strip_prefix = "BLAKE3-1.3.1", + url = "https://github.com/BLAKE3-team/BLAKE3/archive/refs/tags/1.3.1.tar.gz", +) + +http_archive( + name = "com_github_fuse", + build_file = "@//third_party/fuse:BUILD", + sha256 = "832432d1ad4f833c20e13b57cf40ce5277a9d33e483205fc63c78111b3358874", + strip_prefix = "fuse-2.9.7", + patch_args = ["-p1"], + patches = ["@//third_party/fuse:disable_symbol_versioning.patch"], + url = "https://github.com/libfuse/libfuse/releases/download/fuse-2.9.7/fuse-2.9.7.tar.gz", +) + +http_archive( + name = "com_github_jsoncpp", + sha256 = "f409856e5920c18d0c2fb85276e24ee607d2a09b5e7d5f0a371368903c275da2", + strip_prefix = "jsoncpp-1.9.5", + url = "https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz", +) + +# Only required for //cdc_indexer. 
+http_archive( + name = "com_github_dirent", + build_file = "@//third_party/dirent:BUILD.bazel", + sha256 = "f72d39e3c39610b6901e391b140aa69b51e0eb99216939ed5e547b5dad03afb1", + strip_prefix = "dirent-1.23.2", + url = "https://github.com/tronkko/dirent/archive/refs/tags/1.23.2.tar.gz", +) + +local_repository( + name = "com_google_absl", + path = "third_party/absl", +) + +local_repository( + name = "com_google_googletest", + path = "third_party/googletest", +) + +local_repository( + name = "com_google_protobuf", + path = "third_party/protobuf", +) + +local_repository( + name = "com_github_grpc_grpc", + path = "third_party/grpc", +) + +load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") + +grpc_deps() + +load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps") + +grpc_extra_deps() diff --git a/absl_helper/BUILD b/absl_helper/BUILD new file mode 100644 index 0000000..c407df8 --- /dev/null +++ b/absl_helper/BUILD @@ -0,0 +1,12 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "jedec_size_flag", + srcs = ["jedec_size_flag.cc"], + hdrs = ["jedec_size_flag.h"], + deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:marshalling", + "@com_google_absl//absl/strings", + ], +) diff --git a/absl_helper/jedec_size_flag.cc b/absl_helper/jedec_size_flag.cc new file mode 100644 index 0000000..3ce1547 --- /dev/null +++ b/absl_helper/jedec_size_flag.cc @@ -0,0 +1,93 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl_helper/jedec_size_flag.h" + +namespace cdc_ft { + +namespace { + +JedecUnit ToJedecUnit(char c) { + switch (c) { + case 'b': + case 'B': + return JedecUnit::Byte; + case 'k': + case 'K': + return JedecUnit::Kilo; + case 'm': + case 'M': + return JedecUnit::Mega; + case 'g': + case 'G': + return JedecUnit::Giga; + case 't': + case 'T': + return JedecUnit::Tera; + case 'p': + case 'P': + return JedecUnit::Peta; + default: + return JedecUnit::Unkown; + } +} + +int LeftShiftAmount(JedecUnit unit) { + switch (unit) { + case JedecUnit::Kilo: + return 10; + case JedecUnit::Mega: + return 20; + case JedecUnit::Giga: + return 30; + case JedecUnit::Tera: + return 40; + case JedecUnit::Peta: + return 50; + default: + return 0; + } +} + +} // namespace + +bool AbslParseFlag(absl::string_view text, JedecSize* flag, std::string* err) { + if (text.empty()) return false; + JedecUnit unit = ToJedecUnit(text.back()); + if (unit != JedecUnit::Unkown) { + text.remove_suffix(1); + } else { + // Are we dealing with a digit character? + if (text.back() >= '0' && text.back() <= '9') { + unit = JedecUnit::Byte; + } else { + *err = + "Supported size units are (B)yte, (K)ilo, (M)ega, (G)iga, (T)era, " + "(P)eta."; + return false; + } + } + // Try to parse a plain uint64_t value. 
+ uint64_t size; + if (!absl::ParseFlag(text, &size, err)) { + return false; + } + flag->SetSize(size << LeftShiftAmount(unit)); + return true; +} + +std::string AbslUnparseFlag(const JedecSize& size) { + return absl::UnparseFlag(size.Size()); +} +}; // namespace cdc_ft diff --git a/absl_helper/jedec_size_flag.h b/absl_helper/jedec_size_flag.h new file mode 100644 index 0000000..fe0cf59 --- /dev/null +++ b/absl_helper/jedec_size_flag.h @@ -0,0 +1,61 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ABSL_HELPER_JEDEC_SIZE_FLAG_H_ +#define ABSL_HELPER_JEDEC_SIZE_FLAG_H_ + +#include + +#include "absl/flags/flag.h" +#include "absl/flags/marshalling.h" +#include "absl/strings/string_view.h" + +namespace cdc_ft { + +// Supported JEDEC unit suffixes. +enum class JedecUnit : char { + Unkown = 0, + Byte = 'B', // optional + Kilo = 'K', + Mega = 'M', + Giga = 'G', + Tera = 'T', + Peta = 'P', +}; + +// This class parses flag arguments that represent human readable data sizes, +// such as 1024, 2K, 3M, 4G, or 5T. +// +// See https://en.wikipedia.org/wiki/JEDEC_memory_standards. +class JedecSize { + public: + explicit JedecSize(uint64_t size = 0) : size_(size) {} + uint64_t Size() const { return size_; } + void SetSize(uint64_t size) { size_ = size; } + + private: + uint64_t size_; +}; + +// Abseil flags parser for JedecSize. +bool AbslParseFlag(absl::string_view text, JedecSize* flag, std::string* err); + +// Abseil flags unparser for JedecSize. 
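The parser above accepts values such as 1024, 2K, 512M, or 150G (asset_stream_config.cc below declares a cache_capacity flag of this type). A minimal usage sketch, not part of the patch; the flag name and default are illustrative:

#include "absl/flags/flag.h"
#include "absl_helper/jedec_size_flag.h"

// "150G" parses as 150 << 30 bytes; a bare number such as "1024" counts as bytes.
ABSL_FLAG(cdc_ft::JedecSize, example_capacity, cdc_ft::JedecSize(512ull << 20),
          "Capacity, e.g. 1024, 512M or 150G");

uint64_t ExampleCapacityBytes() {
  // AbslParseFlag()/AbslUnparseFlag() above are picked up by Abseil via ADL.
  return absl::GetFlag(FLAGS_example_capacity).Size();
}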
+std::string AbslUnparseFlag(const JedecSize& size); + +}; // namespace cdc_ft + +#endif // ABSL_HELPER_JEDEC_SIZE_FLAG_H_ diff --git a/all_files.vcxitems b/all_files.vcxitems new file mode 100644 index 0000000..9428041 --- /dev/null +++ b/all_files.vcxitems @@ -0,0 +1,274 @@ + + + + $(MSBuildAllProjects);$(MSBuildThisFileFullPath) + true + {f542af2d-5a17-4f55-be40-b1a2a6182811} + + + + %(AdditionalIncludeDirectories);$(MSBuildThisFileDirectory) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/all_files.vcxitems.user b/all_files.vcxitems.user new file mode 100644 index 0000000..966b4ff --- /dev/null +++ b/all_files.vcxitems.user @@ -0,0 +1,6 @@ + + + + true + + \ No newline at end of file diff --git a/asset_stream_manager/.gitignore b/asset_stream_manager/.gitignore new file mode 100644 index 0000000..7dc8dde --- /dev/null +++ b/asset_stream_manager/.gitignore @@ -0,0 +1,3 @@ +x64/* +*.log +*.user \ No newline at end of file diff --git a/asset_stream_manager/BUILD b/asset_stream_manager/BUILD new file mode 100644 index 0000000..87c4374 --- /dev/null +++ b/asset_stream_manager/BUILD @@ -0,0 +1,186 @@ +package(default_visibility = [ + "//:__subpackages__", +]) + +cc_binary( + name = "asset_stream_manager", + srcs = ["main.cc"], + data = [":roots_pem"], + deps = [ + ":asset_stream_config", + ":session_management_server", + "//common:log", + "//common:path", + "//common:sdk_util", + "//data_store:data_provider", + ], +) + +cc_library( + name = "asset_stream_server", + srcs = [ + "asset_stream_server.cc", + "grpc_asset_stream_server.cc", + "testing_asset_stream_server.cc", + ], + hdrs = [ + "asset_stream_server.h", + "grpc_asset_stream_server.h", + "testing_asset_stream_server.h", + ], + deps = [ + "//common:grpc_status", + "//common:log", + "//common:path", + "//common:status", + "//common:status_macros", + "//common:thread_safe_map", + "//data_store", + "//manifest:manifest_updater", + "//proto:asset_stream_service_grpc_proto", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "asset_stream_config", + srcs = ["asset_stream_config.cc"], + hdrs = ["asset_stream_config.h"], + deps = [ + ":multi_session", + "//absl_helper:jedec_size_flag", + "//common:log", + "//common:path", + "//common:status_macros", + "@com_github_jsoncpp//:jsoncpp", + "@com_google_absl//absl/flags:parse", + ], +) + +cc_library( + name = "cdc_fuse_manager", + srcs = ["cdc_fuse_manager.cc"], + hdrs = ["cdc_fuse_manager.h"], + deps = [ + "//cdc_fuse_fs:constants", + "//common:gamelet_component", + "//common:remote_util", + "//common:status_macros", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "session_management_server", + srcs = [ + "background_service_impl.cc", + "background_service_impl.h", + "local_assets_stream_manager_service_impl.cc", + "local_assets_stream_manager_service_impl.h", + "session_management_server.cc", + "session_manager.cc", + "session_manager.h", + ], + hdrs = 
["session_management_server.h"], + deps = [ + ":multi_session", + "//common:grpc_status", + "//common:log", + "//common:status_macros", + "//common:util", + "//manifest:manifest_updater", + "//metrics", + "//proto:background_service_grpc_proto", + "//proto:local_assets_stream_manager_grpc_proto", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "multi_session", + srcs = [ + "multi_session.cc", + "session.cc", + "session.h", + ], + hdrs = [ + "multi_session.h", + "session_config.h", + ], + deps = [ + ":asset_stream_server", + ":cdc_fuse_manager", + ":metrics_recorder", + "//common:file_watcher", + "//common:log", + "//common:path", + "//common:port_manager", + "//common:process", + "//common:remote_util", + "//common:sdk_util", + "//common:status_macros", + "//common:stopwatch", + "//data_store:disk_data_store", + "//manifest:manifest_printer", + "//manifest:manifest_updater", + "@com_google_absl//absl/status", + ], +) + +cc_test( + name = "multi_session_test", + srcs = ["multi_session_test.cc"], + data = [":all_test_data"], + deps = [ + ":multi_session", + "//common:test_main", + "//manifest:manifest_test_base", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "metrics_recorder", + srcs = ["metrics_recorder.cc"], + hdrs = ["metrics_recorder.h"], + deps = [ + "//common:log", + "//common:util", + "//metrics", + "//metrics:enums", + "//metrics:messages", + "@com_google_absl//absl/status", + ], +) + +cc_test( + name = "metrics_recorder_test", + srcs = ["metrics_recorder_test.cc"], + deps = [ + ":metrics_recorder", + "//common:status_test_macros", + "//common:test_main", + "//metrics", + "@com_google_googletest//:gtest", + ], +) + +# Copy roots.pem to the output folder, required for authenticated gRPC. +genrule( + name = "roots_pem", + srcs = ["@com_github_grpc_grpc//:root_certificates"], + outs = ["roots.pem"], + cmd = "cp $(location @com_github_grpc_grpc//:root_certificates) $(location roots.pem)", +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) + +filegroup( + name = "all_test_data", + srcs = glob(["testdata/**"]), +) diff --git a/asset_stream_manager/asset_stream_config.cc b/asset_stream_manager/asset_stream_config.cc new file mode 100644 index 0000000..d68d518 --- /dev/null +++ b/asset_stream_manager/asset_stream_config.cc @@ -0,0 +1,184 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "asset_stream_manager/asset_stream_config.h" + +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl_helper/jedec_size_flag.h" +#include "common/buffer.h" +#include "common/path.h" +#include "common/status_macros.h" +#include "json/json.h" + +ABSL_DECLARE_FLAG(std::string, src_dir); +ABSL_DECLARE_FLAG(std::string, instance_ip); +ABSL_DECLARE_FLAG(uint16_t, instance_port); +ABSL_DECLARE_FLAG(int, verbosity); +ABSL_DECLARE_FLAG(bool, debug); +ABSL_DECLARE_FLAG(bool, singlethreaded); +ABSL_DECLARE_FLAG(bool, stats); +ABSL_DECLARE_FLAG(bool, quiet); +ABSL_DECLARE_FLAG(bool, check); +ABSL_DECLARE_FLAG(bool, log_to_stdout); +ABSL_DECLARE_FLAG(cdc_ft::JedecSize, cache_capacity); +ABSL_DECLARE_FLAG(uint32_t, cleanup_timeout); +ABSL_DECLARE_FLAG(uint32_t, access_idle_timeout); +ABSL_DECLARE_FLAG(int, manifest_updater_threads); +ABSL_DECLARE_FLAG(int, file_change_wait_duration_ms); + +// Declare AS20 flags, so that AS30 can be used on older SDKs simply by +// replacing the binary. Note that the RETIRED_FLAGS macro can't be used +// because the flags contain dashes. This code mimics the macro. +absl::flags_internal::RetiredFlag RETIRED_FLAGS_session_ports; +absl::flags_internal::RetiredFlag RETIRED_FLAGS_gm_mount_point; +absl::flags_internal::RetiredFlag RETIRED_FLAGS_allow_edge; + +const auto RETIRED_FLAGS_REG_session_ports = + (RETIRED_FLAGS_session_ports.Retire("session-ports"), + ::absl::flags_internal::FlagRegistrarEmpty{}); +const auto RETIRED_FLAGS_REG_gm_mount_point = + (RETIRED_FLAGS_gm_mount_point.Retire("gamelet-mount-point"), + ::absl::flags_internal::FlagRegistrarEmpty{}); +const auto RETIRED_FLAGS_REG_allow_edge = + (RETIRED_FLAGS_allow_edge.Retire("allow-edge"), + ::absl::flags_internal::FlagRegistrarEmpty{}); + +namespace cdc_ft { + +AssetStreamConfig::AssetStreamConfig() { + src_dir_ = absl::GetFlag(FLAGS_src_dir); + instance_ip_ = absl::GetFlag(FLAGS_instance_ip); + instance_port_ = absl::GetFlag(FLAGS_instance_port); + session_cfg_.verbosity = absl::GetFlag(FLAGS_verbosity); + session_cfg_.fuse_debug = absl::GetFlag(FLAGS_debug); + session_cfg_.fuse_singlethreaded = absl::GetFlag(FLAGS_singlethreaded); + session_cfg_.stats = absl::GetFlag(FLAGS_stats); + session_cfg_.quiet = absl::GetFlag(FLAGS_quiet); + session_cfg_.fuse_check = absl::GetFlag(FLAGS_check); + log_to_stdout_ = absl::GetFlag(FLAGS_log_to_stdout); + session_cfg_.fuse_cache_capacity = absl::GetFlag(FLAGS_cache_capacity).Size(); + session_cfg_.fuse_cleanup_timeout_sec = absl::GetFlag(FLAGS_cleanup_timeout); + session_cfg_.fuse_access_idle_timeout_sec = + absl::GetFlag(FLAGS_access_idle_timeout); + session_cfg_.manifest_updater_threads = + absl::GetFlag(FLAGS_manifest_updater_threads); + session_cfg_.file_change_wait_duration_ms = + absl::GetFlag(FLAGS_file_change_wait_duration_ms); +} + +AssetStreamConfig::~AssetStreamConfig() = default; + +absl::Status AssetStreamConfig::LoadFromFile(const std::string& path) { + Buffer buffer; + RETURN_IF_ERROR(path::ReadFile(path, &buffer)); + + Json::Value config; + Json::Reader reader; + if (!reader.parse(buffer.data(), buffer.data() + buffer.size(), config, + false)) { + return absl::InvalidArgumentError( + absl::StrFormat("Failed to parse config file '%s': %s", path, + reader.getFormattedErrorMessages())); + } + +#define ASSIGN_VAR(var, flag, type) \ + do { \ + if (config.isMember(#flag)) { \ + var = config[#flag].as##type(); \ + 
flags_read_from_file_.insert(#flag); \ + } \ + } while (0) + + ASSIGN_VAR(src_dir_, src_dir, String); + ASSIGN_VAR(session_cfg_.verbosity, verbosity, Int); + ASSIGN_VAR(session_cfg_.fuse_debug, debug, Bool); + ASSIGN_VAR(session_cfg_.fuse_singlethreaded, singlethreaded, Bool); + ASSIGN_VAR(session_cfg_.stats, stats, Bool); + ASSIGN_VAR(session_cfg_.quiet, quiet, Bool); + ASSIGN_VAR(session_cfg_.fuse_check, check, Bool); + ASSIGN_VAR(log_to_stdout_, log_to_stdout, Bool); + ASSIGN_VAR(session_cfg_.fuse_cleanup_timeout_sec, cleanup_timeout, Int); + ASSIGN_VAR(session_cfg_.fuse_access_idle_timeout_sec, access_idle_timeout, + Int); + ASSIGN_VAR(session_cfg_.manifest_updater_threads, manifest_updater_threads, + Int); + ASSIGN_VAR(session_cfg_.file_change_wait_duration_ms, + file_change_wait_duration_ms, Int); + + // cache_capacity requires Jedec size conversion. + constexpr char kCacheCapacity[] = "cache_capacity"; + if (config.isMember(kCacheCapacity)) { + JedecSize cache_capacity; + std::string error; + if (AbslParseFlag(config[kCacheCapacity].asString(), &cache_capacity, + &error)) { + session_cfg_.fuse_cache_capacity = cache_capacity.Size(); + flags_read_from_file_.insert(kCacheCapacity); + } else { + // Note that |error| can't be logged here since this code runs before + // logging is initialized. + flag_read_errors_[kCacheCapacity] = error; + } + } + +#undef ASSIGN_VAR + + return absl::OkStatus(); +} // namespace cdc_ft + +std::string AssetStreamConfig::ToString() { + std::ostringstream ss; + ss << "src_dir = " << src_dir_ << std::endl; + ss << "verbosity = " << session_cfg_.verbosity + << std::endl; + ss << "debug = " << session_cfg_.fuse_debug + << std::endl; + ss << "singlethreaded = " << session_cfg_.fuse_singlethreaded + << std::endl; + ss << "stats = " << session_cfg_.stats << std::endl; + ss << "quiet = " << session_cfg_.quiet << std::endl; + ss << "check = " << session_cfg_.fuse_check + << std::endl; + ss << "log_to_stdout = " << log_to_stdout_ << std::endl; + ss << "cache_capacity = " << session_cfg_.fuse_cache_capacity + << std::endl; + ss << "cleanup_timeout = " + << session_cfg_.fuse_cleanup_timeout_sec << std::endl; + ss << "access_idle_timeout = " + << session_cfg_.fuse_access_idle_timeout_sec << std::endl; + ss << "manifest_updater_threads = " + << session_cfg_.manifest_updater_threads << std::endl; + ss << "file_change_wait_duration_ms = " + << session_cfg_.file_change_wait_duration_ms << std::endl; + return ss.str(); +} + +std::string AssetStreamConfig::GetFlagsReadFromFile() { + return absl::StrJoin(flags_read_from_file_, ", "); +} + +std::string AssetStreamConfig::GetFlagReadErrors() { + std::string error_str; + for (const auto& [flag, error] : flag_read_errors_) + error_str += absl::StrFormat("%sFailed to read '%s': %s", + error_str.empty() ? "" : "\n", flag, error); + return error_str; +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/asset_stream_config.h b/asset_stream_manager/asset_stream_config.h new file mode 100644 index 0000000..3dd6517 --- /dev/null +++ b/asset_stream_manager/asset_stream_config.h @@ -0,0 +1,107 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_ASSET_STREAM_CONFIG_H_ +#define ASSET_STREAM_MANAGER_ASSET_STREAM_CONFIG_H_ + +#include +#include +#include + +#include "absl/status/status.h" +#include "asset_stream_manager/session_config.h" + +namespace cdc_ft { + +// Class containing all configuration settings for asset streaming. +// Reads flags from the command line and optionally applies overrides from +// a json file. +class AssetStreamConfig { + public: + // Constructs the configuration by applying command line flags. + AssetStreamConfig(); + ~AssetStreamConfig(); + + // Loads a configuration from the JSON file at |path| and overrides any config + // values that are set in this file. Sample json file: + // { + // "src_dir":"C:\\path\\to\\assets", + // "verbosity":3, + // "debug":0, + // "singlethreaded":0, + // "stats":0, + // "quiet":0, + // "check":0, + // "log_to_stdout":0, + // "cache_capacity":"150G", + // "cleanup_timeout":300, + // "access_idle_timeout":5, + // "manifest_updater_threads":4, + // "file_change_wait_duration_ms":500 + // } + // Returns NotFoundError if the file does not exist. + // Returns InvalidArgumentError if the file is not valid JSON. + absl::Status LoadFromFile(const std::string& path); + + // Returns a string with all config values, suitable for logging. + std::string ToString(); + + // Gets a comma-separated list of flags that were read from the JSON file. + // These flags override command line flags. + std::string GetFlagsReadFromFile(); + + // Gets a newline-separated list of errors for each flag that could not be + // read from the JSON file. + std::string GetFlagReadErrors(); + + // Workstation directory to stream. Should usually be empty since mounts are + // triggered by the CLI or the partner portal via a gRPC call, but useful + // during development. + const std::string& src_dir() const { return src_dir_; } + + // IP address of the instance to stream to. Should usually be empty since + // mounts are triggered by the CLI or the partner portal via a gRPC call, but + // useful during development. + const std::string& instance_ip() const { return instance_ip_; } + + // IP address of the instance to stream to. Should usually be unset (0) since + // mounts are triggered by the CLI or the partner portal via a gRPC call, but + // useful during development. + const uint16_t instance_port() const { return instance_port_; } + + // Session configuration. + const SessionConfig session_cfg() const { return session_cfg_; } + + // Whether to log to a file or to stdout. + bool log_to_stdout() const { return log_to_stdout_; } + + private: + std::string src_dir_; + std::string instance_ip_; + uint16_t instance_port_ = 0; + SessionConfig session_cfg_; + bool log_to_stdout_ = false; + + // Use a set, so the flags are sorted alphabetically. + std::set flags_read_from_file_; + + // Maps flags to errors occurred while reading this flag. 
+ std::map flag_read_errors_; +}; + +}; // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_ASSET_STREAM_CONFIG_H_ diff --git a/asset_stream_manager/asset_stream_manager.vcxproj b/asset_stream_manager/asset_stream_manager.vcxproj new file mode 100644 index 0000000..e659952 --- /dev/null +++ b/asset_stream_manager/asset_stream_manager.vcxproj @@ -0,0 +1,90 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {84D81562-D66C-4A60-9F48-2696D7D81D26} + Win32Proj + cdc_rsync + $([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0')) + 10.0 + + + + Makefile + true + v141 + v142 + + + Makefile + false + v141 + v142 + + + + + + + + + + + + + $(SolutionDir)bazel-out\x64_windows-dbg\bin\asset_stream_manager\ + UNICODE + /std:c++17 + + + $(SolutionDir)bazel-out\x64_windows-opt\bin\asset_stream_manager\ + UNICODE + /std:c++17 + + + + {a537310c-0571-43d5-b7fe-c867f702294f} + false + false + + + + + + Console + + + + + //asset_stream_manager + asset_stream_manager.exe + ..\;..\third_party\absl;..\third_party\jsoncpp\include;..\third_party\blake3\c;..\third_party\googletest\googletest\include;..\third_party\protobuf\src;..\third_party\grpc\include;..\bazel-out\x64_windows-dbg\bin;$(VC_IncludePath);$(WindowsSDK_IncludePath) + ..\/ + + + + + + + + diff --git a/asset_stream_manager/asset_stream_manager.vcxproj.filters b/asset_stream_manager/asset_stream_manager.vcxproj.filters new file mode 100644 index 0000000..de18102 --- /dev/null +++ b/asset_stream_manager/asset_stream_manager.vcxproj.filters @@ -0,0 +1,2 @@ + + diff --git a/asset_stream_manager/asset_stream_server.cc b/asset_stream_manager/asset_stream_server.cc new file mode 100644 index 0000000..4139965 --- /dev/null +++ b/asset_stream_manager/asset_stream_server.cc @@ -0,0 +1,41 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
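A rough sketch of how the configuration class above can be wired up at startup. Not part of the patch; the JSON path is illustrative and error handling is abbreviated:

#include "absl/status/status.h"
#include "asset_stream_manager/asset_stream_config.h"
#include "common/log.h"

void LoadConfigSketch() {
  // Command line flags must already be parsed, since the constructor
  // snapshots their current values.
  cdc_ft::AssetStreamConfig cfg;

  // Optionally override flags from a JSON file; a missing file is not an error.
  absl::Status status = cfg.LoadFromFile("C:\\cdc\\asset_stream_config.json");
  if (!status.ok() && !absl::IsNotFound(status)) {
    LOG_ERROR("Failed to load config overrides: %s", status.ToString());
  }

  // Log the effective configuration and which flags came from the file.
  LOG_INFO("Configuration:\n%s", cfg.ToString());
  LOG_INFO("Flags overridden from file: %s", cfg.GetFlagsReadFromFile());
  LOG_INFO("%s", cfg.GetFlagReadErrors());
}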
+ +#include "asset_stream_manager/asset_stream_server.h" + +#include "asset_stream_manager/grpc_asset_stream_server.h" +#include "asset_stream_manager/testing_asset_stream_server.h" + +namespace cdc_ft { + +AssetStreamServer::AssetStreamServer(std::string src_dir, + DataStoreReader* data_store_reader, + FileChunkMap* file_chunks) {} + +std::unique_ptr AssetStreamServer::Create( + AssetStreamServerType type, std::string src_dir, + DataStoreReader* data_store_reader, FileChunkMap* file_chunks, + ContentSentHandler content_sent) { + switch (type) { + case AssetStreamServerType::kGrpc: + return std::make_unique(src_dir, data_store_reader, + file_chunks, content_sent); + case AssetStreamServerType::kTest: + return std::make_unique( + src_dir, data_store_reader, file_chunks); + } + assert(false); + return nullptr; +} +} // namespace cdc_ft diff --git a/asset_stream_manager/asset_stream_server.h b/asset_stream_manager/asset_stream_server.h new file mode 100644 index 0000000..7aeacfd --- /dev/null +++ b/asset_stream_manager/asset_stream_server.h @@ -0,0 +1,91 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_ASSET_STREAM_SERVER_H_ +#define ASSET_STREAM_MANAGER_ASSET_STREAM_SERVER_H_ +#include +#include + +#include "absl/status/status.h" +#include "absl/time/time.h" +#include "manifest/manifest_proto_defs.h" + +namespace cdc_ft { + +// Handles an event when content is transmitted from the workstation to a +// gamelet. +// |byte_count| number of bytes transferred during the session so far. +// |chunk_count| number of chunks transferred during the session so far. +// |instance_id| instance id, which identifies the session. +using ContentSentHandler = std::function; + +class DataStoreReader; +class FileChunkMap; + +enum class AssetStreamServerType { kGrpc, kTest }; + +class AssetStreamServer { + public: + // Returns AssetStreamServer of |type|. + // |src_dir| is the directory on the workstation to mount. + // |data_store_reader| is responsible for loading content by ID. + // |file_chunks| is used for mapping data chunk ids to file locations. + // |content_sent| handles event when data is transferred from the workstation + // to a gamelet. + static std::unique_ptr Create( + AssetStreamServerType type, std::string src_dir, + DataStoreReader* data_store_reader, FileChunkMap* file_chunks, + ContentSentHandler content_sent); + + AssetStreamServer(const AssetStreamServer& other) = delete; + AssetStreamServer& operator=(const AssetStreamServer& other) = delete; + virtual ~AssetStreamServer() = default; + + // Starts the asset stream server on the given |port|. + // Asserts that the server is not yet running. + virtual absl::Status Start(int port) = 0; + + // Sets |manifest_id| to be distributed to gamelets. + // Thread-safe. + virtual void SetManifestId(const ContentIdProto& manifest_id) = 0; + + // Waits until the FUSE for the given |instance| id has acknowledged the + // reception of the currently set manifest id. 
Returns a DeadlineExceeded + // error if the ack is not received within the given |timeout|. + // Thread-safe. + virtual absl::Status WaitForManifestAck(const std::string& instance, + absl::Duration timeout) = 0; + + // Stops internal services and waits for the server to shut down. + virtual void Shutdown() = 0; + + // Returns the used manifest id. + // Thread-safe. + virtual ContentIdProto GetManifestId() const = 0; + + protected: + // Creates a new asset streaming server. + // |src_dir| is the directory on the workstation to mount. + // |data_store_reader| is responsible for loading content by ID. + // |file_chunks| is used for mapping data chunk ids to file locations. + AssetStreamServer(std::string src_dir, DataStoreReader* data_store_reader, + FileChunkMap* file_chunks); +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_ASSET_STREAM_SERVER_H_ diff --git a/asset_stream_manager/background_service_impl.cc b/asset_stream_manager/background_service_impl.cc new file mode 100644 index 0000000..2099baa --- /dev/null +++ b/asset_stream_manager/background_service_impl.cc @@ -0,0 +1,56 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/background_service_impl.h" + +#include "common/grpc_status.h" +#include "common/log.h" +#include "common/util.h" +#include "grpcpp/grpcpp.h" + +namespace cdc_ft { + +BackgroundServiceImpl::BackgroundServiceImpl() {} + +BackgroundServiceImpl::~BackgroundServiceImpl() = default; + +void BackgroundServiceImpl::SetExitCallback(ExitCallback exit_callback) { + exit_callback_ = std::move(exit_callback); +} + +grpc::Status BackgroundServiceImpl::Exit(grpc::ServerContext* context, + const ExitRequest* request, + ExitResponse* response) { + LOG_INFO("RPC:Exit"); + if (exit_callback_) { + return ToGrpcStatus(exit_callback_()); + } + return grpc::Status::OK; +} + +grpc::Status BackgroundServiceImpl::GetPid(grpc::ServerContext* context, + const GetPidRequest* request, + GetPidResponse* response) { + LOG_INFO("RPC:GetPid"); + response->set_pid(static_cast(Util::GetPid())); + return grpc::Status::OK; +} + +grpc::Status BackgroundServiceImpl::HealthCheck(grpc::ServerContext* context, + const EmptyProto* request, + EmptyProto* response) { + return grpc::Status::OK; +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/background_service_impl.h b/asset_stream_manager/background_service_impl.h new file mode 100644 index 0000000..05b31a6 --- /dev/null +++ b/asset_stream_manager/background_service_impl.h @@ -0,0 +1,68 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
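A sketch of driving the AssetStreamServer interface above through its factory. Not part of the patch; the directory, port, instance id, and timeout are illustrative, and data_store_reader / file_chunks are assumed to exist:

#include <memory>

#include "absl/time/time.h"
#include "asset_stream_manager/asset_stream_server.h"

void RunServerSketch(cdc_ft::DataStoreReader* data_store_reader,
                     cdc_ft::FileChunkMap* file_chunks,
                     const cdc_ft::ContentIdProto& manifest_id) {
  // An empty handler is allowed; the gRPC implementation checks for null.
  cdc_ft::ContentSentHandler content_sent;

  std::unique_ptr<cdc_ft::AssetStreamServer> server =
      cdc_ft::AssetStreamServer::Create(
          cdc_ft::AssetStreamServerType::kGrpc, "C:\\assets",
          data_store_reader, file_chunks, content_sent);

  absl::Status status = server->Start(44432);
  server->SetManifestId(manifest_id);
  status = server->WaitForManifestAck(
      "edge/e-europe-west3-b/49d010c7be1845ac9a19a9033c64a460ces1",
      absl::Seconds(30));
  server->Shutdown();
}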
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_BACKGROUND_SERVICE_IMPL_H_ +#define ASSET_STREAM_MANAGER_BACKGROUND_SERVICE_IMPL_H_ + +#include "absl/status/status.h" +#include "asset_stream_manager/background_service_impl.h" +#include "asset_stream_manager/session_management_server.h" +#include "grpcpp/grpcpp.h" +#include "proto/background_service.grpc.pb.h" + +namespace cdc_ft { + +// Implements a service to manage a background process as a server. +// The corresponding client is implemented by ProcessManager. The background +// process in this case is asset_stream_manager. ProcessManager starts the +// process on demand (e.g. when `ggp instance mount --local-dir` is invoked) and +// manages its lifetime: It calls GetPid() initially, HealthCheck() periodically +// to monitor the process, and Exit() on shutdown. +// This service is owned by SessionManagementServer. +class BackgroundServiceImpl final + : public backgroundservice::BackgroundService::Service { + public: + using ExitRequest = backgroundservice::ExitRequest; + using ExitResponse = backgroundservice::ExitResponse; + using GetPidRequest = backgroundservice::GetPidRequest; + using GetPidResponse = backgroundservice::GetPidResponse; + using EmptyProto = google::protobuf::Empty; + + BackgroundServiceImpl(); + ~BackgroundServiceImpl(); + + // Exit callback gets called from the Exit() RPC. + using ExitCallback = std::function; + void SetExitCallback(ExitCallback exit_callback); + + grpc::Status Exit(grpc::ServerContext* context, const ExitRequest* request, + ExitResponse* response) override; + + grpc::Status GetPid(grpc::ServerContext* context, + const GetPidRequest* request, + GetPidResponse* response) override; + + grpc::Status HealthCheck(grpc::ServerContext* context, + const EmptyProto* request, + EmptyProto* response) override; + + private: + ExitCallback exit_callback_; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_BACKGROUND_SERVICE_IMPL_H_ diff --git a/asset_stream_manager/cdc_fuse_manager.cc b/asset_stream_manager/cdc_fuse_manager.cc new file mode 100644 index 0000000..a43bb23 --- /dev/null +++ b/asset_stream_manager/cdc_fuse_manager.cc @@ -0,0 +1,225 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
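The class comment above describes the lifecycle that ProcessManager drives against this service. A rough client-side sketch of that call pattern, not part of the patch; ProcessManager itself is not in this hunk and the channel target is illustrative:

#include "grpcpp/grpcpp.h"
#include "proto/background_service.grpc.pb.h"

void MonitorBackgroundProcessSketch() {
  auto channel = grpc::CreateChannel("localhost:44432",
                                     grpc::InsecureChannelCredentials());
  auto stub = backgroundservice::BackgroundService::NewStub(channel);

  // 1) Ask for the pid once at startup.
  grpc::ClientContext pid_ctx;
  backgroundservice::GetPidRequest pid_req;
  backgroundservice::GetPidResponse pid_res;
  grpc::Status status = stub->GetPid(&pid_ctx, pid_req, &pid_res);

  // 2) Poll HealthCheck() periodically to monitor the process.
  grpc::ClientContext hc_ctx;
  google::protobuf::Empty empty_req, empty_res;
  status = stub->HealthCheck(&hc_ctx, empty_req, &empty_res);

  // 3) Request a clean shutdown when the process is no longer needed.
  grpc::ClientContext exit_ctx;
  backgroundservice::ExitRequest exit_req;
  backgroundservice::ExitResponse exit_res;
  status = stub->Exit(&exit_ctx, exit_req, &exit_res);
}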
+ +#include "asset_stream_manager/cdc_fuse_manager.h" + +#include "absl/strings/match.h" +#include "absl/strings/str_format.h" +#include "cdc_fuse_fs/constants.h" +#include "common/gamelet_component.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status.h" +#include "common/status_macros.h" + +namespace cdc_ft { +namespace { + +constexpr char kFuseFilename[] = "cdc_fuse_fs"; +constexpr char kLibFuseFilename[] = "libfuse.so"; +constexpr char kFuseStdoutPrefix[] = "cdc_fuse_fs_stdout"; +constexpr char kRemoteToolsBinDir[] = "/opt/developer/tools/bin/"; + +// Mount point for FUSE on the gamelet. +constexpr char kMountDir[] = "/mnt/workstation"; + +// Cache directory on the gamelet to store data chunks. +constexpr char kCacheDir[] = "/var/cache/asset_streaming"; + +} // namespace + +CdcFuseManager::CdcFuseManager(std::string instance, + ProcessFactory* process_factory, + RemoteUtil* remote_util) + : instance_(std::move(instance)), + process_factory_(process_factory), + remote_util_(remote_util) {} + +CdcFuseManager::~CdcFuseManager() = default; + +absl::Status CdcFuseManager::Deploy() { + assert(!fuse_process_); + + LOG_INFO("Deploying FUSE..."); + + std::string exe_dir; + RETURN_IF_ERROR(path::GetExeDir(&exe_dir), "Failed to get exe directory"); + + std::string local_exe_path = path::Join(exe_dir, kFuseFilename); + std::string local_lib_path = path::Join(exe_dir, kLibFuseFilename); + +#ifdef _DEBUG + // Sync FUSE to the gamelet in debug. Debug builds are rather large, so + // there's a gain from using sync. + LOG_DEBUG("Syncing FUSE"); + RETURN_IF_ERROR( + remote_util_->Sync({local_exe_path, local_lib_path}, kRemoteToolsBinDir), + "Failed to sync FUSE to gamelet"); + LOG_DEBUG("Syncing FUSE succeeded"); +#else + // Copy FUSE to the gamelet. This is usually faster in production since it + // doesn't have to deploy ggp__server first. + LOG_DEBUG("Copying FUSE"); + RETURN_IF_ERROR(remote_util_->Scp({local_exe_path, local_lib_path}, + kRemoteToolsBinDir, true), + "Failed to copy FUSE to gamelet"); + LOG_DEBUG("Copying FUSE succeeded"); + + // Make FUSE executable. Note that sync does it automatically. + LOG_DEBUG("Making FUSE executable"); + std::string remotePath = path::JoinUnix(kRemoteToolsBinDir, kFuseFilename); + RETURN_IF_ERROR(remote_util_->Chmod("a+x", remotePath), + "Failed to set executable flag on FUSE"); + LOG_DEBUG("Making FUSE succeeded"); +#endif + + return absl::OkStatus(); +} + +absl::Status CdcFuseManager::Start(uint16_t local_port, uint16_t remote_port, + int verbosity, bool debug, + bool singlethreaded, bool enable_stats, + bool check, uint64_t cache_capacity, + uint32_t cleanup_timeout_sec, + uint32_t access_idle_timeout_sec) { + assert(!fuse_process_); + + // Gather stats for the FUSE gamelet component to determine whether a + // re-deploy is necessary. + std::string exe_dir; + RETURN_IF_ERROR(path::GetExeDir(&exe_dir), "Failed to get exe directory"); + std::vector components; + absl::Status status = + GameletComponent::Get({path::Join(exe_dir, kFuseFilename), + path::Join(exe_dir, kLibFuseFilename)}, + &components); + if (!status.ok()) { + return absl::NotFoundError(absl::StrFormat( + "Required gamelet component not found. Make sure the files %s and %s " + "reside in the same folder as stadia_assets_stream_manager_v3.exe.", + kFuseFilename, kLibFuseFilename)); + } + std::string component_args = GameletComponent::ToCommandLineArgs(components); + + // Build the remote command. 
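// For illustration only (not part of the patch): with remote_port=44433,
// verbosity=2, cleanup_timeout_sec=300, access_idle_timeout_sec=5,
// stats/check/debug/singlethreaded all off, and cache_capacity=150G, the
// command built below expands to roughly the following (wrapped for
// readability, elided values shown as placeholders):
//
//   LD_LIBRARY_PATH=/opt/developer/tools/bin/
//   /opt/developer/tools/bin/cdc_fuse_fs --instance='<instance id>'
//   --components='<component args>' --port=44433
//   --cache_dir=/var/cache/asset_streaming --verbosity=2 --cleanup_timeout=300
//   --access_idle_timeout=5 --stats=0 --check=0 --cache_capacity=161061273600
//   -- -o allow_root -o ro -o nonempty -o auto_unmount /mnt/workstation
//
// The options after "--" are standard libfuse mount options, and
// /mnt/workstation is the gamelet-side mount point defined above.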
+ std::string remotePath = path::JoinUnix(kRemoteToolsBinDir, kFuseFilename); + std::string remote_command = absl::StrFormat( + "LD_LIBRARY_PATH=%s %s --instance='%s' " + "--components='%s' --port=%i --cache_dir=%s " + "--verbosity=%i --cleanup_timeout=%i --access_idle_timeout=%i --stats=%i " + "--check=%i --cache_capacity=%u -- -o allow_root -o ro -o nonempty -o " + "auto_unmount %s%s%s", + kRemoteToolsBinDir, remotePath, instance_, component_args, remote_port, + kCacheDir, verbosity, cleanup_timeout_sec, access_idle_timeout_sec, + enable_stats, check, cache_capacity, kMountDir, debug ? " -d" : "", + singlethreaded ? " -s" : ""); + + bool needs_deploy = false; + RETURN_IF_ERROR( + RunFuseProcess(local_port, remote_port, remote_command, &needs_deploy)); + if (needs_deploy) { + // Deploy and try again. + RETURN_IF_ERROR(Deploy()); + RETURN_IF_ERROR( + RunFuseProcess(local_port, remote_port, remote_command, &needs_deploy)); + } + + return absl::OkStatus(); +} + +absl::Status CdcFuseManager::RunFuseProcess(uint16_t local_port, + uint16_t remote_port, + const std::string& remote_command, + bool* needs_deploy) { + assert(!fuse_process_); + assert(needs_deploy); + *needs_deploy = false; + + LOG_DEBUG("Running FUSE process"); + ProcessStartInfo start_info = + remote_util_->BuildProcessStartInfoForSshPortForwardAndCommand( + local_port, remote_port, true, remote_command); + start_info.name = kFuseFilename; + + // Capture stdout to determine whether a deploy is required. + fuse_stdout_.clear(); + fuse_startup_finished_ = false; + start_info.stdout_handler = [this, needs_deploy](const char* data, + size_t size) { + return HandleFuseStdout(data, size, needs_deploy); + }; + fuse_process_ = process_factory_->Create(start_info); + RETURN_IF_ERROR(fuse_process_->Start(), "Failed to start FUSE process"); + LOG_DEBUG("FUSE process started. Waiting for startup to finish."); + + // Run until process exits or startup finishes. + auto startup_finished = [this]() { return fuse_startup_finished_.load(); }; + RETURN_IF_ERROR(fuse_process_->RunUntil(startup_finished), + "Failed to run FUSE process"); + LOG_DEBUG("FUSE process startup complete."); + + // If the FUSE process exited before it could perform its up-to-date check, it + // most likely happens because the binary does not exist and needs to be + // deployed. + *needs_deploy |= !fuse_startup_finished_ && fuse_process_->HasExited() && + fuse_process_->ExitCode() != 0; + if (*needs_deploy) { + LOG_DEBUG("FUSE needs to be (re-)deployed."); + fuse_process_.reset(); + return absl::OkStatus(); + } + + return absl::OkStatus(); +} + +absl::Status CdcFuseManager::Stop() { + if (!fuse_process_) { + return absl::OkStatus(); + } + + LOG_DEBUG("Terminating FUSE process"); + absl::Status status = fuse_process_->Terminate(); + fuse_process_.reset(); + return status; +} + +bool CdcFuseManager::IsHealthy() const { + return fuse_process_ && !fuse_process_->HasExited(); +} + +absl::Status CdcFuseManager::HandleFuseStdout(const char* data, size_t size, + bool* needs_deploy) { + assert(needs_deploy); + + // Don't capture stdout beyond startup. + if (!fuse_startup_finished_) { + fuse_stdout_.append(data, size); + // The gamelet component prints some magic strings to stdout to indicate + // whether it's up-to-date. 
+ if (absl::StrContains(fuse_stdout_, kFuseUpToDate)) { + fuse_startup_finished_ = true; + } else if (absl::StrContains(fuse_stdout_, kFuseNotUpToDate)) { + fuse_startup_finished_ = true; + *needs_deploy = true; + } + } + + if (!remote_util_->Quiet()) { + // Forward to logging. + return LogOutput(kFuseStdoutPrefix, data, size); + } + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/cdc_fuse_manager.h b/asset_stream_manager/cdc_fuse_manager.h new file mode 100644 index 0000000..f629da9 --- /dev/null +++ b/asset_stream_manager/cdc_fuse_manager.h @@ -0,0 +1,98 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_CDC_FUSE_MANAGER_H_ +#define ASSET_STREAM_MANAGER_CDC_FUSE_MANAGER_H_ + +#include "absl/status/status.h" +#include "common/remote_util.h" + +namespace cdc_ft { + +class Process; +class ProcessFactory; +class RemoteUtil; + +// Manages the gamelet-side CDC FUSE filesystem process. +class CdcFuseManager { + public: + CdcFuseManager(std::string instance, ProcessFactory* process_factory, + RemoteUtil* remote_util); + ~CdcFuseManager(); + + CdcFuseManager(CdcFuseManager&) = delete; + CdcFuseManager& operator=(CdcFuseManager&) = delete; + + // Starts the CDC FUSE and establishes a reverse SSH tunnel from the gamelet's + // |remote_port| to the workstation's |local_port|. Deploys the binary if + // necessary. + // + // |verbosity| is the log verbosity used by the filesystem. + // |debug| puts the filesystem into debug mode if set to true. This also + // causes the process to run in the foreground, so that logs are piped through + // SSH to stdout of the workstation process. + // |singlethreaded| puts the filesystem into single-threaded mode if true. + // |enable_stats| determines whether FUSE should send debug statistics. + // |check| determines whether to execute FUSE consistency check. + // |cache_capacity| defines the cache capacity in bytes. + // |cleanup_timeout_sec| defines the data provider cleanup timeout in seconds. + // |access_idle_timeout_sec| defines the number of seconds after which data + // provider is considered to be access-idling. + absl::Status Start(uint16_t local_port, uint16_t remote_port, int verbosity, + bool debug, bool singlethreaded, bool enable_stats, + bool check, uint64_t cache_capacity, + uint32_t cleanup_timeout_sec, + uint32_t access_idle_timeout_sec); + + // Stops the CDC FUSE. + absl::Status Stop(); + + // Returns true if the FUSE process is running. + bool IsHealthy() const; + + private: + // Runs the FUSE process on the gamelet from the given |remote_command| and + // establishes a reverse SSH tunnel from the gamelet's |remote_port| to the + // workstation's |local_port|. + // + // If the FUSE is not up-to-date or does not exist, sets |needs_deploy| to + // true and returns OK. In that case, Deploy() needs to be called and the FUSE + // process should be run again. 
+ absl::Status RunFuseProcess(uint16_t local_port, uint16_t remote_port, + const std::string& remote_command, + bool* needs_deploy); + + // Deploys the gamelet components. + absl::Status Deploy(); + + // Output handler for FUSE's stdout. Sets |needs_deploy| to true if the output + // contains a magic marker to indicate that the binary has to be redeployed. + // Called in a background thread. + absl::Status HandleFuseStdout(const char* data, size_t size, + bool* needs_deploy); + + std::string instance_; + ProcessFactory* const process_factory_; + RemoteUtil* const remote_util_; + + std::unique_ptr fuse_process_; + std::string fuse_stdout_; + std::atomic fuse_startup_finished_{false}; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_CDC_FUSE_MANAGER_H_ diff --git a/asset_stream_manager/grpc_asset_stream_server.cc b/asset_stream_manager/grpc_asset_stream_server.cc new file mode 100644 index 0000000..2f9b85c --- /dev/null +++ b/asset_stream_manager/grpc_asset_stream_server.cc @@ -0,0 +1,305 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/grpc_asset_stream_server.h" + +#include "absl/strings/str_format.h" +#include "absl/time/time.h" +#include "common/grpc_status.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status.h" +#include "common/status_macros.h" +#include "data_store/data_store_reader.h" +#include "grpcpp/grpcpp.h" +#include "manifest/file_chunk_map.h" +#include "proto/asset_stream_service.grpc.pb.h" + +namespace cdc_ft { +namespace { + +using GetContentRequest = proto::GetContentRequest; +using GetContentResponse = proto::GetContentResponse; +using SendCachedContentIdsRequest = proto::SendCachedContentIdsRequest; +using SendCachedContentIdsResponse = proto::SendCachedContentIdsResponse; +using AssetStreamService = proto::AssetStreamService; + +using GetManifestIdRequest = proto::GetManifestIdRequest; +using GetManifestIdResponse = proto::GetManifestIdResponse; +using AckManifestIdReceivedRequest = proto::AckManifestIdReceivedRequest; +using AckManifestIdReceivedResponse = proto::AckManifestIdReceivedResponse; +using ConfigStreamService = proto::ConfigStreamService; + +} // namespace + +class AssetStreamServiceImpl final : public AssetStreamService::Service { + public: + AssetStreamServiceImpl(std::string src_dir, + DataStoreReader* data_store_reader, + FileChunkMap* file_chunks, InstanceIdMap* instance_ids, + ContentSentHandler content_sent) + : src_dir_(std::move(src_dir)), + data_store_reader_(data_store_reader), + file_chunks_(file_chunks), + started_(absl::Now()), + instance_ids_(instance_ids), + content_sent_(content_sent) {} + + grpc::Status GetContent(grpc::ServerContext* context, + const GetContentRequest* request, + GetContentResponse* response) override { + // See if this is a data chunk first. The hash lookup is faster than the + // file lookup from the data store. 
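// For illustration only (not part of the patch): typical use of the
// CdcFuseManager interface defined above, with made-up ports and values that
// match the sample JSON in asset_stream_config.h. process_factory and
// remote_util are assumed to exist:
//
//   cdc_ft::CdcFuseManager fuse("<instance id>", process_factory, remote_util);
//
//   // Start() deploys cdc_fuse_fs on demand and sets up the reverse SSH
//   // tunnel from gamelet port 44433 to workstation port 44432.
//   absl::Status status = fuse.Start(
//       /*local_port=*/44432, /*remote_port=*/44433, /*verbosity=*/2,
//       /*debug=*/false, /*singlethreaded=*/false, /*enable_stats=*/false,
//       /*check=*/false, /*cache_capacity=*/150ull << 30,
//       /*cleanup_timeout_sec=*/300, /*access_idle_timeout_sec=*/5);
//
//   if (!fuse.IsHealthy()) { /* FUSE process exited, restart or report. */ }
//   status = fuse.Stop();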
+ std::string rel_path; + uint64_t offset; + size_t size; + for (const ContentIdProto& id : request->id()) { + uint32_t uint32_size; + if (file_chunks_->Lookup(id, &rel_path, &offset, &uint32_size)) { + size = uint32_size; + // File data chunk. + RETURN_GRPC_IF_ERROR(ReadFromFile(id, rel_path, offset, uint32_size, + response->add_data())); + file_chunks_->RecordStreamedChunk(id, request->thread_id()); + } else { + // Manifest chunk. + RETURN_GRPC_IF_ERROR( + ReadFromDataStore(id, response->add_data(), &size)); + } + std::string instance_id = instance_ids_->Get(context->peer()); + if (content_sent_ != nullptr) { + content_sent_(size, 1, instance_id); + } + } + return grpc::Status::OK; + } + + grpc::Status SendCachedContentIds( + grpc::ServerContext* context, const SendCachedContentIdsRequest* request, + SendCachedContentIdsResponse* response) override { + for (const ContentIdProto& id : request->id()) + file_chunks_->RecordCachedChunk(id); + return grpc::Status::OK; + } + + private: + absl::Status ReadFromFile(const ContentIdProto& id, + const std::string& rel_path, uint64_t offset, + uint32_t size, std::string* data) { + std::string path = path::Join(src_dir_, rel_path); + path::FixPathSeparators(&path); + data->resize(size); + size_t read_size; + ASSIGN_OR_RETURN( + read_size, + path::ReadFile(path, const_cast(data->data()), offset, size), + "Failed to read chunk '%s', file '%s', offset %d, size %d", + ContentId::ToHexString(id), path, offset, size); + + absl::Time now = absl::Now(); + LOG_VERBOSE("'%s', %d, '%s', '%s', %u, %u", + absl::FormatTime("%H:%M:%S", now, absl::UTCTimeZone()), + absl::ToInt64Milliseconds(now - started_), + ContentId::ToHexString(id), path, offset, size); + + return absl::OkStatus(); + } + + absl::Status ReadFromDataStore(const ContentIdProto& id, std::string* data, + size_t* size) { + Buffer buf; + RETURN_IF_ERROR(data_store_reader_->Get(id, &buf), + "Failed to read chunk '%s'", ContentId::ToHexString(id)); + + // TODO: Get rid of copy after the Buffer uses std::string. + *data = std::string(buf.data(), buf.size()); + *size = buf.size(); + absl::Time now = absl::Now(); + LOG_VERBOSE("'%s', %d, '%s', %d", + absl::FormatTime("%H:%M:%S", now, absl::UTCTimeZone()), + absl::ToInt64Milliseconds(now - started_), + ContentId::ToHexString(id), buf.size()); + + return absl::OkStatus(); + } + + const std::string src_dir_; + DataStoreReader* const data_store_reader_; + FileChunkMap* const file_chunks_; + const absl::Time started_; + InstanceIdMap* instance_ids_; + ContentSentHandler content_sent_; +}; + +class ConfigStreamServiceImpl final : public ConfigStreamService::Service { + public: + explicit ConfigStreamServiceImpl(InstanceIdMap* instance_ids) + : instance_ids_(instance_ids) {} + ~ConfigStreamServiceImpl() { Shutdown(); } + + grpc::Status GetManifestId( + grpc::ServerContext* context, const GetManifestIdRequest* request, + ::grpc::ServerWriter* stream) override { + ContentIdProto local_id; + bool running = true; + do { + // Shutdown happened. 
+ if (!WaitForUpdate(local_id)) { + break; + } + LOG_INFO("Sending updated manifest id '%s' to the gamelet", + ContentId::ToHexString(local_id)); + GetManifestIdResponse response; + *response.mutable_id() = local_id; + bool success = stream->Write(response); + if (!success) { + LOG_WARNING("Failed to send updated manifest id '%s'", + ContentId::ToHexString(local_id)); + } + absl::ReaderMutexLock lock(&mutex_); + running = running_; + } while (running); + return grpc::Status::OK; + } + + grpc::Status AckManifestIdReceived( + grpc::ServerContext* context, const AckManifestIdReceivedRequest* request, + AckManifestIdReceivedResponse* response) override { + // Associate the peer with the gamelet ID. + instance_ids_->Set(context->peer(), request->gamelet_id()); + absl::MutexLock lock(&mutex_); + acked_manifest_ids_[request->gamelet_id()] = request->manifest_id(); + return grpc::Status::OK; + } + + void SetManifestId(const ContentIdProto& id) ABSL_LOCKS_EXCLUDED(mutex_) { + LOG_INFO("Updating manifest id '%s' in configuration service", + ContentId::ToHexString(id)); + absl::MutexLock lock(&mutex_); + id_ = id; + } + + absl::Status WaitForManifestAck(const std::string& instance, + absl::Duration timeout) { + absl::MutexLock lock(&mutex_); + auto cond = [this, &instance]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + AckedManifestIdsMap::iterator iter = acked_manifest_ids_.find(instance); + return iter != acked_manifest_ids_.end() && id_ == iter->second; + }; + + if (!mutex_.AwaitWithTimeout(absl::Condition(&cond), timeout)) { + return absl::DeadlineExceededError(absl::StrFormat( + "Instance '%s' did not acknowledge reception of manifest", instance)); + } + + return absl::OkStatus(); + } + + void Shutdown() ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + if (running_) { + LOG_INFO("Shutting down configuration service"); + running_ = false; + } + } + + ContentIdProto GetStoredManifestId() const ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + return id_; + } + + private: + // Returns false if the update process was cancelled. + bool WaitForUpdate(ContentIdProto& local_id) ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + auto cond = [&]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + return !running_ || local_id != id_; + }; + mutex_.Await(absl::Condition(&cond)); + local_id = id_; + return running_; + } + + mutable absl::Mutex mutex_; + ContentIdProto id_ ABSL_GUARDED_BY(mutex_); + bool running_ ABSL_GUARDED_BY(mutex_) = true; + InstanceIdMap* instance_ids_; + + // Maps instance ids to the last acknowledged manifest id. 
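// For illustration only (not part of the patch): the gamelet-side call pattern
// this service expects, presumably implemented by
// cdc_fuse_fs/config_stream_client.cc in this change. Generated gRPC names are
// assumed and error handling is omitted:
//
//   auto stub = proto::ConfigStreamService::NewStub(channel);
//
//   grpc::ClientContext stream_ctx;
//   proto::GetManifestIdRequest req;
//   auto reader = stub->GetManifestId(&stream_ctx, req);
//
//   proto::GetManifestIdResponse res;
//   while (reader->Read(&res)) {
//     // Apply res.id() as the new manifest root, then acknowledge it so that
//     // WaitForManifestAck() above can return on the workstation.
//     grpc::ClientContext ack_ctx;
//     proto::AckManifestIdReceivedRequest ack;
//     ack.set_gamelet_id(instance_id);
//     *ack.mutable_manifest_id() = res.id();
//     proto::AckManifestIdReceivedResponse ack_res;
//     stub->AckManifestIdReceived(&ack_ctx, ack, &ack_res);
//   }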
+ using AckedManifestIdsMap = std::unordered_map; + AckedManifestIdsMap acked_manifest_ids_ ABSL_GUARDED_BY(mutex_); +}; + +GrpcAssetStreamServer::GrpcAssetStreamServer(std::string src_dir, + DataStoreReader* data_store_reader, + FileChunkMap* file_chunks, + ContentSentHandler content_sent) + : AssetStreamServer(src_dir, data_store_reader, file_chunks), + asset_stream_service_(std::make_unique( + std::move(src_dir), data_store_reader, file_chunks, &instance_ids_, + content_sent)), + config_stream_service_( + std::make_unique(&instance_ids_)) {} + +GrpcAssetStreamServer::~GrpcAssetStreamServer() = default; + +absl::Status GrpcAssetStreamServer::Start(int port) { + assert(!server_); + + std::string server_address = absl::StrFormat("localhost:%i", port); + grpc::ServerBuilder builder; + int selected_port = 0; + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials(), + &selected_port); + builder.RegisterService(asset_stream_service_.get()); + builder.RegisterService(config_stream_service_.get()); + server_ = builder.BuildAndStart(); + if (selected_port != port) { + return MakeStatus( + "Failed to start streaming server: Could not listen on port %i. Is the " + "port in use?", + port); + } + if (!server_) return MakeStatus("Failed to start streaming server"); + LOG_INFO("Streaming server listening on '%s'", server_address); + return absl::OkStatus(); +} + +void GrpcAssetStreamServer::SetManifestId(const ContentIdProto& manifest_id) { + LOG_INFO("Setting manifest id '%s'", ContentId::ToHexString(manifest_id)); + assert(config_stream_service_); + config_stream_service_->SetManifestId(manifest_id); +} + +absl::Status GrpcAssetStreamServer::WaitForManifestAck( + const std::string& instance, absl::Duration timeout) { + assert(config_stream_service_); + return config_stream_service_->WaitForManifestAck(instance, timeout); +} + +void GrpcAssetStreamServer::Shutdown() { + assert(config_stream_service_); + config_stream_service_->Shutdown(); + if (server_) { + server_->Shutdown(); + server_->Wait(); + } +} + +ContentIdProto GrpcAssetStreamServer::GetManifestId() const { + assert(config_stream_service_); + return config_stream_service_->GetStoredManifestId(); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/grpc_asset_stream_server.h b/asset_stream_manager/grpc_asset_stream_server.h new file mode 100644 index 0000000..619f3cc --- /dev/null +++ b/asset_stream_manager/grpc_asset_stream_server.h @@ -0,0 +1,69 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_GRPC_ASSET_STREAM_SERVER_H_ +#define ASSET_STREAM_MANAGER_GRPC_ASSET_STREAM_SERVER_H_ + +#include +#include + +#include "asset_stream_manager/asset_stream_server.h" +#include "common/thread_safe_map.h" + +namespace grpc { +class Server; +} + +namespace cdc_ft { + +using InstanceIdMap = ThreadSafeMap; + +class AssetStreamServiceImpl; +class ConfigStreamServiceImpl; + +// gRpc server for streaming assets to one or more gamelets. 
+class GrpcAssetStreamServer : public AssetStreamServer { + public: + // Creates a new asset streaming gRpc server. + GrpcAssetStreamServer(std::string src_dir, DataStoreReader* data_store_reader, + FileChunkMap* file_chunks, + ContentSentHandler content_sent); + + ~GrpcAssetStreamServer(); + + // AssetStreamServer: + + absl::Status Start(int port) override; + + void SetManifestId(const ContentIdProto& manifest_id) override; + + absl::Status WaitForManifestAck(const std::string& instance, + absl::Duration timeout) override; + + void Shutdown() override; + + ContentIdProto GetManifestId() const override; + + private: + InstanceIdMap instance_ids_; + const std::unique_ptr asset_stream_service_; + const std::unique_ptr config_stream_service_; + std::unique_ptr server_; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_GRPC_ASSET_STREAM_SERVER_H_ diff --git a/asset_stream_manager/local_assets_stream_manager_service_impl.cc b/asset_stream_manager/local_assets_stream_manager_service_impl.cc new file mode 100644 index 0000000..86ab715 --- /dev/null +++ b/asset_stream_manager/local_assets_stream_manager_service_impl.cc @@ -0,0 +1,259 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/local_assets_stream_manager_service_impl.h" + +#include + +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "asset_stream_manager/multi_session.h" +#include "asset_stream_manager/session_manager.h" +#include "common/grpc_status.h" +#include "common/log.h" +#include "common/path.h" +#include "common/process.h" +#include "common/sdk_util.h" +#include "common/status.h" +#include "manifest/manifest_updater.h" + +namespace cdc_ft { +namespace { + +// Parses |instance_name| of the form +// "organizations/{org-id}/projects/{proj-id}/pools/{pool-id}/gamelets/{gamelet-id}" +// into parts. The pool id is not returned. +bool ParseInstanceName(const std::string& instance_name, + std::string* instance_id, std::string* project_id, + std::string* organization_id) { + std::string pool_id; + std::vector parts = absl::StrSplit(instance_name, '/'); + if (parts.size() != 10) return false; + if (parts[0] != "organizations" || parts[1].empty()) return false; + if (parts[2] != "projects" || parts[3].empty()) return false; + if (parts[4] != "pools" || parts[5].empty()) return false; + // Instance id is e.g. + // edge/e-europe-west3-b/49d010c7be1845ac9a19a9033c64a460ces1 + if (parts[6] != "gamelets" || parts[7].empty() || parts[8].empty() || + parts[9].empty()) + return false; + *organization_id = parts[1]; + *project_id = parts[3]; + *instance_id = absl::StrFormat("%s/%s/%s", parts[7], parts[8], parts[9]); + return true; +} + +// Parses |data| line by line for "|key|: value" and puts the first instance in +// |value| if present. Returns false if |data| does not contain "|key|: value". +// Trims whitespace. 
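A worked example for ParseInstanceName() above. The helper name and ids are made up for illustration; the gamelet segment mirrors the sample id in the comment above:

void ParseInstanceNameExample() {
  std::string instance_id, project_id, organization_id;
  bool ok = ParseInstanceName(
      "organizations/my-org/projects/my-proj/pools/my-pool/gamelets/"
      "edge/e-europe-west3-b/49d010c7be1845ac9a19a9033c64a460ces1",
      &instance_id, &project_id, &organization_id);
  // ok              == true
  // organization_id == "my-org"
  // project_id      == "my-proj"
  // instance_id     == "edge/e-europe-west3-b/49d010c7be1845ac9a19a9033c64a460ces1"
  // The pool id ("my-pool") is checked for presence but not returned.
}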
+bool ParseValue(const std::string& data, const std::string& key, + std::string* value) { + std::istringstream stream(data); + + std::string line; + while (std::getline(stream, line)) { + if (line.find(key + ":") == 0) { + // Trim value. + size_t start_pos = key.size() + 1; + while (start_pos < line.size() && isspace(line[start_pos])) { + start_pos++; + } + size_t end_pos = line.size(); + while (end_pos > start_pos && isspace(line[end_pos - 1])) { + end_pos--; + } + *value = line.substr(start_pos, end_pos - start_pos); + return true; + } + } + return false; +} + +// Why oh why? +std::string Quoted(const std::string& s) { + std::ostringstream ss; + ss << std::quoted(s); + return ss.str(); +} + +} // namespace + +LocalAssetsStreamManagerServiceImpl::LocalAssetsStreamManagerServiceImpl( + SessionManager* session_manager, ProcessFactory* process_factory, + metrics::MetricsService* metrics_service) + : session_manager_(session_manager), + process_factory_(process_factory), + metrics_service_(metrics_service) {} + +LocalAssetsStreamManagerServiceImpl::~LocalAssetsStreamManagerServiceImpl() = + default; + +grpc::Status LocalAssetsStreamManagerServiceImpl::StartSession( + grpc::ServerContext* /*context*/, const StartSessionRequest* request, + StartSessionResponse* /*response*/) { + LOG_INFO("RPC:StartSession(gamelet_name='%s', workstation_directory='%s'", + request->gamelet_name(), request->workstation_directory()); + + metrics::DeveloperLogEvent evt; + evt.as_manager_data = std::make_unique(); + evt.as_manager_data->session_start_data = + std::make_unique(); + evt.as_manager_data->session_start_data->absl_status = absl::StatusCode::kOk; + evt.as_manager_data->session_start_data->status = + metrics::SessionStartStatus::kOk; + evt.as_manager_data->session_start_data->origin = + ConvertOrigin(request->origin()); + + // Parse instance/project/org id. 
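  // For orientation (hypothetical IDs): a |gamelet_name| such as
  //   "organizations/org-1/projects/proj-1/pools/pool-1/gamelets/edge/e-zone/i-1"
  // parses into instance_id "edge/e-zone/i-1", project_id "proj-1" and
  // organization_id "org-1"; see ParseInstanceName() above.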
+ absl::Status status; + MultiSession* ms = nullptr; + std::string instance_id, project_id, organization_id, instance_ip; + uint16_t instance_port = 0; + if (!ParseInstanceName(request->gamelet_name(), &instance_id, &project_id, + &organization_id)) { + status = absl::InvalidArgumentError(absl::StrFormat( + "Failed to parse instance name '%s'", request->gamelet_name())); + } else { + evt.project_id = project_id; + evt.organization_id = organization_id; + + status = InitSsh(instance_id, project_id, organization_id, &instance_ip, + &instance_port); + + if (status.ok()) { + status = session_manager_->StartSession( + instance_id, project_id, organization_id, instance_ip, instance_port, + request->workstation_directory(), &ms, + &evt.as_manager_data->session_start_data->status); + } + } + + evt.as_manager_data->session_start_data->absl_status = status.code(); + if (ms) { + evt.as_manager_data->session_start_data->concurrent_session_count = + ms->GetSessionCount(); + if (!instance_id.empty() && ms->HasSessionForInstance(instance_id)) { + ms->RecordSessionEvent(std::move(evt), metrics::EventType::kSessionStart, + instance_id); + } else { + ms->RecordMultiSessionEvent(std::move(evt), + metrics::EventType::kSessionStart); + } + } else { + metrics_service_->RecordEvent(std::move(evt), + metrics::EventType::kSessionStart); + } + + if (status.ok()) { + LOG_INFO("StartSession() succeeded"); + } else { + LOG_ERROR("StartSession() failed: %s", status.ToString()); + } + return ToGrpcStatus(status); +} + +grpc::Status LocalAssetsStreamManagerServiceImpl::StopSession( + grpc::ServerContext* /*context*/, const StopSessionRequest* request, + StopSessionResponse* /*response*/) { + LOG_INFO("RPC:StopSession(gamelet_id='%s')", request->gamelet_id()); + + absl::Status status = session_manager_->StopSession(request->gamelet_id()); + if (status.ok()) { + LOG_INFO("StopSession() succeeded"); + } else { + LOG_ERROR("StopSession() failed: %s", status.ToString()); + } + return ToGrpcStatus(status); +} + +metrics::RequestOrigin LocalAssetsStreamManagerServiceImpl::ConvertOrigin( + StartSessionRequestOrigin origin) const { + switch (origin) { + case StartSessionRequest::ORIGIN_UNKNOWN: + return metrics::RequestOrigin::kUnknown; + case StartSessionRequest::ORIGIN_CLI: + return metrics::RequestOrigin::kCli; + case StartSessionRequest::ORIGIN_PARTNER_PORTAL: + return metrics::RequestOrigin::kPartnerPortal; + default: + return metrics::RequestOrigin::kUnknown; + } +} + +absl::Status LocalAssetsStreamManagerServiceImpl::InitSsh( + const std::string& instance_id, const std::string& project_id, + const std::string& organization_id, std::string* instance_ip, + uint16_t* instance_port) { + SdkUtil sdk_util; + instance_ip->clear(); + *instance_port = 0; + + ProcessStartInfo start_info; + start_info.command = absl::StrFormat( + "%s ssh init", path::Join(sdk_util.GetDevBinPath(), "ggp")); + start_info.command += absl::StrFormat(" --instance %s", Quoted(instance_id)); + if (!project_id.empty()) { + start_info.command += absl::StrFormat(" --project %s", Quoted(project_id)); + } + if (!organization_id.empty()) { + start_info.command += + absl::StrFormat(" --organization %s", Quoted(organization_id)); + } + start_info.name = "ggp ssh init"; + + std::string output; + start_info.stdout_handler = [&output, this](const char* data, + size_t data_size) { + // Note: This is called from a background thread! 
+ output.append(data, data_size); + return absl::OkStatus(); + }; + start_info.forward_output_to_log = true; + + std::unique_ptr process = process_factory_->Create(start_info); + absl::Status status = process->Start(); + if (!status.ok()) { + return WrapStatus(status, "Failed to start ggp process"); + } + + status = process->RunUntilExit(); + if (!status.ok()) { + return WrapStatus(status, "Failed to run ggp process"); + } + + uint32_t exit_code = process->ExitCode(); + if (exit_code != 0) { + return MakeStatus("ggp process exited with code %u", exit_code); + } + + // Parse gamelet IP. Should be "Host: ". + if (!ParseValue(output, "Host", instance_ip)) { + return MakeStatus("Failed to parse host from ggp ssh init response\n%s", + output); + } + + // Parse ssh port. Should be "Port: ". + std::string port_string; + const bool result = ParseValue(output, "Port", &port_string); + int int_port = atoi(port_string.c_str()); + if (!result || int_port == 0 || int_port <= 0 || int_port > UINT_MAX) { + return MakeStatus("Failed to parse ssh port from ggp ssh init response\n%s", + output); + } + + *instance_port = static_cast(int_port); + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/local_assets_stream_manager_service_impl.h b/asset_stream_manager/local_assets_stream_manager_service_impl.h new file mode 100644 index 0000000..b109695 --- /dev/null +++ b/asset_stream_manager/local_assets_stream_manager_service_impl.h @@ -0,0 +1,90 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_LOCAL_ASSETS_STREAM_MANAGER_SERVICE_IMPL_H_ +#define ASSET_STREAM_MANAGER_LOCAL_ASSETS_STREAM_MANAGER_SERVICE_IMPL_H_ + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "asset_stream_manager/session_config.h" +#include "metrics/metrics.h" +#include "proto/local_assets_stream_manager.grpc.pb.h" + +namespace cdc_ft { + +class MultiSession; +class ProcessFactory; +class SessionManager; + +// Implements a service to start and stop streaming sessions as a server. +// The corresponding clients are implemented by the ggp CLI and SDK Proxy. +// The CLI triggers StartSession() from `ggp instance mount --local-dir` and +// StopSession() from `ggp instance unmount`. SDK Proxy invokes StartSession() +// when a user starts a new game from the partner portal and sets an `Asset +// streaming directory` in the `Advanced settings` in the `Play settings` +// dialog. +// This service is owned by SessionManagementServer. 
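// A minimal client-side sketch (assuming the stub generated from
// local_assets_stream_manager.proto, an insecure local channel, and the
// session management port 44432 used by main.cc; the gamelet name and
// directory are placeholders):
//
//   auto stub = localassetsstreammanager::LocalAssetsStreamManager::NewStub(
//       grpc::CreateChannel("localhost:44432",
//                           grpc::InsecureChannelCredentials()));
//   localassetsstreammanager::StartSessionRequest request;
//   request.set_gamelet_name("organizations/org-1/projects/proj-1/pools/"
//                            "pool-1/gamelets/edge/e-zone/i-1");
//   request.set_workstation_directory("C:\\path\\to\\build");
//   localassetsstreammanager::StartSessionResponse response;
//   grpc::ClientContext context;
//   grpc::Status status = stub->StartSession(&context, request, &response);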
+class LocalAssetsStreamManagerServiceImpl final + : public localassetsstreammanager::LocalAssetsStreamManager::Service { + public: + using StartSessionRequest = localassetsstreammanager::StartSessionRequest; + using StartSessionRequestOrigin = + localassetsstreammanager::StartSessionRequest_Origin; + using StartSessionResponse = localassetsstreammanager::StartSessionResponse; + using StopSessionRequest = localassetsstreammanager::StopSessionRequest; + using StopSessionResponse = localassetsstreammanager::StopSessionResponse; + + LocalAssetsStreamManagerServiceImpl( + SessionManager* session_manager, ProcessFactory* process_factory, + metrics::MetricsService* const metrics_service); + ~LocalAssetsStreamManagerServiceImpl(); + + // Starts a streaming session from path |request->workstation_directory()| to + // the instance with id |request->gamelet_id()|. Stops an existing session + // if it exists. + grpc::Status StartSession(grpc::ServerContext* context, + const StartSessionRequest* request, + StartSessionResponse* response) override + ABSL_LOCKS_EXCLUDED(sessions_mutex_); + + // Stops the streaming session to the instance with id + // |request->gamelet_id()|. Returns a NotFound error if no session exists. + grpc::Status StopSession(grpc::ServerContext* context, + const StopSessionRequest* request, + StopSessionResponse* response) override + ABSL_LOCKS_EXCLUDED(sessions_mutex_); + + private: + // Convert StartSessionRequest enum to metrics enum. + metrics::RequestOrigin ConvertOrigin(StartSessionRequestOrigin origin) const; + + // Initializes an ssh connection to a gamelet by calling 'ggp ssh init'. + // |instance_id| must be set, |project_id|, |organization_id| are optional. + // Returns |instance_ip| and |instance_port| (SSH port). + absl::Status InitSsh(const std::string& instance_id, + const std::string& project_id, + const std::string& organization_id, + std::string* instance_ip, uint16_t* instance_port); + + const SessionConfig cfg_; + SessionManager* const session_manager_; + ProcessFactory* const process_factory_; + metrics::MetricsService* const metrics_service_; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_LOCAL_ASSETS_STREAM_MANAGER_SERVICE_IMPL_H_ diff --git a/asset_stream_manager/main.cc b/asset_stream_manager/main.cc new file mode 100644 index 0000000..8fc4f3c --- /dev/null +++ b/asset_stream_manager/main.cc @@ -0,0 +1,182 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
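As context for InitSsh() declared in the header above: it shells out to
'ggp ssh init' with the --instance, --project and --organization flags built
in the .cc file and extracts the connection endpoint from the tool's
key/value output via ParseValue(). A sketch with hypothetical output values:

    Host: 10.1.2.3      -> *instance_ip   = "10.1.2.3"
    Port: 44422         -> *instance_port = 44422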
+ +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl_helper/jedec_size_flag.h" +#include "asset_stream_manager/asset_stream_config.h" +#include "asset_stream_manager/background_service_impl.h" +#include "asset_stream_manager/local_assets_stream_manager_service_impl.h" +#include "asset_stream_manager/session_management_server.h" +#include "asset_stream_manager/session_manager.h" +#include "common/log.h" +#include "common/path.h" +#include "common/process.h" +#include "common/sdk_util.h" +#include "common/status_macros.h" +#include "data_store/data_provider.h" +#include "data_store/disk_data_store.h" +#include "metrics/metrics.h" + +namespace cdc_ft { +namespace { + +constexpr int kSessionManagementPort = 44432; + +absl::Status Run(const AssetStreamConfig& cfg) { + WinProcessFactory process_factory; + metrics::MetricsService metrics_service; + + SessionManager session_manager(cfg.session_cfg(), &process_factory, + &metrics_service); + BackgroundServiceImpl background_service; + LocalAssetsStreamManagerServiceImpl session_service( + &session_manager, &process_factory, &metrics_service); + + SessionManagementServer sm_server(&session_service, &background_service, + &session_manager); + background_service.SetExitCallback( + [&sm_server]() { return sm_server.Shutdown(); }); + + RETURN_IF_ERROR(sm_server.Start(kSessionManagementPort)); + if (!cfg.src_dir().empty()) { + MultiSession* ms_unused; + metrics::SessionStartStatus status_unused; + RETURN_IF_ERROR(session_manager.StartSession( + /*instance_id=*/cfg.instance_ip(), /*project_id=*/std::string(), + /*organization_id=*/std::string(), cfg.instance_ip(), + cfg.instance_port(), cfg.src_dir(), &ms_unused, &status_unused)); + } + sm_server.RunUntilShutdown(); + return absl::OkStatus(); +} + +void InitLogging(bool log_to_stdout, int verbosity) { + LogLevel level = cdc_ft::Log::VerbosityToLogLevel(verbosity); + if (log_to_stdout) { + cdc_ft::Log::Initialize(std::make_unique(level)); + } else { + SdkUtil util; + cdc_ft::Log::Initialize(std::make_unique( + level, util.GetLogPath("assets_stream_manager_v3").c_str())); + } +} + +// Declare AS20 flags, so that AS30 can be used on older SDKs simply by +// replacing the binary. Note that the RETIRED_FLAGS macro can't be used +// because the flags contain dashes. This code mimics the macro. +absl::flags_internal::RetiredFlag RETIRED_FLAGS_port; +absl::flags_internal::RetiredFlag RETIRED_FLAGS_session_ports; +absl::flags_internal::RetiredFlag RETIRED_FLAGS_gm_mount_point; +absl::flags_internal::RetiredFlag RETIRED_FLAGS_allow_edge; +const auto RETIRED_FLAGS_REG_port = + (RETIRED_FLAGS_port.Retire("port"), + ::absl::flags_internal::FlagRegistrarEmpty{}); +const auto RETIRED_FLAGS_REG_session_ports = + (RETIRED_FLAGS_session_ports.Retire("session-ports"), + ::absl::flags_internal::FlagRegistrarEmpty{}); +const auto RETIRED_FLAGS_REG_gm_mount_point = + (RETIRED_FLAGS_gm_mount_point.Retire("gamelet-mount-point"), + ::absl::flags_internal::FlagRegistrarEmpty{}); +const auto RETIRED_FLAGS_REG_allow_edge = + (RETIRED_FLAGS_allow_edge.Retire("allow-edge"), + ::absl::flags_internal::FlagRegistrarEmpty{}); + +} // namespace +} // namespace cdc_ft + +ABSL_FLAG(std::string, src_dir, "", + "Start a streaming session immediately from the given Windows path. " + "Used during development. Must have exactly one gamelet reserved or " + "specify the target gamelet with --instance."); +ABSL_FLAG(std::string, instance_ip, "", + "Connect to the instance with the given IP address for this session. 
" + "This flag is ignored unless --src_dir is set as well. Used " + "during development. "); +ABSL_FLAG(uint16_t, instance_port, 0, + "Connect to the instance through the given SSH port. " + "This flag is ignored unless --src_dir is set as well. Used " + "during development. "); +ABSL_FLAG(int, verbosity, 2, "Verbosity of the log output"); +ABSL_FLAG(bool, debug, false, "Run FUSE filesystem in debug mode"); +ABSL_FLAG(bool, singlethreaded, false, + "Run FUSE filesystem in singlethreaded mode"); +ABSL_FLAG(bool, stats, false, + "Collect and print detailed streaming statistics"); +ABSL_FLAG(bool, quiet, false, + "Do not print any output except errors and stats"); +ABSL_FLAG(int, manifest_updater_threads, 4, + "Number of threads used to compute file hashes on the workstation."); +ABSL_FLAG(int, file_change_wait_duration_ms, 500, + "Time in milliseconds to wait until pushing a file change to the " + "instance after detecting it."); +ABSL_FLAG(bool, check, false, "Check FUSE consistency and log check results"); +ABSL_FLAG(bool, log_to_stdout, false, "Log to stdout instead of to a file"); +ABSL_FLAG(cdc_ft::JedecSize, cache_capacity, + cdc_ft::JedecSize(cdc_ft::DiskDataStore::kDefaultCapacity), + "Cache capacity. Supports common unit suffixes K, M, G."); +ABSL_FLAG(uint32_t, cleanup_timeout, cdc_ft::DataProvider::kCleanupTimeoutSec, + "Period in seconds at which instance cache cleanups are run"); +ABSL_FLAG(uint32_t, access_idle_timeout, cdc_ft::DataProvider::kAccessIdleSec, + "Do not run instance cache cleanups for this many seconds after the " + "last file access"); + +int main(int argc, char* argv[]) { + absl::ParseCommandLine(argc, argv); + + // Set up config. Allow overriding this config with + // %APPDATA%\GGP\services\assets_stream_manager_v3.json. + cdc_ft::SdkUtil sdk_util; + const std::string config_path = cdc_ft::path::Join( + sdk_util.GetServicesConfigPath(), "assets_stream_manager_v3.json"); + cdc_ft::AssetStreamConfig cfg; + absl::Status cfg_load_status = cfg.LoadFromFile(config_path); + + cdc_ft::InitLogging(cfg.log_to_stdout(), cfg.session_cfg().verbosity); + + // Log status of loaded configuration. Errors are not critical. 
+ if (cfg_load_status.ok()) { + LOG_INFO("Successfully loaded configuration file at '%s'", config_path); + } else if (absl::IsNotFound(cfg_load_status)) { + LOG_INFO("No configuration file found at '%s'", config_path); + } else { + LOG_ERROR("%s", cfg_load_status.message()); + } + + std::string flags_read = cfg.GetFlagsReadFromFile(); + if (!flags_read.empty()) { + LOG_INFO( + "The following settings were read from the configuration file and " + "override the corresponding command line flags if set: %s", + flags_read); + } + + std::string flag_errors = cfg.GetFlagReadErrors(); + if (!flag_errors.empty()) { + LOG_WARNING("%s", flag_errors); + } + + LOG_DEBUG("Configuration:\n%s", cfg.ToString()); + + absl::Status status = cdc_ft::Run(cfg); + if (!status.ok()) { + LOG_ERROR("%s", status.ToString()); + } else { + LOG_INFO("Asset stream manager shut down successfully."); + } + + cdc_ft::Log::Shutdown(); + static_assert(static_cast(absl::StatusCode::kOk) == 0, "kOk not 0"); + return static_cast(status.code()); +} diff --git a/asset_stream_manager/metrics_recorder.cc b/asset_stream_manager/metrics_recorder.cc new file mode 100644 index 0000000..b6373dc --- /dev/null +++ b/asset_stream_manager/metrics_recorder.cc @@ -0,0 +1,69 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
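For reference, the development-mode flags defined in main.cc above can be
combined to start streaming immediately without going through the session
management RPC. An illustrative invocation (binary name, path, IP and port
are placeholders):

    asset_stream_manager --src_dir=C:\build\game --instance_ip=10.1.2.3 --instance_port=44422 --log_to_stdout --stats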
+ +#include "asset_stream_manager/metrics_recorder.h" + +#include "common/log.h" + +namespace cdc_ft { + +MetricsRecorder::MetricsRecorder(metrics::MetricsService* const metrics_service) + : metrics_service_(metrics_service) {} + +metrics::MetricsService* MetricsRecorder::GetMetricsService() const { + return metrics_service_; +} + +MultiSessionMetricsRecorder::MultiSessionMetricsRecorder( + metrics::MetricsService* const metrics_service) + : MetricsRecorder(metrics_service), + multisession_id_(Util::GenerateUniqueId()) {} + +MultiSessionMetricsRecorder::~MultiSessionMetricsRecorder() = default; + +void MultiSessionMetricsRecorder::RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const { + if (!event.as_manager_data) { + event.as_manager_data = + std::make_unique(); + } + event.as_manager_data->multisession_id = multisession_id_; + metrics_service_->RecordEvent(std::move(event), code); +} + +SessionMetricsRecorder::SessionMetricsRecorder( + metrics::MetricsService* const metrics_service, + const std::string& multisession_id, const std::string& project_id, + const std::string& organization_id) + : MetricsRecorder(metrics_service), + multisession_id_(multisession_id), + project_id_(project_id), + organization_id_(organization_id), + session_id_(Util::GenerateUniqueId()) {} + +SessionMetricsRecorder::~SessionMetricsRecorder() = default; + +void SessionMetricsRecorder::RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const { + if (!event.as_manager_data) { + event.as_manager_data = + std::make_unique(); + } + event.as_manager_data->multisession_id = multisession_id_; + event.as_manager_data->session_id = session_id_; + event.project_id = project_id_; + event.organization_id = organization_id_; + metrics_service_->RecordEvent(std::move(event), code); +} +} // namespace cdc_ft diff --git a/asset_stream_manager/metrics_recorder.h b/asset_stream_manager/metrics_recorder.h new file mode 100644 index 0000000..0c3b5b3 --- /dev/null +++ b/asset_stream_manager/metrics_recorder.h @@ -0,0 +1,77 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ASSET_STREAM_MANAGER_METRICS_RECORDER_H_ +#define ASSET_STREAM_MANAGER_METRICS_RECORDER_H_ + +#include "absl/status/status.h" +#include "common/util.h" +#include "metrics/enums.h" +#include "metrics/messages.h" +#include "metrics/metrics.h" + +namespace cdc_ft { + +class MetricsRecorder { + public: + virtual void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const = 0; + + virtual metrics::MetricsService* GetMetricsService() const; + + protected: + explicit MetricsRecorder(metrics::MetricsService* const metrics_service); + metrics::MetricsService* const metrics_service_; +}; + +class MultiSessionMetricsRecorder : public MetricsRecorder { + public: + explicit MultiSessionMetricsRecorder( + metrics::MetricsService* const metrics_service); + ~MultiSessionMetricsRecorder(); + + virtual void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const; + + const std::string& MultiSessionId() const { return multisession_id_; } + + private: + std::string multisession_id_; +}; + +class SessionMetricsRecorder : public MetricsRecorder { + public: + explicit SessionMetricsRecorder( + metrics::MetricsService* const metrics_service, + const std::string& multisession_id, const std::string& project_id, + const std::string& organization_id); + ~SessionMetricsRecorder(); + + virtual void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const; + + const std::string& SessionId() const { return session_id_; } + + private: + std::string multisession_id_; + std::string session_id_; + std::string project_id_; + std::string organization_id_; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_METRICS_RECORDER_H_ diff --git a/asset_stream_manager/metrics_recorder_test.cc b/asset_stream_manager/metrics_recorder_test.cc new file mode 100644 index 0000000..706ae02 --- /dev/null +++ b/asset_stream_manager/metrics_recorder_test.cc @@ -0,0 +1,131 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
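To make the recorder hierarchy above concrete, a minimal usage sketch (the
project and organization IDs are placeholders; it assumes the cdc_ft
namespace and the includes used by the test below, and instantiates
metrics::MetricsService directly, as main.cc does):

    metrics::MetricsService metrics_service;
    MultiSessionMetricsRecorder ms_recorder(&metrics_service);
    // Events recorded here are tagged with ms_recorder.MultiSessionId().
    ms_recorder.RecordEvent(metrics::DeveloperLogEvent(),
                            metrics::EventType::kMultiSessionStart);
    // A per-session recorder additionally tags session, project and org IDs.
    SessionMetricsRecorder session_recorder(&metrics_service,
                                            ms_recorder.MultiSessionId(),
                                            "proj-1", "org-1");
    session_recorder.RecordEvent(metrics::DeveloperLogEvent(),
                                 metrics::EventType::kSessionStart);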
+ +#include "asset_stream_manager/metrics_recorder.h" + +#include "common/status_test_macros.h" +#include "gtest/gtest.h" +#include "metrics/metrics.h" + +namespace cdc_ft { +namespace { + +struct MetricsRecord { + MetricsRecord(metrics::DeveloperLogEvent dev_log_event, + metrics::EventType code) + : dev_log_event(std::move(dev_log_event)), code(code) {} + metrics::DeveloperLogEvent dev_log_event; + metrics::EventType code; +}; + +class MetricsServiceForTesting : public metrics::MetricsService { + public: + MetricsServiceForTesting() { + metrics_records_ = new std::vector(); + } + + ~MetricsServiceForTesting() { delete metrics_records_; } + + void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const override { + metrics_records_->push_back(MetricsRecord(std::move(event), code)); + } + + int NumberOfRecordRequests() { return (int)metrics_records_->size(); } + + std::vector GetEventsAndClear() { + return std::move(*metrics_records_); + } + + private: + std::vector* metrics_records_; +}; + +class MetricsRecorderTest : public ::testing::Test { + public: + void SetUp() override { metrics_service_ = new MetricsServiceForTesting(); } + + void TearDown() override { delete metrics_service_; } + + protected: + MetricsServiceForTesting* metrics_service_; +}; + +TEST_F(MetricsRecorderTest, SendEventWithMultisessionId) { + MultiSessionMetricsRecorder target(metrics_service_); + metrics::DeveloperLogEvent q_evt; + q_evt.project_id = "proj/id"; + q_evt.organization_id = "org/id"; + + target.RecordEvent(std::move(q_evt), metrics::EventType::kMultiSessionStart); + EXPECT_EQ(metrics_service_->NumberOfRecordRequests(), 1); + std::vector requests = metrics_service_->GetEventsAndClear(); + EXPECT_EQ(requests[0].code, metrics::EventType::kMultiSessionStart); + metrics::DeveloperLogEvent expected_evt; + expected_evt.project_id = "proj/id"; + expected_evt.organization_id = "org/id"; + expected_evt.as_manager_data = + std::make_unique(); + expected_evt.as_manager_data->multisession_id = target.MultiSessionId(); + EXPECT_EQ(requests[0].dev_log_event, expected_evt); + EXPECT_FALSE(target.MultiSessionId().empty()); + + q_evt = metrics::DeveloperLogEvent(); + q_evt.project_id = "proj/id"; + q_evt.organization_id = "org/id"; + target.RecordEvent(std::move(q_evt), metrics::EventType::kMultiSessionStart); + EXPECT_EQ(metrics_service_->NumberOfRecordRequests(), 1); + std::vector requests2 = metrics_service_->GetEventsAndClear(); + EXPECT_EQ(requests2[0].code, metrics::EventType::kMultiSessionStart); + EXPECT_EQ(requests2[0].dev_log_event, requests[0].dev_log_event); + + MultiSessionMetricsRecorder target2(metrics_service_); + EXPECT_NE(target2.MultiSessionId(), target.MultiSessionId()); +} + +TEST_F(MetricsRecorderTest, SendEventWithSessionId) { + SessionMetricsRecorder target(metrics_service_, "id1", "m_proj", "m_org"); + metrics::DeveloperLogEvent q_evt; + q_evt.project_id = "proj/id"; + q_evt.organization_id = "org/id"; + + target.RecordEvent(std::move(q_evt), metrics::EventType::kSessionStart); + EXPECT_EQ(metrics_service_->NumberOfRecordRequests(), 1); + std::vector requests = metrics_service_->GetEventsAndClear(); + EXPECT_EQ(requests[0].code, metrics::EventType::kSessionStart); + metrics::DeveloperLogEvent expected_evt; + expected_evt.project_id = "m_proj"; + expected_evt.organization_id = "m_org"; + expected_evt.as_manager_data = + std::make_unique(); + expected_evt.as_manager_data->multisession_id = "id1"; + expected_evt.as_manager_data->session_id = target.SessionId(); + 
EXPECT_EQ(requests[0].dev_log_event, expected_evt); + EXPECT_FALSE(target.SessionId().empty()); + + q_evt = metrics::DeveloperLogEvent(); + q_evt.project_id = "proj/id"; + q_evt.organization_id = "org/id"; + target.RecordEvent(std::move(q_evt), metrics::EventType::kSessionStart); + EXPECT_EQ(metrics_service_->NumberOfRecordRequests(), 1); + std::vector requests2 = metrics_service_->GetEventsAndClear(); + EXPECT_EQ(requests2[0].code, metrics::EventType::kSessionStart); + EXPECT_EQ(requests2[0].dev_log_event, requests[0].dev_log_event); + + SessionMetricsRecorder target2(metrics_service_, "id2", "m_proj", "m_org"); + EXPECT_NE(target2.SessionId(), target.SessionId()); +} + +} // namespace +} // namespace cdc_ft diff --git a/asset_stream_manager/multi_session.cc b/asset_stream_manager/multi_session.cc new file mode 100644 index 0000000..1769bee --- /dev/null +++ b/asset_stream_manager/multi_session.cc @@ -0,0 +1,699 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/multi_session.h" + +#include "asset_stream_manager/session.h" +#include "common/file_watcher_win.h" +#include "common/log.h" +#include "common/path.h" +#include "common/platform.h" +#include "common/port_manager.h" +#include "common/process.h" +#include "common/util.h" +#include "data_store/disk_data_store.h" +#include "manifest/content_id.h" +#include "manifest/manifest_iterator.h" +#include "manifest/manifest_printer.h" +#include "manifest/manifest_proto_defs.h" +#include "metrics/enums.h" +#include "metrics/messages.h" + +namespace cdc_ft { +namespace { + +// Ports used by the asset streaming service for local port forwarding on +// workstation and gamelet. +constexpr int kAssetStreamPortFirst = 44433; +constexpr int kAssetStreamPortLast = 44442; + +// Stats output period (if enabled). +constexpr double kStatsPrintDelaySec = 0.1f; + +ManifestUpdater::Operator FileWatcherActionToOperation( + FileWatcherWin::FileAction action) { + switch (action) { + case FileWatcherWin::FileAction::kAdded: + return ManifestUpdater::Operator::kAdd; + case FileWatcherWin::FileAction::kModified: + return ManifestUpdater::Operator::kUpdate; + case FileWatcherWin::FileAction::kDeleted: + return ManifestUpdater::Operator::kDelete; + } + // The switch must cover all actions. + LOG_ERROR("Unhandled action: %d", static_cast(action)); + assert(false); + return ManifestUpdater::Operator::kAdd; +} + +// Converts |modified_files| (as returned from the file watcher) into an +// OperationList (as required by the manifest updater). +ManifestUpdater::OperationList GetFileOperations( + const FileWatcherWin::FileMap& modified_files) { + AssetInfo ai; + ManifestUpdater::OperationList ops; + ops.reserve(modified_files.size()); + for (const auto& [path, info] : modified_files) { + ai.path = path; + ai.type = info.is_dir ? 
AssetProto::DIRECTORY : AssetProto::FILE; + ai.size = info.size; + ai.mtime = info.mtime; + ops.emplace_back(FileWatcherActionToOperation(info.action), std::move(ai)); + } + return ops; +} + +} // namespace + +MultiSessionRunner::MultiSessionRunner( + std::string src_dir, DataStoreWriter* data_store, + ProcessFactory* process_factory, bool enable_stats, + absl::Duration wait_duration, uint32_t num_updater_threads, + MultiSessionMetricsRecorder const* metrics_recorder, + ManifestUpdatedCb manifest_updated_cb) + : src_dir_(std::move(src_dir)), + data_store_(data_store), + process_factory_(process_factory), + file_chunks_(enable_stats), + wait_duration_(wait_duration), + num_updater_threads_(num_updater_threads), + manifest_updated_cb_(std::move(manifest_updated_cb)), + metrics_recorder_(metrics_recorder) { + assert(metrics_recorder_); +} + +absl::Status MultiSessionRunner::Initialize(int port, + AssetStreamServerType type, + ContentSentHandler content_sent) { + // Start the server. + assert(!server_); + server_ = AssetStreamServer::Create(type, src_dir_, data_store_, + &file_chunks_, content_sent); + assert(server_); + RETURN_IF_ERROR(server_->Start(port), + "Failed to start asset stream server for '%s'", src_dir_); + + assert(!thread_); + thread_ = std::make_unique([this]() { Run(); }); + + return absl::OkStatus(); +} + +absl::Status MultiSessionRunner::Shutdown() { + // Send shutdown signal. + { + absl::MutexLock lock(&mutex_); + shutdown_ = true; + } + if (thread_) { + if (thread_->joinable()) thread_->join(); + thread_.reset(); + } + + // Shut down asset stream server. + if (server_) { + server_->Shutdown(); + server_.reset(); + } + + return status_; +} + +absl::Status MultiSessionRunner::WaitForManifestAck( + const std::string& instance_id, absl::Duration fuse_timeout) { + { + absl::MutexLock lock(&mutex_); + + LOG_INFO("Waiting for manifest to be available"); + auto cond = [this]() { return manifest_set_ || !status_.ok(); }; + mutex_.Await(absl::Condition(&cond)); + + if (!status_.ok()) + return WrapStatus(status_, "Failed to set up streaming session for '%s'", + src_dir_); + } + + LOG_INFO("Waiting for FUSE ack"); + assert(server_); + RETURN_IF_ERROR(server_->WaitForManifestAck(instance_id, fuse_timeout)); + + return absl::OkStatus(); +} + +absl::Status MultiSessionRunner::Status() { + absl::MutexLock lock(&mutex_); + return status_; +} + +ContentIdProto MultiSessionRunner::ManifestId() const { + assert(server_); + return server_->GetManifestId(); +} + +void MultiSessionRunner::Run() { + // Create the manifest updater. + UpdaterConfig cfg; + cfg.num_threads = num_updater_threads_; + cfg.src_dir = src_dir_; + ManifestUpdater manifest_updater(data_store_, std::move(cfg)); + + // Set up file watcher. + // The streamed path should be a directory and exist at the beginning. + FileWatcherWin watcher(src_dir_); + absl::Status status = watcher.StartWatching([this]() { OnFilesChanged(); }, + [this]() { OnDirRecreated(); }); + if (!status.ok()) { + SetStatus( + WrapStatus(status, "Failed to update manifest for '%s'", src_dir_)); + return; + } + + // Push an intermediate manifest containing the full directory structure, but + // potentially missing chunks. The purpose is that the FUSE can immediately + // show the structure and inode stats. FUSE will block on file reads that + // cannot be served due to missing chunks until the manifest is ready. + auto push_intermediate_manifest = [this](const ContentIdProto& manifest_id) { + SetManifest(manifest_id); + }; + + // Bring the manifest up to date. 
+ LOG_INFO("Updating manifest for '%s'...", src_dir_); + Stopwatch sw; + status = + manifest_updater.UpdateAll(&file_chunks_, push_intermediate_manifest); + RecordManifestUpdate(manifest_updater, sw.Elapsed(), + metrics::UpdateTrigger::kInitUpdateAll, status); + if (!status.ok()) { + SetStatus( + WrapStatus(status, "Failed to update manifest for '%s'", src_dir_)); + return; + } + RecordMultiSessionStart(manifest_updater); + SetManifest(manifest_updater.ManifestId()); + LOG_INFO("Manifest for '%s' updated in %0.3f seconds", src_dir_, + sw.ElapsedSeconds()); + + while (!shutdown_) { + FileWatcherWin::FileMap modified_files; + bool clean_manifest = false; + { + // Wait for changes. + absl::MutexLock lock(&mutex_); + + bool prev_files_changed = files_changed_; + absl::Duration timeout = + absl::Seconds(file_chunks_.HasStats() ? kStatsPrintDelaySec : 3600.0); + if (files_changed_) { + timeout = std::max(wait_duration_ - files_changed_timer_.Elapsed(), + absl::Milliseconds(1)); + } else { + files_changed_timer_.Reset(); + } + auto cond = [this]() { + return shutdown_ || files_changed_ || dir_recreated_; + }; + mutex_.AwaitWithTimeout(absl::Condition(&cond), timeout); + + // If |files_changed_| became true, wait some more time before updating + // the manifest. + if (!prev_files_changed && files_changed_) files_changed_timer_.Reset(); + + // Shut down. + if (shutdown_) { + LOG_INFO("MultiSession('%s'): Shutting down", src_dir_); + break; + } + + // Pick up modified files. + if (!dir_recreated_ && files_changed_ && + files_changed_timer_.Elapsed() > wait_duration_) { + modified_files = watcher.GetModifiedFiles(); + files_changed_ = false; + files_changed_timer_.Reset(); + } + + if (dir_recreated_) { + clean_manifest = true; + dir_recreated_ = false; + } + } // mutex_ lock + + if (clean_manifest) { + LOG_DEBUG( + "Streamed directory '%s' was possibly re-created or not all changes " + "were detected, re-building the manifest", + src_dir_); + modified_files.clear(); + sw.Reset(); + status = manifest_updater.UpdateAll(&file_chunks_); + RecordManifestUpdate(manifest_updater, sw.Elapsed(), + metrics::UpdateTrigger::kRunningUpdateAll, status); + if (!status.ok()) { + LOG_WARNING( + "Updating manifest for '%s' after re-creating directory failed: " + "'%s'", + src_dir_, status.ToString()); + SetManifest(manifest_updater.DefaultManifestId()); + } else { + SetManifest(manifest_updater.ManifestId()); + } + } else if (!modified_files.empty()) { + ManifestUpdater::OperationList ops = GetFileOperations(modified_files); + sw.Reset(); + status = manifest_updater.Update(&ops, &file_chunks_); + RecordManifestUpdate(manifest_updater, sw.Elapsed(), + metrics::UpdateTrigger::kRegularUpdate, status); + if (!status.ok()) { + LOG_WARNING("Updating manifest for '%s' failed: %s", src_dir_, + status.ToString()); + SetManifest(manifest_updater.DefaultManifestId()); + } else { + SetManifest(manifest_updater.ManifestId()); + } + } + + // Update stats output. 
+ file_chunks_.PrintStats(); + } +} + +void MultiSessionRunner::RecordManifestUpdate( + const ManifestUpdater& manifest_updater, absl::Duration duration, + metrics::UpdateTrigger trigger, absl::Status status) { + metrics::DeveloperLogEvent evt; + evt.as_manager_data = std::make_unique(); + evt.as_manager_data->manifest_update_data = + std::make_unique(); + evt.as_manager_data->manifest_update_data->local_duration_ms = + absl::ToInt64Milliseconds(duration); + evt.as_manager_data->manifest_update_data->status = status.code(); + evt.as_manager_data->manifest_update_data->trigger = trigger; + const UpdaterStats& stats = manifest_updater.Stats(); + evt.as_manager_data->manifest_update_data->total_assets_added_or_updated = + stats.total_assets_added_or_updated; + evt.as_manager_data->manifest_update_data->total_assets_deleted = + stats.total_assets_deleted; + evt.as_manager_data->manifest_update_data->total_chunks = stats.total_chunks; + evt.as_manager_data->manifest_update_data->total_files_added_or_updated = + stats.total_files_added_or_updated; + evt.as_manager_data->manifest_update_data->total_files_failed = + stats.total_files_failed; + evt.as_manager_data->manifest_update_data->total_processed_bytes = + stats.total_processed_bytes; + metrics_recorder_->RecordEvent(std::move(evt), + metrics::EventType::kManifestUpdated); +} + +void MultiSessionRunner::RecordMultiSessionStart( + const ManifestUpdater& manifest_updater) { + metrics::DeveloperLogEvent evt; + evt.as_manager_data = std::make_unique(); + evt.as_manager_data->multi_session_start_data = + std::make_unique(); + ManifestIterator manifest_iter(data_store_); + absl::Status status = manifest_iter.Open(manifest_updater.ManifestId()); + if (status.ok()) { + const AssetProto* entry = &manifest_iter.Manifest().root_dir(); + uint32_t file_count = 0; + uint64_t total_chunks = 0; + uint64_t total_processed_bytes = 0; + do { + if (entry->type() == AssetProto::FILE) { + ++file_count; + total_chunks += entry->file_chunks_size(); + total_processed_bytes += entry->file_size(); + for (const IndirectChunkListProto& icl : + entry->file_indirect_chunks()) { + ChunkListProto list; + status = data_store_->GetProto(icl.chunk_list_id(), &list); + if (status.ok()) { + total_chunks += list.chunks_size(); + } else { + LOG_WARNING("Could not get proto by id: '%s'. %s", + ContentId::ToHexString(icl.chunk_list_id()), + status.ToString()); + } + } + } + } while ((entry = manifest_iter.NextEntry()) != nullptr); + evt.as_manager_data->multi_session_start_data->file_count = file_count; + evt.as_manager_data->multi_session_start_data->chunk_count = total_chunks; + evt.as_manager_data->multi_session_start_data->byte_count = + total_processed_bytes; + } else { + LOG_WARNING("Could not open manifest by id: '%s'. 
%s", + ContentId::ToHexString(manifest_updater.ManifestId()), + status.ToString()); + } + evt.as_manager_data->multi_session_start_data->min_chunk_size = + static_cast(manifest_updater.Config().min_chunk_size); + evt.as_manager_data->multi_session_start_data->avg_chunk_size = + static_cast(manifest_updater.Config().avg_chunk_size); + evt.as_manager_data->multi_session_start_data->max_chunk_size = + static_cast(manifest_updater.Config().max_chunk_size); + metrics_recorder_->RecordEvent(std::move(evt), + metrics::EventType::kMultiSessionStart); +} + +void MultiSessionRunner::SetStatus(absl::Status status) + ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + status_ = std::move(status); +} + +void MultiSessionRunner::OnFilesChanged() { + absl::MutexLock lock(&mutex_); + files_changed_ = true; +} + +void MultiSessionRunner::OnDirRecreated() { + absl::MutexLock lock(&mutex_); + dir_recreated_ = true; +} + +void MultiSessionRunner::SetManifest(const ContentIdProto& manifest_id) { + server_->SetManifestId(manifest_id); + if (Log::Instance()->GetLogLevel() <= LogLevel::kVerbose) { + ManifestPrinter printer; + ManifestProto manifest_proto; + absl::Status status = data_store_->GetProto(manifest_id, &manifest_proto); + std::string manifest_text; + printer.PrintToString(manifest_proto, &manifest_text); + if (status.ok()) { + LOG_DEBUG("Set manifest '%s'\n'%s'", ContentId::ToHexString(manifest_id), + manifest_text); + } else { + LOG_WARNING("Could not retrieve manifest from the data store '%s'", + ContentId::ToHexString(manifest_id)); + } + } + + // Notify thread that starts the streaming session that a manifest has been + // set. + absl::MutexLock lock(&mutex_); + manifest_set_ = true; + if (manifest_updated_cb_) { + manifest_updated_cb_(); + } +} + +MultiSession::MultiSession(std::string src_dir, SessionConfig cfg, + ProcessFactory* process_factory, + MultiSessionMetricsRecorder const* metrics_recorder, + std::unique_ptr data_store) + : src_dir_(src_dir), + cfg_(cfg), + process_factory_(process_factory), + data_store_(std::move(data_store)), + metrics_recorder_(metrics_recorder) { + assert(metrics_recorder_); +} + +MultiSession::~MultiSession() { + absl::Status status = Shutdown(); + if (!status.ok()) { + LOG_ERROR("Shutdown streaming from '%s' failed: %s", src_dir_, + status.ToString()); + } +} + +absl::Status MultiSession::Initialize() { + // |data_store_| is not set in production, but it can be overridden for tests. + if (!data_store_) { + std::string cache_path; + ASSIGN_OR_RETURN(cache_path, GetCachePath(src_dir_)); + ASSIGN_OR_RETURN(data_store_, + DiskDataStore::Create(/*depth=*/0, cache_path, + /*create_dirs=*/true), + "Failed to create data store for '%s'", cache_path); + } + + // Find an available local port. 
+ std::unordered_set ports; + ASSIGN_OR_RETURN( + ports, + PortManager::FindAvailableLocalPorts(kAssetStreamPortFirst, + kAssetStreamPortLast, "127.0.0.1", + process_factory_, true), + "Failed to find an available local port in the range [%d, %d]", + kAssetStreamPortFirst, kAssetStreamPortLast); + assert(!ports.empty()); + local_asset_stream_port_ = *ports.begin(); + + assert(!runner_); + runner_ = std::make_unique( + src_dir_, data_store_.get(), process_factory_, cfg_.stats, + absl::Milliseconds(cfg_.file_change_wait_duration_ms), + cfg_.manifest_updater_threads, metrics_recorder_); + RETURN_IF_ERROR(runner_->Initialize( + local_asset_stream_port_, AssetStreamServerType::kGrpc, + [this](uint64_t bc, uint64_t cc, std::string id) { + this->OnContentSent(bc, cc, id); + }), + "Failed to initialize session runner"); + StartHeartBeatCheck(); + return absl::OkStatus(); +} + +absl::Status MultiSession::Shutdown() { + // Stop all sessions. + // TODO: Record error on multi-session end. + metrics_recorder_->RecordEvent(metrics::DeveloperLogEvent(), + metrics::EventType::kMultiSessionEnd); + { + absl::WriterMutexLock lock(&shutdownMu_); + shutdown_ = true; + } + while (!sessions_.empty()) { + std::string instance_id = sessions_.begin()->first; + RETURN_IF_ERROR(StopSession(instance_id), + "Failed to stop session for instance id %s", instance_id); + sessions_.erase(instance_id); + } + + if (runner_) { + RETURN_IF_ERROR(runner_->Shutdown()); + } + + if (heartbeat_watcher_.joinable()) { + heartbeat_watcher_.join(); + } + + return absl::OkStatus(); +} + +absl::Status MultiSession::Status() { + return runner_ ? runner_->Status() : absl::OkStatus(); +} + +absl::Status MultiSession::StartSession(const std::string& instance_id, + const std::string& project_id, + const std::string& organization_id, + const std::string& instance_ip, + uint16_t instance_port) { + absl::MutexLock lock(&sessions_mutex_); + + if (sessions_.find(instance_id) != sessions_.end()) { + return absl::InvalidArgumentError(absl::StrFormat( + "Session for instance id '%s' already exists", instance_id)); + } + + if (!runner_) + return absl::FailedPreconditionError("MultiSession not started"); + + absl::Status runner_status = runner_->Status(); + if (!runner_status.ok()) { + return WrapStatus(runner_status, + "Failed to set up streaming session for '%s'", src_dir_); + } + + auto metrics_recorder = std::make_unique( + metrics_recorder_->GetMetricsService(), + metrics_recorder_->MultiSessionId(), project_id, organization_id); + + auto session = + std::make_unique(instance_id, instance_ip, instance_port, cfg_, + process_factory_, std::move(metrics_recorder)); + RETURN_IF_ERROR(session->Start(local_asset_stream_port_, + kAssetStreamPortFirst, kAssetStreamPortLast)); + + // Wait for the FUSE to receive the intermediate manifest. 
+ RETURN_IF_ERROR(runner_->WaitForManifestAck(instance_id, absl::Seconds(5))); + + sessions_[instance_id] = std::move(session); + return absl::OkStatus(); +} + +absl::Status MultiSession::StopSession(const std::string& instance_id) { + absl::MutexLock lock(&sessions_mutex_); + + if (sessions_.find(instance_id) == sessions_.end()) { + return absl::NotFoundError( + absl::StrFormat("No session for instance id '%s' found", instance_id)); + } + + if (!runner_) + return absl::FailedPreconditionError("MultiSession not started"); + + RETURN_IF_ERROR(sessions_[instance_id]->Stop()); + sessions_.erase(instance_id); + return absl::OkStatus(); +} + +bool MultiSession::HasSessionForInstance(const std::string& instance_id) { + absl::ReaderMutexLock lock(&sessions_mutex_); + return sessions_.find(instance_id) != sessions_.end(); +} + +bool MultiSession::IsSessionHealthy(const std::string& instance_id) { + absl::ReaderMutexLock lock(&sessions_mutex_); + auto iter = sessions_.find(instance_id); + if (iter == sessions_.end()) return false; + Session* session = iter->second.get(); + assert(session); + return session->IsHealthy(); +} + +bool MultiSession::Empty() { + absl::ReaderMutexLock lock(&sessions_mutex_); + return sessions_.empty(); +} + +uint32_t MultiSession::GetSessionCount() { + absl::ReaderMutexLock lock(&sessions_mutex_); + return static_cast(sessions_.size()); +} + +// static +std::string MultiSession::GetCacheDir(std::string dir) { + // Get full path, or else "..\foo" and "C:\foo" are treated differently, even + // if they map to the same directory. + dir = path::GetFullPath(dir); +#if PLATFORM_WINDOWS + // On Windows, casing is ignored. + std::for_each(dir.begin(), dir.end(), [](char& c) { c = tolower(c); }); +#endif + path::EnsureEndsWithPathSeparator(&dir); + dir = path::ToUnix(std::move(dir)); + ContentIdProto id = ContentId::FromDataString(dir); + + // Replace invalid characters by _. + std::for_each(dir.begin(), dir.end(), [](char& c) { + static constexpr char invalid_chars[] = "<>:\"/\\|?*"; + if (strchr(invalid_chars, c)) c = '_'; + }); + + return dir + ContentId::ToHexString(id).substr(0, kDirHashLen); +} + +// static +absl::StatusOr MultiSession::GetCachePath( + const std::string& src_dir, size_t max_len) { + std::string appdata_path; +#if PLATFORM_WINDOWS + RETURN_IF_ERROR( + path::GetKnownFolderPath(path::FolderId::kRoamingAppData, &appdata_path), + "Failed to get roaming appdata path"); +#elif PLATFORM_LINUX + RETURN_IF_ERROR(path::GetEnv("HOME", &appdata_path)); + path::Append(&appdata_path, ".cache"); +#endif + + std::string base_dir = path::Join(appdata_path, "GGP", "asset_streaming"); + std::string cache_dir = GetCacheDir(src_dir); + + size_t total_size = base_dir.size() + 1 + cache_dir.size(); + if (total_size <= max_len) return path::Join(base_dir, cache_dir); + + // Path needs to be shortened. Remove |to_remove| many chars from the + // beginning of |cache_dir|, but keep the hash (last kDirHashLen bytes). + size_t to_remove = total_size - max_len; + assert(cache_dir.size() >= kDirHashLen); + if (to_remove > cache_dir.size() - kDirHashLen) + to_remove = cache_dir.size() - kDirHashLen; + + // Remove UTF8 code points from the beginning. + size_t start = 0; + while (start < to_remove) { + int codepoint_len = Util::Utf8CodePointLen(cache_dir.data() + start); + // For invalid code points (codepoint_len == 0), just eat byte by byte. 
+ start += std::max(codepoint_len, 1); + } + + assert(start + kDirHashLen <= cache_dir.size()); + return path::Join(base_dir, cache_dir.substr(start)); +} + +void MultiSession::RecordMultiSessionEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) { + metrics_recorder_->RecordEvent(std::move(event), code); +} + +void MultiSession::RecordSessionEvent(metrics::DeveloperLogEvent event, + metrics::EventType code, + const std::string& instance_id) { + Session* session = FindSession(instance_id); + if (session) { + session->RecordEvent(std::move(event), code); + } +} + +Session* MultiSession::FindSession(const std::string& instance_id) { + absl::ReaderMutexLock lock(&sessions_mutex_); + auto session_it = sessions_.find(instance_id); + if (session_it == sessions_.end()) { + return nullptr; + } + return session_it->second.get(); +} + +void MultiSession::OnContentSent(size_t byte_count, size_t chunck_count, + std::string instance_id) { + if (instance_id.empty()) { + // |instance_id| is empty only in case when manifest wasn't acknowledged by + // the gamelet yet (ConfigStreamServiceImpl::AckManifestIdReceived was not + // invoked). This means MultiSession::StartSession is still waiting for + // manifest acknowledge and |sessions_mutex_| is currently locked. In this + // case invoking MultiSession::FindSession and waiting for |sessions_mutex_| + // to get unlocked will block the current thread, which is also responsible + // for receiving a call at ConfigStreamServiceImpl::AckManifestIdReceived. + // This causes a deadlock and leads to a DeadlineExceeded error. + LOG_WARNING("Can not record session content for an empty instance_id."); + return; + } + Session* session = FindSession(instance_id); + if (session == nullptr) { + LOG_WARNING("Failed to find active session by instrance id: %s", + instance_id); + return; + } + session->OnContentSent(byte_count, chunck_count); +} + +void MultiSession::StartHeartBeatCheck() { + heartbeat_watcher_ = std::thread([this]() ABSL_LOCKS_EXCLUDED(shutdownMu_) { + auto cond = [this]() { return shutdown_; }; + while (!shutdownMu_.LockWhenWithTimeout(absl::Condition(&cond), + absl::Minutes(5))) { + absl::ReaderMutexLock lock(&sessions_mutex_); + for (auto it = sessions_.begin(); it != sessions_.end(); ++it) { + it->second->RecordHeartBeatIfChanged(); + } + shutdownMu_.Unlock(); + } + shutdownMu_.Unlock(); + }); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/multi_session.h b/asset_stream_manager/multi_session.h new file mode 100644 index 0000000..a96bf6c --- /dev/null +++ b/asset_stream_manager/multi_session.h @@ -0,0 +1,266 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ASSET_STREAM_MANAGER_MULTI_SESSION_H_ +#define ASSET_STREAM_MANAGER_MULTI_SESSION_H_ + +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "asset_stream_manager/asset_stream_server.h" +#include "asset_stream_manager/metrics_recorder.h" +#include "asset_stream_manager/session_config.h" +#include "common/stopwatch.h" +#include "data_store/data_store_writer.h" +#include "manifest/file_chunk_map.h" +#include "manifest/manifest_updater.h" + +namespace cdc_ft { + +class ProcessFactory; +class Session; +using ManifestUpdatedCb = std::function; + +// Updates the manifest and runs a file watcher in a background thread. +class MultiSessionRunner { + public: + // |src_dir| is the source directory on the workstation to stream. + // |data_store| can be passed for tests to override the default store used. + // |process_factory| abstracts process creation. + // |enable_stats| shows whether statistics should be derived. + // |wait_duration| is the waiting time for changes in the streamed directory. + // |num_updater_threads| is the thread count for the manifest updater. + // |manifest_updated_cb| is the callback executed when a new manifest is set. + MultiSessionRunner( + std::string src_dir, DataStoreWriter* data_store, + ProcessFactory* process_factory, bool enable_stats, + absl::Duration wait_duration, uint32_t num_updater_threads, + MultiSessionMetricsRecorder const* metrics_recorder, + ManifestUpdatedCb manifest_updated_cb = ManifestUpdatedCb()); + + ~MultiSessionRunner() = default; + + // Starts |server_| of |type| on |port|. + absl::Status Initialize( + int port, AssetStreamServerType type, + ContentSentHandler content_sent = ContentSentHandler()); + + // Stops updating the manifest and |server_|. + absl::Status Shutdown() ABSL_LOCKS_EXCLUDED(mutex_); + + // Waits until a manifest is ready and the gamelet |instance_id| has + // acknowledged the reception of the currently set manifest id. |fuse_timeout| + // is the timeout for waiting for the FUSE manifest ack. The time required to + // generate the manifest is not part of this timeout as this could take a + // longer time for a directory with many files. + absl::Status WaitForManifestAck(const std::string& instance_id, + absl::Duration fuse_timeout); + + absl::Status Status() ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the current manifest id used by |server_|. + ContentIdProto ManifestId() const; + + private: + // Updates manifest if the content of the watched directory changes and + // distributes it to subscribed gamelets. + void Run(); + + // Record MultiSessionStart event. + void RecordMultiSessionStart(const ManifestUpdater& manifest_updater); + + // Record ManifestUpdate event. + void RecordManifestUpdate(const ManifestUpdater& manifest_updater, + absl::Duration duration, + metrics::UpdateTrigger trigger, + absl::Status status); + + void SetStatus(absl::Status status) ABSL_LOCKS_EXCLUDED(mutex_); + + // Files changed callback called from FileWatcherWin. + void OnFilesChanged() ABSL_LOCKS_EXCLUDED(mutex_); + + // Directory recreated callback called from FileWatcherWin. + void OnDirRecreated() ABSL_LOCKS_EXCLUDED(mutex_); + + // Called during manifest update when the intermediate manifest or the final + // manifest is available. Pushes the manifest to connected FUSEs. 
+ void SetManifest(const ContentIdProto& manifest_id); + + const std::string src_dir_; + DataStoreWriter* const data_store_; + ProcessFactory* const process_factory_; + FileChunkMap file_chunks_; + const absl::Duration wait_duration_; + const uint32_t num_updater_threads_; + const ManifestUpdatedCb manifest_updated_cb_; + std::unique_ptr server_; + + // Modifications (shutdown, file changes). + absl::Mutex mutex_; + bool shutdown_ ABSL_GUARDED_BY(mutex_) = false; + bool files_changed_ ABSL_GUARDED_BY(mutex_) = false; + bool dir_recreated_ ABSL_GUARDED_BY(mutex_) = false; + bool manifest_set_ ABSL_GUARDED_BY(mutex_) = false; + Stopwatch files_changed_timer_ ABSL_GUARDED_BY(mutex_); + absl::Status status_ ABSL_GUARDED_BY(mutex_); + + // Background thread that watches files and updates the manifest. + std::unique_ptr thread_; + + MultiSessionMetricsRecorder const* metrics_recorder_; +}; + +// Manages an asset streaming session from a fixed directory on the workstation +// to an arbitrary number of gamelets. +class MultiSession { + public: + // Maximum length of cache path. We must be able to write content hashes into + // this path: + // \01234567890123456789 = 260 characters. + static constexpr size_t kDefaultMaxCachePathLen = + 260 - 1 - ContentId::kHashSize * 2 - 1; + + // Length of the hash appended to the cache directory, exposed for testing. + static constexpr size_t kDirHashLen = 8; + + // |src_dir| is the source directory on the workstation to stream. + // |cfg| contains generic configuration parameters for each session. + // |process_factory| abstracts process creation. + // |data_store| can be passed for tests to override the default store used. + // By default, the class uses a DiskDataStore that writes to + // %APPDATA%\GGP\asset_streaming| on Windows. + MultiSession(std::string src_dir, SessionConfig cfg, + ProcessFactory* process_factory, + MultiSessionMetricsRecorder const* metrics_recorder, + std::unique_ptr data_store = nullptr); + ~MultiSession(); + + // Initializes the data store if not overridden in the constructor and starts + // a background thread for updating the manifest and watching file changes. + // Does not wait for the initial manifest update to finish. Use IsRunning() + // to determine whether it is finished. + // Not thread-safe. + absl::Status Initialize(); + + // Stops all sessions and shuts down the server. + // Not thread-safe. + absl::Status Shutdown() ABSL_LOCKS_EXCLUDED(shutdownMu_); + + // Returns the |src_dir| streaming directory passed to the constructor. + const std::string& src_dir() const { return src_dir_; } + + // Returns the status of the background thread. + // Not thread-safe. + absl::Status Status(); + + // Starts a new streaming session to the instance with given |instance_id| and + // waits until the FUSE has received the initial manifest id. + // Returns an error if a session for that instance already exists. + // |instance_id| is the instance id of the target remote instance. + // |project_id| is id of the project that contains the instance. + // |organization_id| is id of the organization that contains the instance. + // |instance_ip| is the IP address of the instance. + // |instance_port| is the SSH port for connecting to the remote instance. + // Thread-safe. 
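  // Example call (hypothetical values):
  //   status = multi_session.StartSession("edge/e-zone/i-1", "proj-1",
  //                                       "org-1", "10.1.2.3", 44422);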
+  absl::Status StartSession(const std::string& instance_id,
+                            const std::string& project_id,
+                            const std::string& organization_id,
+                            const std::string& instance_ip,
+                            uint16_t instance_port)
+      ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // Stops the streaming session to the gamelet with the given |instance_id|.
+  // Returns a NotFound error if a session for that instance does not exist.
+  // Thread-safe.
+  absl::Status StopSession(const std::string& instance_id)
+      ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // Returns true if there is an existing session for |instance_id|.
+  bool HasSessionForInstance(const std::string& instance_id)
+      ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // Returns true if the FUSE process is up and running for an existing session
+  // with ID |instance_id|.
+  bool IsSessionHealthy(const std::string& instance_id)
+      ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // Returns true if the MultiSession does not have any active sessions.
+  bool Empty() ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // Returns the number of active sessions.
+  uint32_t GetSessionCount() ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  // For a given source directory |dir|, e.g. "C:\path\to\game", returns a
+  // sanitized version of |dir| plus a hash of |dir|, e.g.
+  // "c__path_to_game_abcdef01".
+  static std::string GetCacheDir(std::string dir);
+
+  // Returns the directory where manifest chunks are cached, e.g.
+  // "%APPDATA%\GGP\asset_streaming\c__path_to_game_abcdef01" for
+  // "C:\path\to\game".
+  // The returned path is shortened to |max_len| by removing UTF8 code points
+  // from the beginning of the actual cache directory (c__path...) if necessary.
+  static absl::StatusOr<std::string> GetCachePath(
+      const std::string& src_dir, size_t max_len = kDefaultMaxCachePathLen);
+
+  // Record an event associated with the multi-session.
+  void RecordMultiSessionEvent(metrics::DeveloperLogEvent event,
+                               metrics::EventType code);
+
+  // Record an event for a session associated with the |instance|.
+  void RecordSessionEvent(metrics::DeveloperLogEvent event,
+                          metrics::EventType code,
+                          const std::string& instance_id);
+
+ private:
+  std::string src_dir_;
+  SessionConfig cfg_;
+  ProcessFactory* const process_factory_;
+  std::unique_ptr<DataStoreWriter> data_store_;
+  std::thread heartbeat_watcher_;
+  absl::Mutex shutdownMu_;
+  bool shutdown_ ABSL_GUARDED_BY(shutdownMu_) = false;
+
+  // Background thread for watching file changes and updating the manifest.
+  std::unique_ptr<MultiSessionRunner> runner_;
+
+  // Local forwarding port for the asset stream service.
+  int local_asset_stream_port_ = 0;
+
+  // Maps instance id to sessions.
+  std::unordered_map<std::string, std::unique_ptr<Session>> sessions_
+      ABSL_GUARDED_BY(sessions_mutex_);
+  absl::Mutex sessions_mutex_;
+
+  MultiSessionMetricsRecorder const* metrics_recorder_;
+
+  Session* FindSession(const std::string& instance_id)
+      ABSL_LOCKS_EXCLUDED(sessions_mutex_);
+
+  void OnContentSent(size_t byte_count, size_t chunk_count,
+                     std::string instance_id);
+
+  void StartHeartBeatCheck();
+};
+
+}  // namespace cdc_ft
+
+#endif  // ASSET_STREAM_MANAGER_MULTI_SESSION_H_
diff --git a/asset_stream_manager/multi_session_test.cc b/asset_stream_manager/multi_session_test.cc
new file mode 100644
index 0000000..4f18bf4
--- /dev/null
+++ b/asset_stream_manager/multi_session_test.cc
@@ -0,0 +1,488 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/multi_session.h" + +#include +#include +#include +#include + +#include "absl/strings/match.h" +#include "asset_stream_manager/testing_asset_stream_server.h" +#include "common/path.h" +#include "common/platform.h" +#include "common/process.h" +#include "common/status_test_macros.h" +#include "common/test_main.h" +#include "gtest/gtest.h" +#include "manifest/manifest_test_base.h" + +namespace cdc_ft { +namespace { +constexpr char kTestDir[] = "multisession_test_dir"; +constexpr char kData[] = {10, 20, 30, 40, 50, 60, 70, 80, 90}; +constexpr size_t kDataSize = sizeof(kData); +constexpr char kInstance[] = "test_instance"; +constexpr int kPort = 44444; +constexpr absl::Duration kTimeout = absl::Milliseconds(5); +constexpr char kVeryLongPath[] = + "C:\\this\\is\\some\\really\\really\\really\\really\\really\\really\\really" + "\\really\\really\\really\\really\\really\\really\\really\\really\\really" + "\\really\\really\\really\\really\\really\\really\\really\\really\\really" + "\\really\\really\\really\\really\\really\\really\\really\\really\\really" + "\\really\\long\\path"; +constexpr uint32_t kNumThreads = 1; + +struct MetricsRecord { + MetricsRecord(metrics::DeveloperLogEvent evt, metrics::EventType code) + : evt(std::move(evt)), code(code) {} + metrics::DeveloperLogEvent evt; + metrics::EventType code; +}; + +class MetricsServiceForTest : public MultiSessionMetricsRecorder { + public: + MetricsServiceForTest() : MultiSessionMetricsRecorder(nullptr) {} + + virtual ~MetricsServiceForTest() = default; + + void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const override + ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + metrics_records_.push_back(MetricsRecord(std::move(event), code)); + } + + std::vector GetEventsAndClear(metrics::EventType type) + ABSL_LOCKS_EXCLUDED(mutex_) { + std::vector events; + std::vector remaining; + absl::MutexLock lock(&mutex_); + for (size_t i = 0; i < metrics_records_.size(); ++i) { + if (metrics_records_[i].code == type) { + events.push_back(std::move(metrics_records_[i])); + } else { + remaining.push_back(std::move(metrics_records_[i])); + } + } + metrics_records_ = std::move(remaining); + return events; + } + + private: + mutable absl::Mutex mutex_; + mutable std::vector metrics_records_; +}; + +class MultiSessionTest : public ManifestTestBase { + public: + MultiSessionTest() : ManifestTestBase(GetTestDataDir("multi_session")) { + Log::Initialize(std::make_unique(LogLevel::kInfo)); + } + ~MultiSessionTest() { Log::Shutdown(); } + + void SetUp() override { + // Use a temporary directory to be able to test empty directories (git does + // not index empty directories) and creation/deletion of files. + EXPECT_OK(path::RemoveDirRec(test_dir_path_)); + EXPECT_OK(path::CreateDirRec(test_dir_path_)); + metrics_service_ = new MetricsServiceForTest(); + } + + void TearDown() override { + EXPECT_OK(path::RemoveDirRec(test_dir_path_)); + delete metrics_service_; + } + + protected: + // Callback if the manifest was updated == a new manifest is set. 
+ void OnManifestUpdated() ABSL_LOCKS_EXCLUDED(mutex_) { + absl::MutexLock lock(&mutex_); + ++num_manifest_updates_; + } + + // Waits until the manifest is fully computed: the manifest id is not changed + // anymore. + bool WaitForManifestUpdated(uint32_t exp_num_manifest_updates, + absl::Duration timeout = absl::Seconds(5)) { + absl::MutexLock lock(&mutex_); + auto cond = [&]() { + return exp_num_manifest_updates == num_manifest_updates_; + }; + mutex_.AwaitWithTimeout(absl::Condition(&cond), timeout); + return exp_num_manifest_updates == num_manifest_updates_; + } + + void CheckMultiSessionStartNotRecorded() { + std::vector events = metrics_service_->GetEventsAndClear( + metrics::EventType::kMultiSessionStart); + EXPECT_EQ(events.size(), 0); + } + + void CheckMultiSessionStartRecorded(uint64_t byte_count, uint64_t chunk_count, + uint32_t file_count) { + std::vector events = metrics_service_->GetEventsAndClear( + metrics::EventType::kMultiSessionStart); + ASSERT_EQ(events.size(), 1); + metrics::MultiSessionStartData* data = + events[0].evt.as_manager_data->multi_session_start_data.get(); + EXPECT_EQ(data->byte_count, byte_count); + EXPECT_EQ(data->chunk_count, chunk_count); + EXPECT_EQ(data->file_count, file_count); + EXPECT_EQ(data->min_chunk_size, 128 << 10); + EXPECT_EQ(data->avg_chunk_size, 256 << 10); + EXPECT_EQ(data->max_chunk_size, 1024 << 10); + } + + metrics::ManifestUpdateData GetManifestUpdateData( + metrics::UpdateTrigger trigger, absl::StatusCode status, + size_t total_assets_added_or_updated, size_t total_assets_deleted, + size_t total_chunks, size_t total_files_added_or_updated, + size_t total_files_failed, size_t total_processed_bytes) { + metrics::ManifestUpdateData manifest_upd; + manifest_upd.trigger = trigger; + manifest_upd.status = status; + manifest_upd.total_assets_added_or_updated = total_assets_added_or_updated; + manifest_upd.total_assets_deleted = total_assets_deleted; + manifest_upd.total_chunks = total_chunks; + manifest_upd.total_files_added_or_updated = total_files_added_or_updated; + manifest_upd.total_files_failed = total_files_failed; + manifest_upd.total_processed_bytes = total_processed_bytes; + return manifest_upd; + } + + void CheckManifestUpdateRecorded( + std::vector manifests) { + std::vector events = metrics_service_->GetEventsAndClear( + metrics::EventType::kManifestUpdated); + ASSERT_EQ(events.size(), manifests.size()); + for (size_t i = 0; i < manifests.size(); ++i) { + metrics::ManifestUpdateData* data = + events[i].evt.as_manager_data->manifest_update_data.get(); + EXPECT_LT(data->local_duration_ms, 60000ull); + manifests[i].local_duration_ms = data->local_duration_ms; + EXPECT_EQ(*data, manifests[i]); + } + } + + const std::string test_dir_path_ = path::Join(path::GetTempDir(), kTestDir); + WinProcessFactory process_factory_; + absl::Mutex mutex_; + uint32_t num_manifest_updates_ ABSL_GUARDED_BY(mutex_) = 0; + MetricsServiceForTest* metrics_service_; +}; + +constexpr char kCacheDir[] = "c__path_to_dir_ee54bbbc"; + +TEST_F(MultiSessionTest, GetCacheDir_IgnoresTrailingPathSeparators) { + EXPECT_EQ(MultiSession::GetCacheDir("C:\\path\\to\\dir"), kCacheDir); + EXPECT_EQ(MultiSession::GetCacheDir("C:\\path\\to\\dir\\"), kCacheDir); +} + +TEST_F(MultiSessionTest, GetCacheDir_WorksWithForwardSlashes) { + EXPECT_EQ(MultiSession::GetCacheDir("C:/path/to/dir"), kCacheDir); + EXPECT_EQ(MultiSession::GetCacheDir("C:/path/to/dir/"), kCacheDir); +} + +TEST_F(MultiSessionTest, GetCacheDir_ReplacesInvalidCharacters) { + 
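+  // Characters that are not allowed in Windows file names are replaced by '_';
+  // the trailing 8 characters are the hash of the original path.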
EXPECT_EQ(MultiSession::GetCacheDir("C:\\<>:\"/\\|?*"), + "c___________ae188efd"); +} + +TEST_F(MultiSessionTest, GetCacheDir_UsesFullPath) { + EXPECT_EQ(MultiSession::GetCacheDir("foo/bar"), + MultiSession::GetCacheDir(path::GetFullPath("foo/bar"))); +} + +#if PLATFORM_WINDOWS +TEST_F(MultiSessionTest, GetCacheDir_IgnoresCaseOnWindows) { + EXPECT_EQ(MultiSession::GetCacheDir("C:\\PATH\\TO\\DIR"), kCacheDir); +} +#endif + +TEST_F(MultiSessionTest, GetCachePath_ContainsExpectedParts) { + absl::StatusOr cache_path = + MultiSession::GetCachePath("C:\\path\\to\\dir"); + ASSERT_OK(cache_path); + EXPECT_TRUE(absl::EndsWith(*cache_path, kCacheDir)) << *cache_path; + EXPECT_TRUE( + absl::StrContains(*cache_path, path::Join("GGP", "asset_streaming"))) + << *cache_path; +} + +TEST_F(MultiSessionTest, GetCachePath_ShortensLongPaths) { + EXPECT_GT(strlen(kVeryLongPath), MultiSession::kDefaultMaxCachePathLen); + std::string cache_dir = MultiSession::GetCacheDir(kVeryLongPath); + absl::StatusOr cache_path = + MultiSession::GetCachePath(kVeryLongPath); + ASSERT_OK(cache_path); + EXPECT_EQ(cache_path->size(), MultiSession::kDefaultMaxCachePathLen); + EXPECT_TRUE( + absl::StrContains(*cache_path, path::Join("GGP", "asset_streaming"))) + << *cache_path; + // The hash in the end of the path is kept and not shortened. + EXPECT_EQ(cache_dir.substr(cache_dir.size() - MultiSession::kDirHashLen), + cache_path->substr(cache_path->size() - MultiSession::kDirHashLen)); +} + +TEST_F(MultiSessionTest, GetCachePath_DoesNotSplitUtfCodePoints) { + // Find out the length of the %APPDATA%\GGP\asset_streaming\" + hash part. + absl::StatusOr cache_path = MultiSession::GetCachePath(""); + ASSERT_OK(cache_path); + size_t base_len = cache_path->size(); + + // Path has are two 2-byte characters. They should not be split in the middle. + cache_path = MultiSession::GetCachePath(u8"\u0200\u0200", base_len); + ASSERT_OK(cache_path); + EXPECT_EQ(cache_path->size(), base_len); + + // %APPDATA%\GGP\asset_streaming\abcdefg + cache_path = MultiSession::GetCachePath(u8"\u0200\u0200", base_len + 1); + ASSERT_OK(cache_path); + EXPECT_EQ(cache_path->size(), base_len); + + // %APPDATA%\GGP\asset_streaming\\u0200abcdefg + cache_path = MultiSession::GetCachePath(u8"\u0200\u0200", base_len + 2); + ASSERT_OK(cache_path); + EXPECT_EQ(cache_path->size(), base_len + 2); + + // %APPDATA%\GGP\asset_streaming\\u0200abcdefg + cache_path = MultiSession::GetCachePath(u8"\u0200\u0200", base_len + 3); + ASSERT_OK(cache_path); + EXPECT_EQ(cache_path->size(), base_len + 2); +} + +// Calculate manifest for an empty directory. +TEST_F(MultiSessionTest, MultiSessionRunnerOnEmpty) { + cfg_.src_dir = test_dir_path_; + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + EXPECT_OK(runner.WaitForManifestAck(kInstance, kTimeout)); + // The first update is always the empty manifest, wait for the second one. + ASSERT_TRUE(WaitForManifestUpdated(2)); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckMultiSessionStartRecorded(0, 0, 0); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kInitUpdateAll, + absl::StatusCode::kOk, 0, 0, 0, 0, 0, 0)}); + + EXPECT_OK(runner.Status()); + EXPECT_OK(runner.Shutdown()); +} + +// Calculate manifest for a non-empty directory. 
+TEST_F(MultiSessionTest, MultiSessionRunnerNonEmptySucceeds) { + // Contains a.txt, subdir/b.txt, subdir/c.txt, subdir/d.txt. + cfg_.src_dir = path::Join(base_dir_, "non_empty"); + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + EXPECT_OK(runner.WaitForManifestAck(kInstance, kTimeout)); + // The first update is always the empty manifest, wait for the second one. + ASSERT_TRUE(WaitForManifestUpdated(2)); + CheckMultiSessionStartRecorded(46, 4, 4); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals( + {"a.txt", "subdir", "subdir/b.txt", "subdir/c.txt", "subdir/d.txt"}, + runner.ManifestId())); + EXPECT_OK(runner.Status()); + EXPECT_OK(runner.Shutdown()); +} + +// Update manifest on adding a file. +TEST_F(MultiSessionTest, MultiSessionRunnerAddFileSucceeds) { + cfg_.src_dir = test_dir_path_; + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + EXPECT_OK(runner.WaitForManifestAck(kInstance, kTimeout)); + // The first update is always the empty manifest, wait for the second one. + ASSERT_TRUE(WaitForManifestUpdated(2)); + ASSERT_OK(runner.Status()); + CheckMultiSessionStartRecorded(0, 0, 0); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kInitUpdateAll, + absl::StatusCode::kOk, 0, 0, 0, 0, 0, 0)}); + + const std::string file_path = path::Join(test_dir_path_, "file.txt"); + EXPECT_OK(path::WriteFile(file_path, kData, kDataSize)); + // 1 file was added = incremented exp_num_manifest_updates. + ASSERT_TRUE(WaitForManifestUpdated(3)); + ASSERT_NO_FATAL_FAILURE( + ExpectManifestEquals({"file.txt"}, runner.ManifestId())); + CheckMultiSessionStartNotRecorded(); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kRegularUpdate, + absl::StatusCode::kOk, 1, 0, 1, 1, 0, kDataSize)}); + + EXPECT_OK(runner.Status()); + EXPECT_OK(runner.Shutdown()); +} + +// Fail if the directory does not exist as the watching could not be started. +// At this moment we expect that the directory exists. +TEST_F(MultiSessionTest, MultiSessionRunnerNoDirFails) { + cfg_.src_dir = path::Join(base_dir_, "non_existing"); + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + + ASSERT_FALSE( + absl::IsNotFound(runner.WaitForManifestAck(kInstance, kTimeout))); + ASSERT_FALSE(WaitForManifestUpdated(1, absl::Milliseconds(10))); + CheckMultiSessionStartNotRecorded(); + CheckManifestUpdateRecorded(std::vector{}); + EXPECT_NOT_OK(runner.Shutdown()); + EXPECT_TRUE(absl::StrContains(runner.Status().ToString(), + "Could not start watching")); +} + +// Do not break if the directory is recreated. 
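+// Scenario: stream a directory containing file.txt, delete the directory,
+// recreate it empty, then add new_file.txt; the manifest has to follow each of
+// these steps.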
+TEST_F(MultiSessionTest, MultiSessionRunnerDirRecreatedSucceeds) { + cfg_.src_dir = test_dir_path_; + EXPECT_OK(path::WriteFile(path::Join(test_dir_path_, "file.txt"), kData, + kDataSize)); + + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + + { + SCOPED_TRACE("Originally, only the streamed directory contains file.txt."); + EXPECT_OK(runner.WaitForManifestAck(kInstance, kTimeout)); + // The first update is always the empty manifest, wait for the second one. + ASSERT_TRUE(WaitForManifestUpdated(2)); + CheckMultiSessionStartRecorded((uint64_t)kDataSize, 1, 1); + ASSERT_NO_FATAL_FAILURE( + ExpectManifestEquals({"file.txt"}, runner.ManifestId())); + CheckManifestUpdateRecorded( + std::vector{GetManifestUpdateData( + metrics::UpdateTrigger::kInitUpdateAll, absl::StatusCode::kOk, 1, 0, + 1, 1, 0, kDataSize)}); + } + + { + SCOPED_TRACE( + "Remove the streamed directory, the manifest should become empty."); + EXPECT_OK(path::RemoveDirRec(test_dir_path_)); + ASSERT_TRUE(WaitForManifestUpdated(3)); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckManifestUpdateRecorded( + std::vector{GetManifestUpdateData( + metrics::UpdateTrigger::kRunningUpdateAll, + absl::StatusCode::kNotFound, 1, 0, 1, 1, 0, kDataSize)}); + } + + { + SCOPED_TRACE( + "Create the watched directory -> an empty manifest should be " + "streamed."); + EXPECT_OK(path::CreateDirRec(test_dir_path_)); + EXPECT_TRUE(WaitForManifestUpdated(4)); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kRunningUpdateAll, + absl::StatusCode::kOk, 0, 0, 0, 0, 0, 0)}); + } + + { + SCOPED_TRACE("Create 'new_file.txt' -> new manifest should be created."); + EXPECT_OK(path::WriteFile(path::Join(test_dir_path_, "new_file.txt"), kData, + kDataSize)); + ASSERT_TRUE(WaitForManifestUpdated(5)); + ASSERT_NO_FATAL_FAILURE( + ExpectManifestEquals({"new_file.txt"}, runner.ManifestId())); + CheckManifestUpdateRecorded( + std::vector{GetManifestUpdateData( + metrics::UpdateTrigger::kRegularUpdate, absl::StatusCode::kOk, 1, 0, + 1, 1, 0, kDataSize)}); + CheckMultiSessionStartNotRecorded(); + } + + EXPECT_OK(runner.Status()); + EXPECT_OK(runner.Shutdown()); +} + +// Fail if the streamed source is a file. +TEST_F(MultiSessionTest, MultiSessionRunnerFileAsStreamedDirFails) { + cfg_.src_dir = path::Join(test_dir_path_, "file.txt"); + EXPECT_OK(path::WriteFile(cfg_.src_dir, kData, kDataSize)); + + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + ASSERT_FALSE(WaitForManifestUpdated(1, absl::Milliseconds(10))); + CheckMultiSessionStartNotRecorded(); + CheckManifestUpdateRecorded(std::vector{}); + EXPECT_NOT_OK(runner.Shutdown()); + EXPECT_TRUE(absl::StrContains(runner.Status().ToString(), + "Failed to update manifest")) + << runner.Status().ToString(); +} + +// Stream an empty manifest if the streamed directory was re-created as a file. 
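+// Re-creating the streamed path as a regular file is reported as a
+// FailedPrecondition update, and the previously streamed empty manifest is
+// kept.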
+TEST_F(MultiSessionTest, + MultiSessionRunnerDirRecreatedAsFileSucceedsWithEmptyManifest) { + cfg_.src_dir = path::Join(test_dir_path_, "file"); + EXPECT_OK(path::CreateDirRec(cfg_.src_dir)); + + MultiSessionRunner runner(cfg_.src_dir, &data_store_, &process_factory_, + false /*enable_stats*/, kTimeout, kNumThreads, + metrics_service_, + [this]() { OnManifestUpdated(); }); + EXPECT_OK(runner.Initialize(kPort, AssetStreamServerType::kTest)); + ASSERT_TRUE(WaitForManifestUpdated(2)); + CheckMultiSessionStartRecorded(0, 0, 0); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kInitUpdateAll, + absl::StatusCode::kOk, 0, 0, 0, 0, 0, 0)}); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + + // Remove the streamed directory, the manifest should become empty. + EXPECT_OK(path::RemoveDirRec(cfg_.src_dir)); + ASSERT_TRUE(WaitForManifestUpdated(3)); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckManifestUpdateRecorded(std::vector{ + GetManifestUpdateData(metrics::UpdateTrigger::kRunningUpdateAll, + absl::StatusCode::kNotFound, 0, 0, 0, 0, 0, 0)}); + + EXPECT_OK(path::WriteFile(cfg_.src_dir, kData, kDataSize)); + EXPECT_TRUE(WaitForManifestUpdated(4)); + ASSERT_NO_FATAL_FAILURE(ExpectManifestEquals({}, runner.ManifestId())); + CheckManifestUpdateRecorded( + std::vector{GetManifestUpdateData( + metrics::UpdateTrigger::kRunningUpdateAll, + absl::StatusCode::kFailedPrecondition, 0, 0, 0, 0, 0, 0)}); + CheckMultiSessionStartNotRecorded(); + + EXPECT_OK(runner.Status()); + EXPECT_OK(runner.Shutdown()); +} + +} // namespace +} // namespace cdc_ft diff --git a/asset_stream_manager/session.cc b/asset_stream_manager/session.cc new file mode 100644 index 0000000..9666aae --- /dev/null +++ b/asset_stream_manager/session.cc @@ -0,0 +1,131 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/session.h" + +#include "asset_stream_manager/cdc_fuse_manager.h" +#include "common/log.h" +#include "common/port_manager.h" +#include "common/status.h" +#include "common/status_macros.h" +#include "metrics/enums.h" +#include "metrics/messages.h" + +namespace cdc_ft { +namespace { + +// Timeout for initial gamelet connection. 
+constexpr double kInstanceConnectionTimeoutSec = 10.0f; + +metrics::DeveloperLogEvent GetEventWithHeartBeatData(size_t bytes, + size_t chunks) { + metrics::DeveloperLogEvent evt; + evt.as_manager_data = std::make_unique(); + evt.as_manager_data->session_data = std::make_unique(); + evt.as_manager_data->session_data->byte_count = bytes; + evt.as_manager_data->session_data->chunk_count = chunks; + return std::move(evt); +} + +} // namespace + +Session::Session(std::string instance_id, std::string instance_ip, + uint16_t instance_port, SessionConfig cfg, + ProcessFactory* process_factory, + std::unique_ptr metrics_recorder) + : instance_id_(std::move(instance_id)), + cfg_(std::move(cfg)), + process_factory_(process_factory), + remote_util_(cfg_.verbosity, cfg_.quiet, process_factory, + /*forward_output_to_logging=*/true), + metrics_recorder_(std::move(metrics_recorder)) { + assert(metrics_recorder_); + remote_util_.SetIpAndPort(instance_ip, instance_port); +} + +Session::~Session() { + absl::Status status = Stop(); + if (!status.ok()) { + LOG_ERROR("Failed to stop session for instance '%s': %s", instance_id_, + status.ToString()); + } +} + +absl::Status Session::Start(int local_port, int first_remote_port, + int last_remote_port) { + // Find an available remote port. + std::unordered_set ports; + ASSIGN_OR_RETURN( + ports, + PortManager::FindAvailableRemotePorts( + first_remote_port, last_remote_port, "127.0.0.1", process_factory_, + &remote_util_, kInstanceConnectionTimeoutSec, true), + "Failed to find an available remote port in the range [%d, %d]", + first_remote_port, last_remote_port); + assert(!ports.empty()); + int remote_port = *ports.begin(); + + assert(!fuse_); + fuse_ = std::make_unique(instance_id_, process_factory_, + &remote_util_); + RETURN_IF_ERROR( + fuse_->Start(local_port, remote_port, cfg_.verbosity, cfg_.fuse_debug, + cfg_.fuse_singlethreaded, cfg_.stats, cfg_.fuse_check, + cfg_.fuse_cache_capacity, cfg_.fuse_cleanup_timeout_sec, + cfg_.fuse_access_idle_timeout_sec), + "Failed to start instance component"); + return absl::OkStatus(); +} + +absl::Status Session::Stop() { + absl::ReaderMutexLock lock(&transferred_data_mu_); + // TODO: Record error on session end. 
+ metrics_recorder_->RecordEvent( + GetEventWithHeartBeatData(transferred_bytes_, transferred_chunks_), + metrics::EventType::kSessionEnd); + if (fuse_) { + RETURN_IF_ERROR(fuse_->Stop()); + fuse_.reset(); + } + + return absl::OkStatus(); +} + +bool Session::IsHealthy() { return fuse_->IsHealthy(); } + +void Session::RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const { + metrics_recorder_->RecordEvent(std::move(event), code); +} + +void Session::OnContentSent(size_t bytes, size_t chunks) { + absl::WriterMutexLock lock(&transferred_data_mu_); + transferred_bytes_ += bytes; + transferred_chunks_ += chunks; +} + +void Session::RecordHeartBeatIfChanged() { + absl::ReaderMutexLock lock(&transferred_data_mu_); + if (transferred_bytes_ == last_read_bytes_ && + transferred_chunks_ == last_read_chunks_) { + return; + } + last_read_bytes_ = transferred_bytes_; + last_read_chunks_ = transferred_chunks_; + metrics_recorder_->RecordEvent( + GetEventWithHeartBeatData(last_read_bytes_, last_read_chunks_), + metrics::EventType::kSessionHeartBeat); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/session.h b/asset_stream_manager/session.h new file mode 100644 index 0000000..f6d57dc --- /dev/null +++ b/asset_stream_manager/session.h @@ -0,0 +1,90 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_SESSION_H_ +#define ASSET_STREAM_MANAGER_SESSION_H_ + +#include +#include +#include + +#include "absl/status/status.h" +#include "asset_stream_manager/metrics_recorder.h" +#include "asset_stream_manager/session_config.h" +#include "common/remote_util.h" + +namespace cdc_ft { + +class CdcFuseManager; +class ProcessFactory; +class Process; + +// Manages the connection of a workstation to a single gamelet. +class Session { + public: + // |instance_id| is a unique id for the remote instance. + // |instance_ip| is the IP address of the remote instance. + // |instance_port| is the SSH tunnel port for connecting to the instance. + // |cfg| contains generic configuration parameters for the session. + // |process_factory| abstracts process creation. + Session(std::string instance_id, std::string instance_ip, + uint16_t instance_port, SessionConfig cfg, + ProcessFactory* process_factory, + std::unique_ptr metrics_recorder); + ~Session(); + + // Starts the CDC FUSE on the instance with established port forwarding. + // |local_port| is the local reverse forwarding port to use. + // [|first_remote_port|, |last_remote_port|] are the allowed remote ports. + absl::Status Start(int local_port, int first_remote_port, + int last_remote_port); + + // Shuts down the connection to the instance. + absl::Status Stop() ABSL_LOCKS_EXCLUDED(transferred_data_mu_); + + // Returns true if the FUSE process is running. + bool IsHealthy(); + + // Record an event for the session. + void RecordEvent(metrics::DeveloperLogEvent event, + metrics::EventType code) const; + + // Is called when content was sent during the session. 
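+  // The reported byte/chunk counts accumulate and are later written out by
+  // RecordHeartBeatIfChanged().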
+ void OnContentSent(size_t bytes, size_t chunks) + ABSL_LOCKS_EXCLUDED(transferred_data_mu_); + + // Records heart beat data if it has changed since last record. + void RecordHeartBeatIfChanged() ABSL_LOCKS_EXCLUDED(transferred_data_mu_); + + private: + const std::string instance_id_; + const SessionConfig cfg_; + ProcessFactory* const process_factory_; + + RemoteUtil remote_util_; + std::unique_ptr fuse_; + std::unique_ptr metrics_recorder_; + + absl::Mutex transferred_data_mu_; + uint64_t transferred_bytes_ ABSL_GUARDED_BY(transferred_data_mu_) = 0; + uint64_t transferred_chunks_ ABSL_GUARDED_BY(transferred_data_mu_) = 0; + uint64_t last_read_bytes_ = 0; + uint64_t last_read_chunks_ = 0; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_SESSION_H_ diff --git a/asset_stream_manager/session_config.h b/asset_stream_manager/session_config.h new file mode 100644 index 0000000..19bb357 --- /dev/null +++ b/asset_stream_manager/session_config.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_SESSION_CONFIG_H_ +#define ASSET_STREAM_MANAGER_SESSION_CONFIG_H_ + +#include + +namespace cdc_ft { + +// The values set in this config do not necessarily denote the default values. +// For the defaults, see the corresponding flag values. +struct SessionConfig { + // General log verbosity. + int verbosity = 0; + + // Silence logs from process execution. + bool quiet = false; + + // Print detailed streaming stats. + bool stats = false; + + // Whether to run FUSE in debug mode. + bool fuse_debug = false; + + // Whether to run FUSE in single-threaded mode. + bool fuse_singlethreaded = false; + + // Whether to run FUSE consistency check. + bool fuse_check = false; + + // Cache capacity with a suffix. + uint64_t fuse_cache_capacity = 0; + + // Cleanup timeout in seconds. + uint32_t fuse_cleanup_timeout_sec = 0; + + // Access idling timeout in seconds. + uint32_t fuse_access_idle_timeout_sec = 0; + + // Number of threads used in the manifest updater to compute chunks/hashes. + uint32_t manifest_updater_threads = 0; + + // Time to wait until running a manifest update after detecting a file change. + uint32_t file_change_wait_duration_ms = 0; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_SESSION_CONFIG_H_ diff --git a/asset_stream_manager/session_management_server.cc b/asset_stream_manager/session_management_server.cc new file mode 100644 index 0000000..33e8a56 --- /dev/null +++ b/asset_stream_manager/session_management_server.cc @@ -0,0 +1,76 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "asset_stream_manager/session_management_server.h"
+
+#include "absl/strings/str_format.h"
+#include "asset_stream_manager/background_service_impl.h"
+#include "asset_stream_manager/local_assets_stream_manager_service_impl.h"
+#include "asset_stream_manager/session_manager.h"
+#include "common/log.h"
+#include "common/status.h"
+#include "common/status_macros.h"
+#include "grpcpp/grpcpp.h"
+
+namespace cdc_ft {
+
+SessionManagementServer::SessionManagementServer(
+    grpc::Service* session_service, grpc::Service* background_service,
+    SessionManager* session_manager)
+    : session_service_(session_service),
+      background_service_(background_service),
+      session_manager_(session_manager) {}
+
+SessionManagementServer::~SessionManagementServer() = default;
+
+absl::Status SessionManagementServer::Start(int port) {
+  assert(!server_);
+
+  // Use 127.0.0.1 here to enforce IPv4. Otherwise, if only IPv4 is blocked on
+  // |port|, the server is started with IPv6 only, but clients are connecting
+  // with IPv4.
+  int selected_port = 0;
+  std::string server_address = absl::StrFormat("127.0.0.1:%i", port);
+  grpc::ServerBuilder builder;
+  builder.AddListeningPort(server_address, grpc::InsecureServerCredentials(),
+                           &selected_port);
+  builder.RegisterService(session_service_);
+  builder.RegisterService(background_service_);
+  server_ = builder.BuildAndStart();
+  if (selected_port != port) {
+    return MakeStatus(
+        "Failed to start session management server: Could not listen on port "
+        "%i. Is the port in use?",
+        port);
+  }
+  if (!server_) {
+    return MakeStatus(
+        "Failed to start session management server. Check asset_stream_manager "
+        "logs.");
+  }
+  LOG_INFO("Session management server listening on '%s'", server_address);
+  return absl::OkStatus();
+}
+
+void SessionManagementServer::RunUntilShutdown() { server_->Wait(); }
+
+absl::Status SessionManagementServer::Shutdown() {
+  assert(server_);
+  RETURN_IF_ERROR(session_manager_->Shutdown(),
+                  "Failed to shut down session manager");
+  server_->Shutdown();
+  server_->Wait();
+  return absl::OkStatus();
+}
+
+}  // namespace cdc_ft
diff --git a/asset_stream_manager/session_management_server.h b/asset_stream_manager/session_management_server.h
new file mode 100644
index 0000000..a1ff154
--- /dev/null
+++ b/asset_stream_manager/session_management_server.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef ASSET_STREAM_MANAGER_SESSION_MANAGEMENT_SERVER_H_ +#define ASSET_STREAM_MANAGER_SESSION_MANAGEMENT_SERVER_H_ + +#include + +#include "absl/status/status.h" + +namespace grpc { +class Server; +class Service; +} // namespace grpc + +namespace cdc_ft { + +class SessionManager; +class ProcessFactory; + +// gRPC server for managing streaming sessions. Contains these services: +// - LocalAssetsStreamManager +// - Background +class SessionManagementServer { + public: + SessionManagementServer(grpc::Service* session_service, + grpc::Service* background_service, + SessionManager* session_manager); + ~SessionManagementServer(); + + // Starts the server on the local port |port|. + absl::Status Start(int port); + + // Waits until ProcessManager issues an Exit() request to the background + // service. + void RunUntilShutdown(); + + // Shuts down the session manager and the server. + absl::Status Shutdown(); + + private: + grpc::Service* session_service_; + grpc::Service* background_service_; + SessionManager* const session_manager_; + std::unique_ptr server_; +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_SESSION_MANAGEMENT_SERVER_H_ diff --git a/asset_stream_manager/session_manager.cc b/asset_stream_manager/session_manager.cc new file mode 100644 index 0000000..66829b4 --- /dev/null +++ b/asset_stream_manager/session_manager.cc @@ -0,0 +1,193 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "asset_stream_manager/session_manager.h" + +#include "absl/strings/str_split.h" +#include "asset_stream_manager/multi_session.h" +#include "common/log.h" +#include "common/process.h" +#include "common/status.h" +#include "common/status_macros.h" +#include "manifest/manifest_updater.h" + +namespace cdc_ft { +namespace { + +// Returns a key to uniquely map a streaming directory |src_dir| to a +// MultiSession instance. +std::string GetMultiSessionKey(const std::string src_dir) { + // Use the cache dir as a key to identify MultiSessions. That way, different + // representations of the same dir (e.g. dir and dir\) map to the same + // MultiSession. 
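+  // For example, "C:\path\to\game", "C:\path\to\game\" and "c:/path/to/game"
+  // all yield the same key of the form "c__path_to_game_<hash>" (illustrative
+  // value).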
+ return MultiSession::GetCacheDir(src_dir); +} + +} // namespace + +SessionManager::SessionManager(SessionConfig cfg, + ProcessFactory* process_factory, + metrics::MetricsService* metrics_service) + : cfg_(cfg), + process_factory_(process_factory), + metrics_service_(metrics_service) {} + +SessionManager::~SessionManager() = default; + +absl::Status SessionManager::Shutdown() { + absl::MutexLock lock(&sessions_mutex_); + + for (const auto& [key, ms] : sessions_) { + LOG_INFO("Shutting down MultiSession for path '%s'", ms->src_dir()); + RETURN_IF_ERROR(ms->Shutdown(), + "Failed to shut down MultiSession for path '%s'", + ms->src_dir()); + } + sessions_.clear(); + return absl::OkStatus(); +} + +absl::Status SessionManager::StartSession( + const std::string& instance_id, const std::string& project_id, + const std::string& organization_id, const std::string& instance_ip, + uint16_t instance_port, const std::string& src_dir, + MultiSession** multi_session, metrics::SessionStartStatus* metrics_status) { + *multi_session = nullptr; + *metrics_status = metrics::SessionStartStatus::kOk; + + absl::MutexLock lock(&sessions_mutex_); + + // Check if the directory is correct as early as possible. + absl::Status status = ManifestUpdater::IsValidDir(src_dir); + if (!status.ok()) { + absl::Status stop_status = StopSessionInternal(instance_id); + if (!stop_status.ok() && !absl::IsNotFound(stop_status)) { + LOG_ERROR("Failed to stop previous session for instance '%s': '%s'", + instance_id, stop_status.ToString()); + } + *metrics_status = metrics::SessionStartStatus::kInvalidDirError; + return WrapStatus(status, "Failed to start session for path '%s'", src_dir); + } + + // Early out if we are streaming the workstation dir to the given gamelet. + MultiSession* ms = GetMultiSession(src_dir); + *multi_session = ms; + if (ms && ms->HasSessionForInstance(instance_id)) { + if (ms->IsSessionHealthy(instance_id)) { + LOG_INFO("Reusing existing session"); + return absl::OkStatus(); + } + + LOG_INFO("Existing session for instance '%s' is not healthy. Restarting.", + instance_id); + + // We could also fall through, but this might restart the MultiSession. + status = ms->StopSession(instance_id); + if (status.ok()) { + status = ms->StartSession(instance_id, project_id, organization_id, + instance_ip, instance_port); + } + if (!status.ok()) { + *metrics_status = metrics::SessionStartStatus::kRestartSessionError; + } + return WrapStatus(status, "Failed to restart session for instance '%s'", + instance_id); + } + + // If we are already streaming to the given gamelet, but from another + // workstation directory, stop that session. + // Note that NotFoundError is OK and expected (it means no session exists). + status = StopSessionInternal(instance_id); + if (!status.ok() && !absl::IsNotFound(status)) { + *metrics_status = metrics::SessionStartStatus::kStopSessionError; + return WrapStatus(status, + "Failed to stop previous session for instance '%s'", + instance_id); + } + + // Get or create the MultiSession for the given workstation directory. + absl::StatusOr ms_res = GetOrCreateMultiSession(src_dir); + if (!ms_res.ok()) { + *metrics_status = metrics::SessionStartStatus::kCreateMultiSessionError; + return WrapStatus(ms_res.status(), + "Failed to create MultiSession for path '%s'", src_dir); + } + ms = ms_res.value(); + *multi_session = ms; + + // Start the session. 
+ LOG_INFO("Starting streaming session from path '%s' to instance '%s'", + src_dir, instance_id); + status = ms->StartSession(instance_id, project_id, organization_id, + instance_ip, instance_port); + if (!status.ok()) { + *metrics_status = metrics::SessionStartStatus::kStartSessionError; + } + return status; +} + +absl::Status SessionManager::StopSession(const std::string& instance_id) { + absl::MutexLock lock(&sessions_mutex_); + return StopSessionInternal(instance_id); +} + +MultiSession* SessionManager::GetMultiSession(const std::string& src_dir) { + const std::string key = GetMultiSessionKey(src_dir); + SessionMap::iterator iter = sessions_.find(key); + return iter != sessions_.end() ? iter->second.get() : nullptr; +} + +absl::StatusOr SessionManager::GetOrCreateMultiSession( + const std::string& src_dir) { + const std::string key = GetMultiSessionKey(src_dir); + SessionMap::iterator iter = sessions_.find(key); + if (iter == sessions_.end()) { + LOG_INFO("Creating new MultiSession for path '%s'", src_dir); + auto ms = std::make_unique( + src_dir, cfg_, process_factory_, + new MultiSessionMetricsRecorder(metrics_service_)); + RETURN_IF_ERROR(ms->Initialize(), "Failed to initialize MultiSession"); + iter = sessions_.insert({key, std::move(ms)}).first; + } + + return iter->second.get(); +} + +absl::Status SessionManager::StopSessionInternal(const std::string& instance) { + absl::Status status; + for (const auto& [key, ms] : sessions_) { + if (!ms->HasSessionForInstance(instance)) continue; + + LOG_INFO("Stopping session streaming from '%s' to instance '%s'", + ms->src_dir(), instance); + RETURN_IF_ERROR(ms->StopSession(instance), + "Failed to stop session for instance '%s'", instance); + + // Session was stopped. If the MultiSession is empty now, delete it. + if (ms->Empty()) { + LOG_INFO("Shutting down MultiSession for path '%s'", ms->src_dir()); + RETURN_IF_ERROR(ms->Shutdown(), + "Failed to shut down MultiSession for path '%s'", + ms->src_dir()); + sessions_.erase(key); + } + + return absl::OkStatus(); + } + + return absl::NotFoundError( + absl::StrFormat("No session for instance id '%s' found", instance)); +} + +} // namespace cdc_ft diff --git a/asset_stream_manager/session_manager.h b/asset_stream_manager/session_manager.h new file mode 100644 index 0000000..dccd428 --- /dev/null +++ b/asset_stream_manager/session_manager.h @@ -0,0 +1,100 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_SESSION_MANAGER_H_ +#define ASSET_STREAM_MANAGER_SESSION_MANAGER_H_ + +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/synchronization/mutex.h" +#include "asset_stream_manager/session_config.h" +#include "metrics/metrics.h" + +namespace cdc_ft { + +class MultiSession; +class ProcessFactory; + +// Implements a service to start and stop streaming sessions as a server. +// The corresponding clients are implemented by the ggp CLI and SDK Proxy. 
+// The CLI triggers StartSession() from `ggp instance mount --local-dir` and +// StopSession() from `ggp instance unmount`. SDK Proxy invokes StartSession() +// when a user starts a new game from the partner portal and sets an `Asset +// streaming directory` in the `Advanced settings` in the `Play settings` +// dialog. +// This service is owned by SessionManagementServer. +class SessionManager { + public: + SessionManager(SessionConfig cfg, ProcessFactory* process_factory, + metrics::MetricsService* metrics_service); + ~SessionManager(); + + // Starts a session and populates |multi_session| and |metrics_status|. + absl::Status StartSession(const std::string& instance_id, + const std::string& project_id, + const std::string& organization_id, + const std::string& instance_ip, + uint16_t instance_port, const std::string& src_dir, + MultiSession** multi_session, + metrics::SessionStartStatus* metrics_status) + ABSL_LOCKS_EXCLUDED(sessions_mutex_); + + // Stops the session for the given |instance|. Returns a NotFound error if no + // session exists. + absl::Status StopSession(const std::string& instance) + ABSL_LOCKS_EXCLUDED(sessions_mutex_); + + // Shuts down all existing MultiSessions. + absl::Status Shutdown() ABSL_LOCKS_EXCLUDED(sessions_mutex_); + + private: + // Stops the session for the given |instance|. Returns a NotFound error if no + // session exists. + absl::Status StopSessionInternal(const std::string& instance) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(sessions_mutex_); + + // Returns the MultiSession for the given workstation directory |src_dir| or + // nullptr if it does not exist. + MultiSession* GetMultiSession(const std::string& src_dir) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(sessions_mutex_); + + // Gets an existing MultiSession or creates a new one for the given + // workstation directory |src_dir|. + absl::StatusOr GetOrCreateMultiSession( + const std::string& src_dir) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(sessions_mutex_); + + // Sets session start status for a metrics event. 
+ void SetSessionStartStatus(metrics::DeveloperLogEvent* evt, + absl::Status absl_status, + metrics::SessionStartStatus status) const; + + const SessionConfig cfg_; + ProcessFactory* const process_factory_; + metrics::MetricsService* const metrics_service_; + + absl::Mutex sessions_mutex_; + using SessionMap = + std::unordered_map>; + SessionMap sessions_ ABSL_GUARDED_BY(sessions_mutex_); +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_SESSION_MANAGER_H_ diff --git a/asset_stream_manager/testdata/multi_session/non_empty/a.txt b/asset_stream_manager/testdata/multi_session/non_empty/a.txt new file mode 100644 index 0000000..cb8ac06 --- /dev/null +++ b/asset_stream_manager/testdata/multi_session/non_empty/a.txt @@ -0,0 +1 @@ +aaaaaaaa \ No newline at end of file diff --git a/asset_stream_manager/testdata/multi_session/non_empty/subdir/b.txt b/asset_stream_manager/testdata/multi_session/non_empty/subdir/b.txt new file mode 100644 index 0000000..5a5be04 --- /dev/null +++ b/asset_stream_manager/testdata/multi_session/non_empty/subdir/b.txt @@ -0,0 +1 @@ +bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb \ No newline at end of file diff --git a/asset_stream_manager/testdata/multi_session/non_empty/subdir/c.txt b/asset_stream_manager/testdata/multi_session/non_empty/subdir/c.txt new file mode 100644 index 0000000..f2ad6c7 --- /dev/null +++ b/asset_stream_manager/testdata/multi_session/non_empty/subdir/c.txt @@ -0,0 +1 @@ +c diff --git a/asset_stream_manager/testdata/multi_session/non_empty/subdir/d.txt b/asset_stream_manager/testdata/multi_session/non_empty/subdir/d.txt new file mode 100644 index 0000000..4bcfe98 --- /dev/null +++ b/asset_stream_manager/testdata/multi_session/non_empty/subdir/d.txt @@ -0,0 +1 @@ +d diff --git a/asset_stream_manager/testdata/root.txt b/asset_stream_manager/testdata/root.txt new file mode 100644 index 0000000..e69de29 diff --git a/asset_stream_manager/testing_asset_stream_server.cc b/asset_stream_manager/testing_asset_stream_server.cc new file mode 100644 index 0000000..4a737dc --- /dev/null +++ b/asset_stream_manager/testing_asset_stream_server.cc @@ -0,0 +1,50 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "asset_stream_manager/testing_asset_stream_server.h" + +#include "data_store/data_store_reader.h" +#include "manifest/file_chunk_map.h" + +namespace cdc_ft { + +TestingAssetStreamServer::TestingAssetStreamServer( + std::string src_dir, DataStoreReader* data_store_reader, + FileChunkMap* file_chunks) + : AssetStreamServer(src_dir, data_store_reader, file_chunks) {} + +TestingAssetStreamServer::~TestingAssetStreamServer() = default; + +absl::Status TestingAssetStreamServer::Start(int port) { + return absl::OkStatus(); +} + +void TestingAssetStreamServer::SetManifestId( + const ContentIdProto& manifest_id) { + absl::MutexLock lock(&mutex_); + manifest_id_ = manifest_id; +} + +absl::Status TestingAssetStreamServer::WaitForManifestAck( + const std::string& instance, absl::Duration timeout) { + return absl::OkStatus(); +} + +void TestingAssetStreamServer::Shutdown() {} + +ContentIdProto TestingAssetStreamServer::GetManifestId() const { + absl::MutexLock lock(&mutex_); + return manifest_id_; +} +} // namespace cdc_ft diff --git a/asset_stream_manager/testing_asset_stream_server.h b/asset_stream_manager/testing_asset_stream_server.h new file mode 100644 index 0000000..153ff3d --- /dev/null +++ b/asset_stream_manager/testing_asset_stream_server.h @@ -0,0 +1,60 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ASSET_STREAM_MANAGER_TESTING_ASSET_STREAM_SERVER_H_ +#define ASSET_STREAM_MANAGER_TESTING_ASSET_STREAM_SERVER_H_ + +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "asset_stream_manager/grpc_asset_stream_server.h" +#include "manifest/manifest_proto_defs.h" + +namespace cdc_ft { + +// Not thread-safe testing server for streaming assets. 
+class TestingAssetStreamServer : public AssetStreamServer { + public: + TestingAssetStreamServer(std::string src_dir, + DataStoreReader* data_store_reader, + FileChunkMap* file_chunks); + + ~TestingAssetStreamServer(); + + // AssetStreamServer: + + absl::Status Start(int port) override; + + void SetManifestId(const ContentIdProto& manifest_id) + ABSL_LOCKS_EXCLUDED(mutex_) override; + + absl::Status WaitForManifestAck(const std::string& instance, + absl::Duration timeout) override; + void Shutdown() override; + + ContentIdProto GetManifestId() const ABSL_LOCKS_EXCLUDED(mutex_) override; + + private: + mutable absl::Mutex mutex_; + ContentIdProto manifest_id_ ABSL_GUARDED_BY(mutex_); +}; + +} // namespace cdc_ft + +#endif // ASSET_STREAM_MANAGER_TESTING_ASSET_STREAM_SERVER_H_ diff --git a/cdc_fuse_fs/.gitignore b/cdc_fuse_fs/.gitignore new file mode 100644 index 0000000..5b5531b --- /dev/null +++ b/cdc_fuse_fs/.gitignore @@ -0,0 +1,3 @@ +GGP/* +*.log +*.user \ No newline at end of file diff --git a/cdc_fuse_fs/BUILD b/cdc_fuse_fs/BUILD new file mode 100644 index 0000000..bae6baf --- /dev/null +++ b/cdc_fuse_fs/BUILD @@ -0,0 +1,136 @@ +package(default_visibility = ["//:__subpackages__"]) + +cc_binary( + name = "cdc_fuse_fs", + srcs = ["main.cc"], + deps = [ + ":cdc_fuse_fs_lib", + ":constants", + "//absl_helper:jedec_size_flag", + "//common:gamelet_component", + "//common:log", + "//data_store:data_provider", + "//data_store:disk_data_store", + "//data_store:grpc_reader", + "@com_google_absl//absl/flags:parse", + ], +) + +# Dependencies for cdc_fuse_fs_lib, except for FUSE. +cdc_fuse_fs_lib_shared_deps = [ + ":asset", + ":asset_stream_client", + ":config_stream_client", + "//common:log", + "//common:path", + "//common:platform", + "//common:util", + "//common:threadpool", + "@com_github_jsoncpp//:jsoncpp", +] + +cc_library( + name = "cdc_fuse_fs_lib", + srcs = ["cdc_fuse_fs.cc"], + hdrs = ["cdc_fuse_fs.h"], + target_compatible_with = ["@platforms//os:linux"], + deps = cdc_fuse_fs_lib_shared_deps + ["@com_github_fuse//:fuse_shared"], +) + +cc_library( + name = "cdc_fuse_fs_lib_mocked", + srcs = ["cdc_fuse_fs.cc"], + hdrs = ["cdc_fuse_fs.h"], + copts = ["-DUSE_MOCK_LIBFUSE=1"], + deps = cdc_fuse_fs_lib_shared_deps + [":mock_libfuse"], +) + +cc_test( + name = "cdc_fuse_fs_test", + srcs = ["cdc_fuse_fs_test.cc"], + deps = [ + ":cdc_fuse_fs_lib_mocked", + "//common:status_test_macros", + "//data_store", + "//data_store:mem_data_store", + "//manifest:fake_manifest_builder", + "//manifest:manifest_builder", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "mock_libfuse", + srcs = ["mock_libfuse.cc"], + hdrs = ["mock_libfuse.h"], + deps = ["//common:platform"], +) + +cc_library( + name = "constants", + hdrs = ["constants.h"], +) + +cc_library( + name = "asset_stream_client", + srcs = ["asset_stream_client.cc"], + hdrs = ["asset_stream_client.h"], + deps = [ + "//common:log", + "//common:status_macros", + "//common:stopwatch", + "//manifest:manifest_proto_defs", + "//proto:asset_stream_service_grpc_proto", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "asset", + srcs = ["asset.cc"], + hdrs = ["asset.h"], + deps = [ + "//common:buffer", + "//common:status", + "//data_store", + "//manifest:content_id", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + ], +) + +cc_test( 
+ name = "asset_test", + srcs = ["asset_test.cc"], + deps = [ + ":asset", + "//common:path", + "//common:platform", + "//common:status_test_macros", + "//data_store:mem_data_store", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "config_stream_client", + srcs = ["config_stream_client.cc"], + hdrs = ["config_stream_client.h"], + deps = [ + "//common:grpc_status", + "//common:log", + "//manifest:content_id", + "//proto:asset_stream_service_grpc_proto", + "@com_google_absl//absl/status", + ], +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) diff --git a/cdc_fuse_fs/asset.cc b/cdc_fuse_fs/asset.cc new file mode 100644 index 0000000..d83fa7e --- /dev/null +++ b/cdc_fuse_fs/asset.cc @@ -0,0 +1,520 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/asset.h" + +#include "common/buffer.h" +#include "common/status.h" +#include "data_store/data_store_reader.h" + +namespace cdc_ft { + +Asset::Asset() = default; + +Asset::~Asset() = default; + +void Asset::Initialize(ino_t parent_ino, DataStoreReader* data_store_reader, + const AssetProto* proto) { + parent_ino_ = parent_ino; + + assert(!data_store_reader_ && data_store_reader); + data_store_reader_ = data_store_reader; + + assert(!proto_ && proto); + proto_ = proto; + + // Create a lookup for the direct assets, if any. + // Lock the mutex for convenience, it's not strictly necessary here as no + // other thread has access to this object. + absl::WriterMutexLock lock(&mutex_); + UpdateProtoLookup(proto_->dir_assets()); +} + +absl::StatusOr> Asset::GetAllChildProtos() { + mutex_.AssertNotHeld(); + assert(proto_); + + if (proto_->type() != AssetProto::DIRECTORY) { + return absl::InvalidArgumentError( + absl::StrFormat("Asset '%s' is not a directory asset", proto_->name())); + } + + // Fetch all indirect dir asset lists. + for (;;) { + bool list_was_fetched; + ASSIGN_OR_RETURN(list_was_fetched, FetchNextDirAssetList(), + "Failed to fetch directory assets"); + if (!list_was_fetched) break; + } + return GetLoadedChildProtos(); +} + +std::vector Asset::GetLoadedChildProtos() const { + absl::ReaderMutexLock read_lock(&mutex_); + + // Push all directory asset protos to a vector. + std::vector protos; + protos.reserve(proto_lookup_.size()); + for (const std::pair& kv : + proto_lookup_) { + protos.push_back(kv.second); + } + return protos; +} + +absl::StatusOr Asset::Lookup(const char* name) { + mutex_.AssertNotHeld(); + assert(proto_); + if (proto_->type() != AssetProto::DIRECTORY) { + return absl::InvalidArgumentError( + absl::StrFormat("Asset '%s' is not a directory asset", proto_->name())); + } + + for (;;) { + { + absl::ReaderMutexLock read_lock(&mutex_); + + // Check if we already have the asset. + std::unordered_map::iterator it = + proto_lookup_.find(name); + if (it != proto_lookup_.end()) { + return it->second; + } + } + + // Fetch one more indirect asset list. 
+ bool list_was_fetched; + ASSIGN_OR_RETURN(list_was_fetched, FetchNextDirAssetList(), + "Failed to fetch directory assets"); + if (!list_was_fetched) { + // All lists were fetched, but asset still wasn't found. + return nullptr; + } + } +} + +absl::StatusOr Asset::Read(uint64_t offset, void* data, + uint64_t size) { + mutex_.AssertNotHeld(); + assert(proto_); + if (proto_->type() != AssetProto::FILE) + return absl::InvalidArgumentError("Not a file asset"); + + if (size == 0) return 0; + + // Find a chunk list such that list offset <= offset < next list offset. + int list_idx = FindChunkList(offset); + const RepeatedChunkRefProto* chunk_refs; + ASSIGN_OR_RETURN(chunk_refs, GetChunkRefList(list_idx), + "Failed to fetch indirect chunk list %i", list_idx); + uint64_t chunk_list_offset = ChunkListOffset(list_idx); + if (!chunk_refs) return 0; // Out of bounds. + + // Find a chunk such that chunk offset <= offset < next chunk offset. + int chunk_idx = FindChunk(*chunk_refs, chunk_list_offset, offset); + if (chunk_idx < 0 || chunk_idx >= chunk_refs->size()) { + // Data is malformed, e.g. empty chunk list with non-zero file size. + return MakeStatus( + "Invalid chunk ref list %i. Found chunk index %i not in [0, %u).", + list_idx, chunk_idx, chunk_refs->size()); + } + + uint64_t data_bytes_left = size; + uint64_t prefetch_bytes_left = data_store_reader_->PrefetchSize(size); + // Collect the chunk IDs required to satisfy the read request. + ChunkTransferList chunks; + while (chunk_refs) { + const ChunkRefProto& chunk_ref = chunk_refs->at(chunk_idx); + + // Figure out how much data we have to read from the current chunk. + uint64_t chunk_absolute_offset = chunk_list_offset + chunk_ref.offset(); + uint64_t chunk_offset = + offset > chunk_absolute_offset ? offset - chunk_absolute_offset : 0; + uint64_t chunk_size = ChunkSize(list_idx, chunk_idx, chunk_refs); + assert(chunk_size >= chunk_offset); + uint64_t bytes_to_read = + std::min(chunk_size - chunk_offset, data_bytes_left); + uint64_t bytes_to_prefetch = + std::min(chunk_size - chunk_offset, prefetch_bytes_left); + + // Enqueue a chunk transfer task. + chunks.emplace_back(chunk_ref.chunk_id(), chunk_offset, + bytes_to_read ? data : nullptr, bytes_to_read); + data = static_cast(data) + bytes_to_read; + data_bytes_left = + data_bytes_left > bytes_to_read ? data_bytes_left - bytes_to_read : 0; + prefetch_bytes_left -= bytes_to_prefetch; + offset += bytes_to_prefetch; + + // If we request enough data, we are done. + if (!prefetch_bytes_left) break; + + // Otherwise find next chunk. + ++chunk_idx; + while (chunk_idx >= chunk_refs->size()) { + // Go to next list. + chunk_idx = 0; + ++list_idx; + ASSIGN_OR_RETURN(chunk_refs, GetChunkRefList(list_idx), + "Failed to fetch indirect chunk list %i", list_idx); + chunk_list_offset = ChunkListOffset(list_idx); + if (!chunk_refs) { + // Out of bounds. If we're not at the file size now, it's an error. + if (offset != proto_->file_size()) { + return MakeStatus( + "Read error at position %u. Expected to be at file size %u.", + offset, proto_->file_size()); + } + break; + } + } + + if (chunk_refs) { + // We should be exactly at a chunk boundary now. + uint64_t chunk_rel_offset = chunk_refs->at(chunk_idx).offset(); + if (offset != chunk_list_offset + chunk_rel_offset) { + return MakeStatus("Unexpected chunk offset %u, expected %u + %u = %u", + offset, chunk_list_offset, chunk_rel_offset, + chunk_list_offset + chunk_rel_offset); + } + } + } + + // Read all data. 
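+ // |chunks| holds both the entries that back this read request and + // prefetch-only entries (null data pointer, zero size); the data store + // reader transfers them in one batch. Requested entries that could not be + // filled are reported below as data loss.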
+ absl::Status status = data_store_reader_->Get(&chunks); + if (!status.ok() || !chunks.ReadDone()) { + std::string msg = absl::StrFormat( + "Failed to fetch chunk(s) [%s] for file '%s', offset %u, size %u", + chunks.ToHexString( + [](auto const& chunk) { return chunk.size && !chunk.done; }), + proto_->name(), offset, size); + return status.ok() ? absl::DataLossError(msg) + : WrapStatus(status, "%s", msg); + } + return size - data_bytes_left; +} + +size_t Asset::GetNumFetchedFileChunkListsForTesting() { + mutex_.AssertNotHeld(); + absl::ReaderMutexLock read_lock(&mutex_); + + // In contrast to |dir_asset_lists_|, |file_chunk_lists_| might be fetched + // out-of-order, e.g. if someone tried to read the end of the file. + // Unfetched lists are nullptrs. + int num_fetched = 0; + for (const std::unique_ptr& list : file_chunk_lists_) { + if (list) { + ++num_fetched; + } + } + return num_fetched; +} + +size_t Asset::GetNumFetchedDirAssetsListsForTesting() { + mutex_.AssertNotHeld(); + absl::ReaderMutexLock read_lock(&mutex_); + + return dir_asset_lists_.size(); +} + +void Asset::UpdateProto(const AssetProto* proto) { + absl::WriterMutexLock write_lock(&mutex_); + proto_lookup_.clear(); + file_chunk_lists_.clear(); + dir_asset_lists_.clear(); + proto_ = proto; + if (proto_) { + UpdateProtoLookup(proto_->dir_assets()); + } +} + +bool Asset::IsConsistent(std::string* warning) const { + assert(proto_ && warning); + absl::ReaderMutexLock read_lock(&mutex_); + switch (proto_->type()) { + case AssetProto::FILE: + if (!proto_lookup_.empty() || !proto_->dir_assets().empty() || + !proto_->dir_indirect_assets().empty()) { + *warning = "File asset contains sub-assets"; + return false; + } + if (!proto_->symlink_target().empty()) { + *warning = "File asset contains a symlink"; + return false; + } + break; + case AssetProto::DIRECTORY: + if (!proto_->file_chunks().empty() || !file_chunk_lists_.empty() || + !proto_->file_indirect_chunks().empty()) { + *warning = "Directory asset contains file chunks"; + return false; + } + if (!proto_->symlink_target().empty()) { + *warning = "Directory asset contains a symlink"; + return false; + } + if (proto_->file_size() > 0) { + *warning = "File size is defined for a directory asset"; + return false; + } + break; + case AssetProto::SYMLINK: + if (!proto_lookup_.empty() || !proto_->dir_assets().empty() || + !proto_->dir_indirect_assets().empty()) { + *warning = "Symlink asset contains sub-assets"; + return false; + } + if (!proto_->file_chunks().empty() || !file_chunk_lists_.empty() || + !proto_->file_indirect_chunks().empty()) { + *warning = "Symlink asset contains file chunks"; + return false; + } + if (proto_->file_size() > 0) { + *warning = "File size is defined for a symlink asset"; + return false; + } + break; + default: + *warning = "Undefined asset type"; + return false; + } + + // Directory assets should not have any file chunks. + // Absolute file chunk offsets for all loaded direct and indirect chunks + // should be monotonically increasing. + if (proto_->type() == AssetProto::FILE) { + // Check direct chunks. + size_t total_offset = 0; + for (int idx = 0; idx < proto_->file_chunks_size(); ++idx) { + if (proto_->file_chunks(idx).offset() < total_offset) { + *warning = absl::StrFormat( + "Disordered direct chunks: idx=%u, total_offset=%u, " + "chunk_offset=%u", + idx, total_offset, proto_->file_chunks(idx).offset()); + return false; + } + total_offset = proto_->file_chunks(idx).offset(); + } + + // Check indirect lists. 
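+ // Offsets of indirect chunk lists must be strictly increasing; within each + // fetched list, the first chunk must start at relative offset 0 and the + // following chunk offsets must not decrease.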
+ size_t prev_list_offset = total_offset; + for (int list_idx = 0; list_idx < proto_->file_indirect_chunks_size(); + ++list_idx) { + size_t list_offset = ChunkListOffset(list_idx); + if (list_idx == 0 && proto_->file_chunks_size() == 0 && + list_offset != 0) { + *warning = absl::StrFormat( + "Disordered indirect chunk list: the list offset should be 0, as " + "there are no direct file chunks: " + "list_offset=%u, previous list_offset=%u", + list_offset, prev_list_offset); + return false; + } else if (list_idx > 0 && (prev_list_offset >= list_offset || + total_offset >= list_offset)) { + *warning = absl::StrFormat( + "Disordered indirect chunk list: the list offset should increase: " + "list_offset=%u, previous list_offset=%u, total_offset=%u", + list_offset, prev_list_offset, total_offset); + return false; + } + if (file_chunk_lists_.size() <= list_idx || + !file_chunk_lists_[list_idx]) { + total_offset = list_offset; + continue; + } + // If the list is fetched, check its chunks' order. + for (int chunk_idx = 0; + chunk_idx < file_chunk_lists_[list_idx]->chunks_size(); + ++chunk_idx) { + const ChunkRefProto& chunk = + file_chunk_lists_[list_idx]->chunks(chunk_idx); + if (chunk_idx == 0 && chunk.offset() != 0) { + *warning = absl::StrFormat( + "The offset of the first chunk in the list should be 0: " + "list_idx=%u, list_offset=%u, chunk_offset=%u", + list_idx, list_offset, chunk.offset()); + return false; + } + if (chunk.offset() + list_offset < total_offset) { + *warning = absl::StrFormat( + "Disordered indirect chunk list: list_idx=%u, list_offset=%u, " + "offset=%u, chunk_offset=%u", + list_idx, list_offset, total_offset, chunk.offset()); + return false; + } + total_offset = list_offset + chunk.offset(); + } + } + if (total_offset == 0 && proto_->file_size() == 0) { + return true; + } + // The last absolute offset should be less than the file size. + if (total_offset >= proto_->file_size()) { + *warning = absl::StrFormat( + "The last absolute file offset exceeds the file size: %u >= %u", + total_offset, proto_->file_size()); + return false; + } + } + return true; +} + +absl::StatusOr Asset::FetchNextDirAssetList() { + mutex_.AssertNotHeld(); + assert(proto_); + + { + absl::ReaderMutexLock read_lock(&mutex_); + + // Shortcut to prevent acquiring a write lock if everything has been loaded. + if (dir_asset_lists_.size() >= + static_cast(proto_->dir_indirect_assets_size())) { + return false; + } + } + + absl::WriterMutexLock write_lock(&mutex_); + + // Check again in case some other thread has run this in the meantime. + if (dir_asset_lists_.size() >= + static_cast(proto_->dir_indirect_assets_size())) { + return false; + } + + // Read next indirect asset list. + const ContentIdProto& id = + proto_->dir_indirect_assets(static_cast(dir_asset_lists_.size())); + auto list = std::make_unique(); + RETURN_IF_ERROR(data_store_reader_->GetProto(id, list.get()), + "Failed to fetch AssetList proto with id %s", + ContentId::ToHexString(id)); + dir_asset_lists_.push_back(std::move(list)); + UpdateProtoLookup(dir_asset_lists_.back()->assets()); + + return true; +} + +void Asset::UpdateProtoLookup(const RepeatedAssetProto& list) { + assert((mutex_.AssertHeld(), true)); + + for (const AssetProto& asset : list) { + proto_lookup_[asset.name().c_str()] = &asset; + } +} + +int Asset::FindChunkList(uint64_t offset) { + assert(proto_); + const RepeatedIndirectChunkListProto& lists = proto_->file_indirect_chunks(); + if (offset >= proto_->file_size()) { + // |offset| is not inside the file. 
+ return proto_->file_indirect_chunks_size(); + } + + // TODO: Optimize search by using average chunk size. + auto it = + std::upper_bound(lists.begin(), lists.end(), offset, + [](uint64_t value, const IndirectChunkListProto& list) { + return value < list.offset(); + }); + return it - lists.begin() - 1; +} + +int Asset::FindChunk(const RepeatedChunkRefProto& chunks, + uint64_t chunk_list_offset, uint64_t chunk_offset) { + assert(chunk_list_offset <= chunk_offset); + uint64_t rel_offset = chunk_offset - chunk_list_offset; + // TODO: Optimize search by using average chunk size. + auto it = std::upper_bound(chunks.begin(), chunks.end(), rel_offset, + [](uint64_t value, const ChunkRefProto& ch) { + return value < ch.offset(); + }); + return it - chunks.begin() - 1; +} + +uint64_t Asset::ChunkListOffset(int list_idx) const { + assert(list_idx >= -1 && proto_ && + list_idx <= proto_->file_indirect_chunks_size()); + + if (list_idx == -1) return 0; + if (list_idx < proto_->file_indirect_chunks_size()) + return proto_->file_indirect_chunks(list_idx).offset(); + return proto_->file_size(); +} + +uint64_t Asset::ChunkSize(int list_idx, int chunk_idx, + const RepeatedChunkRefProto* chunk_refs) { + assert(chunk_idx >= 0 && chunk_idx < chunk_refs->size()); + assert(list_idx >= -1 && proto_ && + list_idx <= proto_->file_indirect_chunks_size()); + + // If the next chunk is in the same chunk_refs list, just return offset diff. + if (chunk_idx + 1 < chunk_refs->size()) { + return chunk_refs->at(chunk_idx + 1).offset() - + chunk_refs->at(chunk_idx).offset(); + } + + // If the next chunk is on another list, use the next list's offset. + // Note that this also works for the last list, where + // GetChunkListOffset(list_idx + 1) returns the file size. + uint64_t chunk_absolute_offset = + chunk_refs->at(chunk_idx).offset() + ChunkListOffset(list_idx); + return ChunkListOffset(list_idx + 1) - chunk_absolute_offset; +} + +absl::StatusOr Asset::GetChunkRefList( + int list_idx) { + mutex_.AssertNotHeld(); + assert(list_idx >= -1 && proto_ && + list_idx <= proto_->file_indirect_chunks_size()); + + if (list_idx == -1) { + // Direct chunk list. + return &proto_->file_chunks(); + } + + if (list_idx == proto_->file_indirect_chunks_size()) { + // Indicates EOF. + return nullptr; + } + + { + absl::ReaderMutexLock read_lock(&mutex_); + + // Do a quick check first if the list is already loaded. + // This only requires a read lock. + if (static_cast(list_idx) < file_chunk_lists_.size() && + file_chunk_lists_[list_idx]) { + return &file_chunk_lists_[list_idx]->chunks(); + } + } + + absl::WriterMutexLock write_lock(&mutex_); + + // Indirect chunk list. Check if it has to be fetched. 
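+ // |file_chunk_lists_| grows lazily and unfetched entries remain null, so + // lists can be loaded out of order (e.g. when a read starts near the end of + // the file).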
+ if (file_chunk_lists_.size() < static_cast(list_idx) + 1) { + file_chunk_lists_.resize(list_idx + 1); + } + if (!file_chunk_lists_[list_idx]) { + auto list = std::make_unique(); + const ContentIdProto& list_id = + proto_->file_indirect_chunks(list_idx).chunk_list_id(); + RETURN_IF_ERROR(data_store_reader_->GetProto(list_id, list.get()), + "Failed to fetch ChunkListProto with id %s", + ContentId::ToHexString(list_id)); + file_chunk_lists_[list_idx] = std::move(list); + } + return &file_chunk_lists_[list_idx]->chunks(); +} + +} // namespace cdc_ft diff --git a/cdc_fuse_fs/asset.h b/cdc_fuse_fs/asset.h new file mode 100644 index 0000000..8f94533 --- /dev/null +++ b/cdc_fuse_fs/asset.h @@ -0,0 +1,182 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_FUSE_FS_ASSET_H_ +#define CDC_FUSE_FS_ASSET_H_ + +#include + +#include "absl/base/thread_annotations.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "manifest/content_id.h" + +namespace cdc_ft { + +class Buffer; +class DataStoreReader; + +// Wraps an asset proto for reading and adds additional functionality like name +// lookup maps and lazy loading of directory assets and file chunks. +// This class is accessed from multiple threads and has to be THREAD-SAFE. +class Asset { + public: + // Inode key type (cmp. fuse_ino_t). + using ino_t = uint64_t; + + // Creates a new asset. Must call Initialize() before using it. + Asset(); + ~Asset(); + + // Make it non-copyable, non-assignable to prevent accidental misuse. + Asset(const Asset& other) = delete; + Asset& operator=(const Asset& other) = delete; + + // Initialize the class. Must be called right after creation. + // NOT thread-safe! (OK as usually no other threads have access at this time.) + void Initialize(ino_t parent_ino, DataStoreReader* data_store_reader, + const AssetProto* proto); + + // Returns the parent inode id passed to Initialize(). + // Thread-safe. + ino_t parent_ino() const { return parent_ino_; } + + // Returns the asset proto passed to Initialize(). + // Thread-safe. + const AssetProto* proto() const { return proto_; } + + // Returns all child asset protos. Loads them if necessary. + // Returns an error if loading an indirect asset list fails. + // Returns an InvalidArgumentError if *this is not a directory asset. + // |proto_| must be set. + // Thread-safe. + absl::StatusOr> GetAllChildProtos() + ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns loaded children's protos. Thread-safe. + std::vector GetLoadedChildProtos() const; + + // For directory assets, looks up a child asset by name. Loads indirect asset + // lists if needed. Returns an error if loading asset lists fails. + // Returns nullptr if the asset cannot be found. + // Returns an InvalidArgumentError if *this is not a directory asset. + // |proto_| must be set. + // Thread-safe.
+ absl::StatusOr Lookup(const char* name) + ABSL_LOCKS_EXCLUDED(mutex_); + + // For file assets, reads |size| bytes from the file, starting from |offset|, + // and puts the result into |data|. Returns the number of bytes read or 0 if + // |offset| >= file size. Loads indirect chunk lists if needed. + // Returns an error if loading chunk lists fails. + // Returns an InvalidArgumentError if *this is not a file asset. + // |proto_| must be set. + // Thread-safe. + absl::StatusOr Read(uint64_t offset, void* data, uint64_t size); + + size_t GetNumFetchedFileChunkListsForTesting() ABSL_LOCKS_EXCLUDED(mutex_); + size_t GetNumFetchedDirAssetsListsForTesting() ABSL_LOCKS_EXCLUDED(mutex_); + + // Updates the asset proto; all corresponding internal structures are cleaned up. + // This is an expensive operation as the previously created internal + // structures are removed. Thread-safe. + void UpdateProto(const AssetProto* proto) ABSL_LOCKS_EXCLUDED(mutex_); + + // Checks consistency of the asset, for example: directory assets should not + // contain any file chunks. Any discovered inconsistencies are described in + // |warning|. + bool IsConsistent(std::string* warning) const ABSL_LOCKS_EXCLUDED(mutex_); + + private: + // Loads the next indirect directory asset list. + // Returns true if a list was fetched. + // Returns false if all lists have already been fetched. + // Returns an error if fetching an indirect asset list failed. + // |proto_| must be set. + absl::StatusOr FetchNextDirAssetList() ABSL_LOCKS_EXCLUDED(mutex_); + + // Puts all assets from |list| into |proto_lookup_|. + void UpdateProtoLookup(const RepeatedAssetProto& list) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Returns the index of the indirect chunk list that |offset| falls into or -1 + // if |offset| is contained in the direct chunk list. Returns the number of + // indirect lists in |proto_| if |offset| is greater than or equal to the file size. + // |proto_| must be set. + int FindChunkList(uint64_t offset); + + // Returns the index of the chunk that |chunk_offset| falls into. The offsets + // in list |chunks| are interpreted relative to |chunk_list_offset|. + int FindChunk(const RepeatedChunkRefProto& chunks, uint64_t chunk_list_offset, + uint64_t chunk_offset); + + // Gets the direct or an indirect chunk list. Fetches indirect chunk lists if + // necessary. |list_idx| must be in [-1, number of indirect chunk lists]. + // + // Returns the direct chunk list if |list_idx| is -1. Returns nullptr if + // |list_idx| equals the number of indirect chunk lists. Returns the indirect + // chunk list at index |list_idx| otherwise. Returns an error if fetching an + // indirect chunk list fails. + // |proto_| must be set. + absl::StatusOr GetChunkRefList(int list_idx) + ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the absolute offset of the chunk list with index |list_idx|. + // |list_idx| must be in [-1, number of indirect chunk lists]. -1 refers to + // the direct chunk list, in which case 0 is returned. If |list_idx| equals + // the number of indirect chunk lists, the file size is returned. Otherwise, + // the corresponding indirect chunk list's offset is returned. + // |proto_| must be set. + uint64_t ChunkListOffset(int list_idx) const; + + // Returns the chunk size of the chunk with index |chunk_idx| on the chunk + // list with index |list_idx| and corresponding proto |chunk_refs|. + // |list_idx| must be in [-1, number of indirect chunk lists - 1]. + // |chunk_idx| must be in [0, chunk_refs->size()). + // |proto_| must be set.
+ uint64_t ChunkSize(int list_idx, int chunk_idx, + const RepeatedChunkRefProto* chunk_refs); + + // Parent inode, for ".." in dir listings. + ino_t parent_ino_ = 0; + + // Interface for loading content (chunks, assets). + DataStoreReader* data_store_reader_ = nullptr; + + // Corresponding asset proto. + const AssetProto* proto_ = nullptr; + + // RW mutex for increased thread-safetiness. + mutable absl::Mutex mutex_; + + // Maps asset proto names to asset protos for all protos loaded so far. + // The string views point directly into asset protos. + std::unordered_map proto_lookup_ + ABSL_GUARDED_BY(mutex_); + + // Fetched |file_indirect_chunks| chunk lists. + std::vector> file_chunk_lists_ + ABSL_GUARDED_BY(mutex_); + + // Fetched |dir_indirect_assets| fields so far. + std::vector> dir_asset_lists_ + ABSL_GUARDED_BY(mutex_); +}; + +} // namespace cdc_ft + +#endif // CDC_FUSE_FS_ASSET_H_ diff --git a/cdc_fuse_fs/asset_stream_client.cc b/cdc_fuse_fs/asset_stream_client.cc new file mode 100644 index 0000000..3b13d7c --- /dev/null +++ b/cdc_fuse_fs/asset_stream_client.cc @@ -0,0 +1,112 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/asset_stream_client.h" + +#include + +#include "common/log.h" +#include "common/stopwatch.h" + +namespace cdc_ft { + +using GetContentRequest = proto::GetContentRequest; +using GetContentResponse = proto::GetContentResponse; +using SendCachedContentIdsRequest = proto::SendCachedContentIdsRequest; +using SendCachedContentIdsResponse = proto::SendCachedContentIdsResponse; + +AssetStreamClient::AssetStreamClient(std::shared_ptr channel, + bool enable_stats) + : enable_stats_(enable_stats) { + stub_ = AssetStreamService::NewStub(std::move(channel)); +} + +AssetStreamClient::~AssetStreamClient() = default; + +size_t TotalDataSize(const RepeatedStringProto& data) { + size_t total_size = 0; + for (const std::string& s : data) { + total_size += s.size(); + } + return total_size; +} + +absl::StatusOr AssetStreamClient::GetContent( + const ContentIdProto& id) { + GetContentRequest request; + *request.add_id() = id; + if (enable_stats_) + request.set_thread_id(thread_id_hash_(std::this_thread::get_id())); + + grpc::ClientContext context; + GetContentResponse response; + + Stopwatch sw; + grpc::Status status = stub_->GetContent(&context, request, &response); + LOG_DEBUG("GRPC TIME %0.3f sec for %u chunks with %u bytes", + sw.ElapsedSeconds(), response.data().size(), + TotalDataSize(response.data())); + + if (!status.ok()) { + return absl::Status(static_cast(status.error_code()), + status.error_message()); + } + assert(response.data_size() == 1); + return std::move(*response.mutable_data(0)); +} + +absl::StatusOr AssetStreamClient::GetContent( + RepeatedContentIdProto chunk_ids) { + if (chunk_ids.empty()) return RepeatedStringProto(); + + GetContentRequest request; + *request.mutable_id() = std::move(chunk_ids); + if (enable_stats_) + request.set_thread_id(thread_id_hash_(std::this_thread::get_id())); + + 
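+ // A single RPC serves the whole batch of chunk ids instead of issuing one + // call per chunk.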
grpc::ClientContext context; + GetContentResponse response; + + Stopwatch sw; + grpc::Status status = stub_->GetContent(&context, request, &response); + + if (!status.ok()) { + return absl::Status(static_cast(status.error_code()), + status.error_message()); + } + LOG_DEBUG("GRPC TIME %0.3f sec for %zu bytes", sw.ElapsedSeconds(), + TotalDataSize(response.data())); + + return std::move(*response.mutable_data()); +} + +absl::Status AssetStreamClient::SendCachedContentIds( + std::vector content_ids) { + SendCachedContentIdsRequest request; + for (ContentIdProto& id : content_ids) *request.add_id() = std::move(id); + + grpc::ClientContext context; + SendCachedContentIdsResponse response; + + grpc::Status status = + stub_->SendCachedContentIds(&context, request, &response); + if (!status.ok()) { + return absl::Status(static_cast(status.error_code()), + status.error_message()); + } + + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_fuse_fs/asset_stream_client.h b/cdc_fuse_fs/asset_stream_client.h new file mode 100644 index 0000000..1cd28fd --- /dev/null +++ b/cdc_fuse_fs/asset_stream_client.h @@ -0,0 +1,62 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_FUSE_FS_ASSET_STREAM_CLIENT_H_ +#define CDC_FUSE_FS_ASSET_STREAM_CLIENT_H_ + +#include +#include +#include + +#include "absl/status/statusor.h" +#include "grpcpp/channel.h" +#include "manifest/manifest_proto_defs.h" +#include "proto/asset_stream_service.grpc.pb.h" + +namespace grpc_impl { +class Channel; +} + +namespace cdc_ft { + +// gRpc client for streaming assets to a gamelets. The client runs inside the +// CDC Fuse filesystem and requests chunks from the workstation. +class AssetStreamClient { + public: + // |channel| is a grpc channel to use. + // |enable_stats| determines whether additional statistics are sent. + AssetStreamClient(std::shared_ptr channel, bool enable_stats); + ~AssetStreamClient(); + + // Gets the content of the chunk with given |id|. + absl::StatusOr GetContent(const ContentIdProto& id); + absl::StatusOr GetContent( + RepeatedContentIdProto chunk_ids); + + // Sends the IDs of all cached chunks to the workstation for statistical + // purposes. + absl::Status SendCachedContentIds(std::vector content_ids); + + private: + using AssetStreamService = proto::AssetStreamService; + std::unique_ptr stub_; + bool enable_stats_; + std::hash thread_id_hash_; +}; + +} // namespace cdc_ft + +#endif // CDC_FUSE_FS_ASSET_STREAM_CLIENT_H_ diff --git a/cdc_fuse_fs/asset_test.cc b/cdc_fuse_fs/asset_test.cc new file mode 100644 index 0000000..b8dd813 --- /dev/null +++ b/cdc_fuse_fs/asset_test.cc @@ -0,0 +1,820 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/asset.h" + +#include "absl/strings/match.h" +#include "common/buffer.h" +#include "common/path.h" +#include "common/status_test_macros.h" +#include "data_store/mem_data_store.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +class AssetTest : public ::testing::Test { + public: + AssetTest() + : bad_id_(ContentId::FromDataString(std::string("does not exist"))) { + for (size_t n = 0; n < kNumChildProtos; ++n) { + child_protos_[n].set_name("file" + std::to_string(n)); + } + } + + protected: + static constexpr Asset::ino_t kParentIno = 1; + + // Adds chunks with the data given by |data_vec| to the store, and + // adds references to the chunks to |list|. Updates |offset|. + void AddChunks(std::vector> data_vec, uint64_t* offset, + RepeatedChunkRefProto* list) { + for (auto& data : data_vec) { + ChunkRefProto* chunk_ref = list->Add(); + chunk_ref->set_offset(*offset); + *offset += data.size(); + *chunk_ref->mutable_chunk_id() = store_.AddData(std::move(data)); + } + } + + // Adds chunks with the data given by |data_vec| to the store, + // creates an indirect chunk list from those chunks and adds a reference to + // that list to |list|. + void AddIndirectChunks(std::vector> data_vec, + uint64_t* offset, + RepeatedIndirectChunkListProto* list) { + uint64_t indirect_list_offset = *offset; + *offset = 0; + + ChunkListProto chunk_list; + AddChunks(data_vec, offset, chunk_list.mutable_chunks()); + + IndirectChunkListProto* indirect_list = list->Add(); + indirect_list->set_offset(indirect_list_offset); + *indirect_list->mutable_chunk_list_id() = store_.AddProto(chunk_list); + *offset += indirect_list_offset; + } + + // Checks if the given list |protos| contains an asset having |name|. + static bool ContainsAsset(const std::vector& protos, + const std::string& name) { + return std::find_if(protos.begin(), protos.end(), [=](const AssetProto* p) { + return p->name() == name; + }) != protos.end(); + } + + MemDataStore store_; + AssetProto proto_; + Asset asset_; + + static constexpr size_t kNumChildProtos = 4; + AssetProto child_protos_[kNumChildProtos]; + + const ContentIdProto bad_id_; + std::string asset_check_; +}; + +TEST_F(AssetTest, BasicGetters) { + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_EQ(asset_.parent_ino(), kParentIno); + EXPECT_EQ(asset_.proto(), &proto_); + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ("Undefined asset type", asset_check_.c_str()); +} + +TEST_F(AssetTest, GetAllChildProtosDirectSucceeds) { + // Put all children into the direct asset list. 
+ for (size_t n = 0; n < kNumChildProtos; ++n) + *proto_.add_dir_assets() = child_protos_[n]; + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + + ASSERT_OK(protos); + ASSERT_EQ(protos->size(), kNumChildProtos); + for (size_t n = 0; n < kNumChildProtos; ++n) { + EXPECT_TRUE(ContainsAsset(protos.value(), child_protos_[n].name())) + << "Could not find asset " << child_protos_[n].name(); + } + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, GetAllChildProtosIndirectSucceeds) { + // Put child0 into the direct asset list and children 1-N into indirect lists. + *proto_.add_dir_assets() = child_protos_[0]; + for (size_t n = 1; n < kNumChildProtos; ++n) { + AssetListProto list; + *list.add_assets() = child_protos_[n]; + *proto_.add_dir_indirect_assets() = store_.AddProto(list); + } + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + + EXPECT_EQ(asset_.GetNumFetchedDirAssetsListsForTesting(), + kNumChildProtos - 1); + ASSERT_OK(protos); + ASSERT_EQ(protos->size(), kNumChildProtos); + for (size_t n = 0; n < kNumChildProtos; ++n) { + EXPECT_TRUE(ContainsAsset(protos.value(), child_protos_[n].name())) + << "Could not find asset " << child_protos_[n].name(); + } + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, GetAllChildProtosWithBadListIdFails) { + *proto_.add_dir_indirect_assets() = bad_id_; + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + + ASSERT_NOT_OK(protos); + EXPECT_TRUE(absl::StrContains(protos.status().message(), + "Failed to fetch directory assets")); +} + +TEST_F(AssetTest, GetAllChildProtosWithWrongTypeFails) { + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + + ASSERT_NOT_OK(protos); + EXPECT_TRUE(absl::IsInvalidArgument(protos.status())); +} + +TEST_F(AssetTest, GetLoadedChildProtosSucceedsForEmpty) { + asset_.Initialize(kParentIno, &store_, &proto_); + EXPECT_TRUE(asset_.GetLoadedChildProtos().empty()); + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ("Undefined asset type", asset_check_.c_str()); +} + +TEST_F(AssetTest, GetLoadedChildProtosSucceedsForNonEmpty) { + // Put child0 into the direct asset list and children 1-N into indirect lists. + *proto_.add_dir_assets() = child_protos_[0]; + for (size_t n = 1; n < kNumChildProtos; ++n) { + AssetListProto list; + *list.add_assets() = child_protos_[n]; + *proto_.add_dir_indirect_assets() = store_.AddProto(list); + } + proto_.set_type(AssetProto::DIRECTORY); + + // The direct list is always loaded. + asset_.Initialize(kParentIno, &store_, &proto_); + std::vector protos = asset_.GetLoadedChildProtos(); + ASSERT_EQ(protos.size(), 1); + EXPECT_EQ(protos[0]->name(), child_protos_[0].name()); + + // A lookup for the first child triggers loading of the first indirect list. + EXPECT_OK(asset_.Lookup(child_protos_[1].name().c_str())); + protos = asset_.GetLoadedChildProtos(); + ASSERT_EQ(protos.size(), 2); + EXPECT_TRUE(ContainsAsset(protos, child_protos_[0].name())); + EXPECT_TRUE(ContainsAsset(protos, child_protos_[1].name())); + + // GetAllChildProtos() triggers loading of all indirect lists. 
+ EXPECT_OK(asset_.GetAllChildProtos()); + protos = asset_.GetLoadedChildProtos(); + ASSERT_EQ(protos.size(), 4u); + for (size_t n = 0; n < protos.size(); ++n) { + EXPECT_TRUE(ContainsAsset(protos, child_protos_[n].name())) + << "Could not find asset " << child_protos_[n].name(); + } + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, LookupSucceeds) { + // Put child0 into the direct asset list and children 1-N into indirect lists. + *proto_.add_dir_assets() = child_protos_[0]; + for (size_t n = 1; n < kNumChildProtos; ++n) { + AssetListProto list; + *list.add_assets() = child_protos_[n]; + *proto_.add_dir_indirect_assets() = store_.AddProto(list); + } + proto_.set_type(AssetProto::DIRECTORY); + + // Indirect asset lists should be fetched in a lazy fashion. + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr file0 = asset_.Lookup("file0"); + EXPECT_EQ(asset_.GetNumFetchedDirAssetsListsForTesting(), 0); + absl::StatusOr file1 = asset_.Lookup("file1"); + EXPECT_EQ(asset_.GetNumFetchedDirAssetsListsForTesting(), 1); + absl::StatusOr file3 = asset_.Lookup("file3"); + EXPECT_EQ(asset_.GetNumFetchedDirAssetsListsForTesting(), 3); + + ASSERT_OK(file0); + ASSERT_OK(file1); + ASSERT_OK(file3); + + ASSERT_NE(*file0, nullptr); + ASSERT_NE(*file1, nullptr); + ASSERT_NE(*file3, nullptr); + + EXPECT_EQ((*file0)->name(), child_protos_[0].name()); + EXPECT_EQ((*file1)->name(), child_protos_[1].name()); + EXPECT_EQ((*file3)->name(), child_protos_[3].name()); + + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, LookupNotFoundSucceeds) { + // Put child0 into the direct asset list and children 1-N into indirect lists. + *proto_.add_dir_assets() = child_protos_[0]; + for (size_t n = 1; n < kNumChildProtos; ++n) { + AssetListProto list; + *list.add_assets() = child_protos_[n]; + *proto_.add_dir_indirect_assets() = store_.AddProto(list); + } + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr proto = asset_.Lookup("non_existing"); + + EXPECT_EQ(asset_.GetNumFetchedDirAssetsListsForTesting(), + kNumChildProtos - 1); + ASSERT_OK(proto); + ASSERT_EQ(*proto, nullptr); + + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, LookupWithWrongTypeFails) { + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr proto = asset_.Lookup("foo"); + + ASSERT_NOT_OK(proto); + EXPECT_TRUE(absl::IsInvalidArgument(proto.status())); +} + +TEST_F(AssetTest, LookupWithBadListIdFails) { + *proto_.add_dir_assets() = child_protos_[0]; + *proto_.add_dir_indirect_assets() = bad_id_; + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + + // This should succeed since 'file0' on the direct assets list. + ASSERT_OK(asset_.Lookup("file0")); + + // This should fail since it should trigger loading the bad id. 
+ absl::StatusOr proto = asset_.Lookup("file1"); + ASSERT_NOT_OK(proto); + EXPECT_TRUE(absl::StrContains(proto.status().message(), + "Failed to fetch directory assets")); +} + +TEST_F(AssetTest, ReadDirectSucceeds) { + uint64_t offset = 0; + AddChunks({{1, 2}, {3, 4}}, &offset, proto_.mutable_file_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(4); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 4); + EXPECT_EQ(data, std::vector({1, 2, 3, 4})); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, ReadIndirectSucceeds) { + uint64_t offset = 0; + AddChunks({{1, 2}}, &offset, proto_.mutable_file_chunks()); + AddIndirectChunks({{3}, {4, 5, 6}}, &offset, + proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{7, 8, 9}}, &offset, + proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(9); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 9); + EXPECT_EQ(data, std::vector({1, 2, 3, 4, 5, 6, 7, 8, 9})); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, ReadIndirectOnlySucceeds) { + uint64_t offset = 0; + AddIndirectChunks({{1, 2}}, &offset, proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(2); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 2); + EXPECT_EQ(data, std::vector({1, 2})); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ("", asset_check_.c_str()); +} + +TEST_F(AssetTest, ReadWithWrongType) { + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(1); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE(absl::IsInvalidArgument(bytes_read.status())); +} + +TEST_F(AssetTest, ReadIndirectWithBadListIdFails) { + IndirectChunkListProto* indirect_list = proto_.add_file_indirect_chunks(); + indirect_list->set_offset(0); + *indirect_list->mutable_chunk_list_id() = bad_id_; + proto_.set_file_size(1); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(1); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE(absl::StrContains(bytes_read.status().message(), + "Failed to fetch indirect chunk list 0")); +} + +TEST_F(AssetTest, ReadFetchesIndirectListsLazily) { + uint64_t offset = 0; + AddChunks({{0, 1, 2}}, &offset, proto_.mutable_file_chunks()); + AddIndirectChunks({{3}}, &offset, proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{4, 5, 6}, {7}}, &offset, + proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{8, 9}}, &offset, proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + EXPECT_EQ(asset_.GetNumFetchedFileChunkListsForTesting(), 0); + + // Read direct chunks. Should not trigger indirect reads. 
+ std::vector data(10); + absl::StatusOr bytes_read = asset_.Read(0, data.data(), 3); + EXPECT_EQ(asset_.GetNumFetchedFileChunkListsForTesting(), 0); + + // Read an indirect chunk near the end ({ {8, 9} }). + bytes_read = asset_.Read(8, data.data(), 1); + EXPECT_EQ(asset_.GetNumFetchedFileChunkListsForTesting(), 1); + + // Read an indirect chunk in the beginning ({ {3} }). + bytes_read = asset_.Read(3, data.data(), 1); + EXPECT_EQ(asset_.GetNumFetchedFileChunkListsForTesting(), 2); + + // Read an indirect chunk in the middle ({ {4, 5, 6}, {7} }). + bytes_read = asset_.Read(4, data.data(), 4); + EXPECT_EQ(asset_.GetNumFetchedFileChunkListsForTesting(), 3); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, ReadEmptySucceeds) { + asset_.Initialize(kParentIno, &store_, &proto_); + proto_.set_type(AssetProto::FILE); + + std::vector data(4); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 0); +} + +TEST_F(AssetTest, ReadEmptyDirectChunkSucceeds) { + uint64_t offset = 0; + AddChunks({{}, {1}}, &offset, proto_.mutable_file_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(4); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 1); + data.resize(1); + EXPECT_EQ(data, std::vector({1})); +} + +TEST_F(AssetTest, ReadEmptyIndirectChunkListFails) { + uint64_t offset = 0; + AddChunks({{1}}, &offset, proto_.mutable_file_chunks()); + AddIndirectChunks({}, &offset, proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{}}, &offset, proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{2}}, &offset, proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(4); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 2); + data.resize(2); + EXPECT_EQ(data, std::vector({1, 2})); +} + +TEST_F(AssetTest, ReadWithBadFileSizeFails) { + // Construct a case where the second chunk is empty, but file size indicates + // that it should be 1 byte long. Reading that byte should fail. 
+ uint64_t offset = 0; + AddChunks({{1}, {}}, &offset, proto_.mutable_file_chunks()); + proto_.set_file_size(offset + 1); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(1); + absl::StatusOr bytes_read = + asset_.Read(1, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE( + absl::StrContains(bytes_read.status().message(), + "requested offset 0 is larger or equal than size 0")); +} + +TEST_F(AssetTest, ReadWithBadChunkIdSizeFails) { + uint64_t offset = 0; + AddChunks({{1}}, &offset, proto_.mutable_file_chunks()); + *proto_.mutable_file_chunks(0)->mutable_chunk_id() = bad_id_; + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(1); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE(absl::StrContains(bytes_read.status().message(), + "Failed to fetch chunk(s)")); +} + +TEST_F(AssetTest, ReadWithBadOffsetFails) { + uint64_t offset = 0; + AddChunks({{1, 2, 3}, {4, 5, 6}}, &offset, proto_.mutable_file_chunks()); + proto_.mutable_file_chunks(1)->set_offset(4); // Instead of 3. + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(6); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE(absl::StrContains(bytes_read.status().message(), + "requested size 4 at offset 0")); +} + +TEST_F(AssetTest, ReadEmptyWithBadFileSize) { + uint64_t offset = 0; + AddChunks({}, &offset, proto_.mutable_file_chunks()); + proto_.set_file_size(1); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(1); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_NOT_OK(bytes_read); + EXPECT_TRUE(absl::StrContains(bytes_read.status().message(), + "Invalid chunk ref list")); +} + +TEST_F(AssetTest, ReadWithOffsetAndSizeSucceeds) { + uint64_t offset = 0; + AddChunks({{0, 1}, {}, {2}}, &offset, proto_.mutable_file_chunks()); + AddIndirectChunks({{3}, {4, 5, 6}}, &offset, + proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({}, &offset, proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{7, 8, 9}}, &offset, + proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::FILE); + + asset_.Initialize(kParentIno, &store_, &proto_); + + // Test all kinds of different permutations of offsets and sizes. + std::vector expected_data; + for (offset = 0; offset < 12; ++offset) { + for (uint64_t size = 0; size < 12; ++size) { + expected_data.clear(); + for (uint64_t n = offset; n < std::min(offset + size, 10); + ++n) { + expected_data.push_back(static_cast(n)); + } + + std::vector data(size); + absl::StatusOr bytes_read = + asset_.Read(offset, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, expected_data.size()); + data.resize(expected_data.size()); + EXPECT_EQ(data, expected_data); + } + } +} + +TEST_F(AssetTest, UpdateProtoWithEmptyAssetSucceeds) { + proto_.set_type(AssetProto::DIRECTORY); + // Put all children into the direct asset list. 
+ for (size_t n = 0; n < kNumChildProtos; ++n) + *proto_.add_dir_assets() = child_protos_[n]; + asset_.Initialize(kParentIno, &store_, &proto_); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + ASSERT_OK(protos); + ASSERT_EQ(protos->size(), kNumChildProtos); + + AssetProto proto_updated; + proto_updated.set_type(AssetProto::DIRECTORY); + asset_.UpdateProto(&proto_updated); + protos = asset_.GetAllChildProtos(); + ASSERT_OK(protos); + ASSERT_TRUE(protos->empty()); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, UpdateProtoFromEmptyAssetSucceeds) { + AssetProto empty_proto; + empty_proto.set_type(AssetProto::DIRECTORY); + asset_.Initialize(kParentIno, &store_, &empty_proto); + absl::StatusOr> protos = + asset_.GetAllChildProtos(); + ASSERT_OK(protos); + ASSERT_TRUE(protos->empty()); + + proto_.set_type(AssetProto::DIRECTORY); + // Put all children into the direct asset list. + for (size_t n = 0; n < kNumChildProtos; ++n) + *proto_.add_dir_assets() = child_protos_[n]; + asset_.UpdateProto(&proto_); + + protos = asset_.GetAllChildProtos(); + ASSERT_OK(protos); + ASSERT_EQ(protos->size(), kNumChildProtos); + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); +} + +TEST_F(AssetTest, AssetProtoComparison) { + AssetProto a; + AssetProto b; + EXPECT_EQ(a, b); + + a.set_type(AssetProto::DIRECTORY); + b.set_type(AssetProto::FILE); + EXPECT_NE(a, b); + + b.set_type(AssetProto::DIRECTORY); + EXPECT_EQ(a, b); + + for (size_t n = 0; n < kNumChildProtos; ++n) + *a.add_dir_assets() = child_protos_[n]; + EXPECT_NE(a, b); + + for (size_t n = 0; n < kNumChildProtos; ++n) + *b.add_dir_assets() = child_protos_[n]; + EXPECT_EQ(a, b); +} + +TEST_F(AssetTest, IsConsistentFailsFileWithDirAssets) { + // Put all children into the direct asset list. 
+ for (size_t n = 0; n < kNumChildProtos; ++n) + *proto_.add_dir_assets() = child_protos_[n]; + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "File asset contains sub-assets"); +} + +TEST_F(AssetTest, IsConsistentFailsFileWithSymlink) { + proto_.set_symlink_target("symlink"); + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "File asset contains a symlink"); +} + +TEST_F(AssetTest, IsConsistentFailsDirWithFileChunks) { + uint64_t offset = 0; + AddChunks({{1, 2}}, &offset, proto_.mutable_file_chunks()); + proto_.set_type(AssetProto::DIRECTORY); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Directory asset contains file chunks"); +} + +TEST_F(AssetTest, IsConsistentFailsDirWithIndirectFileChunks) { + uint64_t offset = 0; + AddIndirectChunks({{3}, {4, 5, 6}}, &offset, + proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Directory asset contains file chunks"); +} + +TEST_F(AssetTest, IsConsistentFailsDirWithSymlink) { + proto_.set_symlink_target("symlink"); + proto_.set_type(AssetProto::DIRECTORY); + + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Directory asset contains a symlink"); +} + +TEST_F(AssetTest, IsConsistentFailsDirWithFileSize) { + proto_.set_file_size(2); + proto_.set_type(AssetProto::DIRECTORY); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), + "File size is defined for a directory asset"); +} + +TEST_F(AssetTest, IsConsistentFailsSymlinkWithDirAssets) { + // Put all children into the direct asset list. 
+ for (size_t n = 0; n < kNumChildProtos; ++n) + *proto_.add_dir_assets() = child_protos_[n]; + proto_.set_type(AssetProto::SYMLINK); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Symlink asset contains sub-assets"); +} + +TEST_F(AssetTest, IsConsistentFailsSymlinkWithIndirectFileChunks) { + uint64_t offset = 0; + AddIndirectChunks({{3}, {4, 5, 6}}, &offset, + proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(offset); + proto_.set_type(AssetProto::SYMLINK); + + asset_.Initialize(kParentIno, &store_, &proto_); + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Symlink asset contains file chunks"); +} + +TEST_F(AssetTest, IsConsistentFailsSymlinkWithFileSize) { + proto_.set_file_size(2); + proto_.set_type(AssetProto::SYMLINK); + + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), + "File size is defined for a symlink asset"); +} + +TEST_F(AssetTest, IsConsistentFailsUndefinedAssetType) { + proto_.set_type(AssetProto::UNKNOWN); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), "Undefined asset type"); +} + +TEST_F(AssetTest, IsConsistentFailsFileChunkWrongOffsets) { + uint64_t offset = 10; + AddChunks({{1}}, &offset, proto_.mutable_file_chunks()); + offset = 5; + AddChunks({{2}}, &offset, proto_.mutable_file_chunks()); + proto_.set_file_size(2); + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ( + asset_check_.c_str(), + "Disordered direct chunks: idx=1, total_offset=10, chunk_offset=5"); +} + +TEST_F(AssetTest, IsConsistentFailsWrongFileSize) { + uint64_t offset = 0; + AddChunks({{1}, {2}}, &offset, proto_.mutable_file_chunks()); + AddIndirectChunks({{3}, {4}, {5}, {6}}, &offset, + proto_.mutable_file_indirect_chunks()); + AddIndirectChunks({{7}, {8}, {9}}, &offset, + proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(5); + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + std::vector data(9); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ(asset_check_.c_str(), + "The last absolute file offset exceeds the file size: 8 >= 5"); +} + +TEST_F(AssetTest, IsConsistentFailsNonZeroFirstIndirectListOffset) { + uint64_t offset = 10; + AddIndirectChunks({{1}}, &offset, proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(1); + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ( + asset_check_.c_str(), + "Disordered indirect chunk list: the list offset should be 0, as there " + "are no direct file chunks: list_offset=10, previous list_offset=0"); +} + +TEST_F(AssetTest, IsConsistentFailsNonIncreasingIndirectListOffset) { + uint64_t offset = 0; + AddIndirectChunks({{1}, {2}, {3}}, &offset, + proto_.mutable_file_indirect_chunks()); + offset = 1; + AddIndirectChunks({{3}}, &offset, proto_.mutable_file_indirect_chunks()); + proto_.set_file_size(3); + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + + // Read the first indirect list to fill the internal 
structure. + std::vector data(3); + absl::StatusOr bytes_read = + asset_.Read(0, data.data(), data.size()); + + ASSERT_OK(bytes_read); + EXPECT_EQ(*bytes_read, 3); + + EXPECT_FALSE(asset_.IsConsistent(&asset_check_)); + EXPECT_STREQ( + asset_check_.c_str(), + "Disordered indirect chunk list: the list offset should increase: " + "list_offset=1, previous list_offset=0, total_offset=2"); +} + +TEST_F(AssetTest, IsConsistentEmptyFileSucceeds) { + proto_.set_type(AssetProto::FILE); + asset_.Initialize(kParentIno, &store_, &proto_); + proto_.set_file_size(0); + + EXPECT_TRUE(asset_.IsConsistent(&asset_check_)); + EXPECT_TRUE(asset_check_.empty()); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_fuse_fs/cdc_fuse_fs.cc b/cdc_fuse_fs/cdc_fuse_fs.cc new file mode 100644 index 0000000..f5cc9ba --- /dev/null +++ b/cdc_fuse_fs/cdc_fuse_fs.cc @@ -0,0 +1,1553 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/cdc_fuse_fs.h" + +#include +#include + +#include +#include +#include +#include + +#include "cdc_fuse_fs/asset.h" +#include "common/buffer.h" +#include "common/log.h" +#include "common/path.h" +#include "common/platform.h" +#include "common/status.h" +#include "common/status_macros.h" +#include "common/threadpool.h" +#include "common/util.h" +#include "data_store/data_store_reader.h" +#include "json/json.h" +#include "manifest/content_id.h" +#include "manifest/manifest_proto_defs.h" + +#ifndef USE_MOCK_LIBFUSE +#include "include/fuse.h" +#include "include/fuse_lowlevel.h" +#else +// This code is tested using a fake testing FUSE layer. +#include "cdc_fuse_fs/mock_libfuse.h" +#endif + +namespace cdc_ft { +namespace { + +enum class InodeState { + kInitialized, + kUpdatedProto, // inode's proto was changed, but the content is the same. + kUpdated, // inode was updated and its file should be reopened. + kInvalid // the corresponding file was removed. +}; + +const char* InodeStateToString(const InodeState& state) { + switch (state) { + case InodeState::kInitialized: + return "INITIALIZED"; + case InodeState::kUpdatedProto: + return "UPDATED_PROTO"; + case InodeState::kUpdated: + return "UPDATED"; + case InodeState::kInvalid: + return "INVALID"; + default: + return "UNKNOWN"; + } +} + +struct Inode; + +fuse_ino_t GetIno(const Inode& inode); + +struct Inode { + Asset asset; + + // Inode nlookup: how many times the file was accessed. It is reduced by + // forget(). The inode is removed if nlookup = 0 and children_nlookup = 0. + std::atomic_uint64_t nlookup{0}; + + // The number of accessed children (used for directories), whose nlookup > 0. + std::atomic_uint64_t children_nlookup{0}; + + // Shows if this inode is a FUSE root inode. + bool is_root = false; + + // The state during manifest swap. + std::atomic state{InodeState::kInitialized}; + + Inode() = default; + + // Delete copy/move constructor and assignments. We don't need any. 
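+ // Inodes are handed to FUSE by address (see GetIno()), so they must never be + // copied or moved.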
+ Inode(const Inode&) = delete; + Inode(Inode&& inode) = delete; + Inode& operator=(Inode&& inode) = delete; + Inode& operator=(const Inode&) = delete; + + bool IsInitialized() const { return state == InodeState::kInitialized; } + bool IsUpdated() const { return state == InodeState::kUpdated; } + bool IsUpdatedProto() const { return state == InodeState::kUpdatedProto; } + bool IsValid() const { return state != InodeState::kInvalid; } + + Json::Value ToJson(bool with_proto) const { + Json::Value value; + value["ino"] = GetIno(*this); + value["parent_ino"] = asset.parent_ino(); + value["nlookup"] = nlookup.load(); + value["children_nlookup"] = children_nlookup.load(); + value["state"] = InodeStateToString(state); + value["proto"] = asset.proto(); + if (with_proto) { + if (asset.proto()) { + value["name"] = asset.proto()->name(); + value["type"] = asset.proto()->type(); + } else { + value["message"] = "Proto message is not set"; + } + } + return value; + } +}; + +// Asset proto -> inode map. +using InodeMap = std::unordered_map>; + +// Queued request to open a file that has not been processed yet and should be +// processed once the manifest is updated. +struct OpenRequest { + fuse_req_t req; + fuse_ino_t ino; + struct fuse_file_info* fi; +}; + +// Global context. Fuse is based on loose callbacks, so this holds the fs state. +struct CdcFuseFsContext { +#ifndef USE_MOCK_LIBFUSE + // Fuse state. + fuse_args args = FUSE_ARGS_INIT(0, nullptr); + fuse_chan* channel = nullptr; + char* mountpoint = nullptr; + fuse_session* session = nullptr; + bool signal_handlers_set = false; + int multithreaded = 1; +#endif + bool initialized = false; + + // Interface for loading chunks (assets, data etc.). + DataStoreReader* data_store_reader = nullptr; + + // Mutex to protect manifest update process. + absl::Mutex manifest_mutex ABSL_ACQUIRED_BEFORE(inodes_mutex); + + // Loaded manifest. + std::unique_ptr manifest ABSL_GUARDED_BY(manifest_mutex) = + std::make_unique(); + + // Root inode (points to manifest->root_dir()). + std::shared_ptr root ABSL_GUARDED_BY(manifest_mutex) = + std::make_shared(); + + // Mutex to protect inodes. + absl::Mutex inodes_mutex ABSL_ACQUIRED_AFTER(manifest_mutex); + + // Maps asset protos to Inodes, which contains the proto + metadata. + InodeMap inodes ABSL_GUARDED_BY(inodes_mutex); + + // One buffer per thread to serve read, readdir etc. requests. + static thread_local Buffer buffer; + + // Configuration client to get configuration updates from the workstation. + std::unique_ptr config_stream_client_; + + // Queue for requests to open files that have not been processed yet. + absl::Mutex queued_open_requests_mutex_; + std::vector queued_open_requests_ + ABSL_GUARDED_BY(queued_open_requests_mutex_); + + // Identifies whether FUSE consistency should be inspected after manifest + // update. + bool consistency_check = false; + + // Contains invalid inodes, which should be deleted after they are forgotten. + std::unordered_map> invalid_inodes + ABSL_GUARDED_BY(inodes_mutex); +}; + +thread_local Buffer CdcFuseFsContext::buffer; + +// Global context for the (static!) Fuse callbacks. +CdcFuseFsContext* ctx; + +// Inode IDs (fuse_ino_t) are just the Inode pointer addresses. +// That allows quick lock-free access to inodes. +static_assert(sizeof(Inode*) == sizeof(fuse_ino_t), "Size mismatch!"); + +#ifndef USE_MOCK_LIBFUSE +// Sanity check for correct compiler options. 
+// Note: There doesn't seem to be a way to make this 64 bit on Windows in a way +// that doesn't cause havoc (but that's for testing only, anyway). +static_assert(sizeof(off_t) == 8, "off_t must be 64 bit"); +static_assert(sizeof(ino_t) == 8, "ino_t must be 64 bit"); +static_assert(sizeof(stat::st_ino) == 8, "st_ino must be 64 bit"); +#endif + +// Converts Inode to fuse_ino_t (cheap typecast). +fuse_ino_t GetIno(const Inode& inode) { + if (inode.is_root) { + return FUSE_ROOT_ID; + } + return reinterpret_cast(&inode); +} + +// Converts fuse_ino_t to Inode (root inode for FUSE_ROOT_ID, otherwise cheap +// typecast). +Inode& GetInode(fuse_ino_t ino) + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) { + if (ino == FUSE_ROOT_ID) { + return *ctx->root; + } + + // |ino| is just the inode pointer. + return *reinterpret_cast(ino); +} + +// Converts asset.permissions() to a file mode by OR'ing the file type flag. +uint32_t GetMode(const AssetProto& asset) { + switch (asset.type()) { + case AssetProto::FILE: + return asset.permissions() | path::MODE_IFREG; + case AssetProto::DIRECTORY: + return asset.permissions() | path::MODE_IFDIR; + default: + return asset.permissions(); + } +} + +// Fills |stbuf| with data from the asset pointed to by |ino|. +void FillStatBuffer(fuse_ino_t ino, struct stat* stbuf) + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) { + assert(stbuf); + const AssetProto& asset = *GetInode(ino).asset.proto(); + stbuf->st_ino = ino; + stbuf->st_mode = GetMode(asset); + // For directories, this is going to be 0 (does that matter?). + stbuf->st_size = asset.file_size(); + // Number of hard links to the file (number of directories with entries for + // this file). Should always be 1 for this read-only filesystem. + stbuf->st_nlink = internal::kCdcFuseDefaultNLink; +#ifndef USE_MOCK_LIBFUSE + stbuf->st_mtim.tv_sec = asset.mtime_seconds(); +#else + stbuf->st_mtime = asset.mtime_seconds(); +#endif + stbuf->st_uid = internal::kCdcFuseCloudcastUid; + stbuf->st_gid = internal::kCdcFuseCloudcastGid; + + LOG_DEBUG("FillStatBuffer, ino=%u, size=%u, mode=%u, time=%u", ino, + stbuf->st_size, stbuf->st_mode, asset.mtime_seconds()); +} + +// Gets or creates an inode for |proto|. +Inode* GetOrCreateInode(Inode& parent, const AssetProto* proto) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->inodes_mutex) { + std::shared_ptr& inode = ctx->inodes[proto]; + if (inode) { + assert(inode->asset.proto()); + // Found existing inode. + ++inode->nlookup; + } else { + // A new inode was created. + // Note: No other thread can access this node right now. + inode = std::make_shared(); + inode->asset.Initialize(GetIno(parent), ctx->data_store_reader, proto); + inode->nlookup = 1; + ++parent.children_nlookup; + } + return inode.get(); +} + +// Adds an entry with given |name| and stat info from the asset at the given +// |ino|. Usually, |name| matches the asset name, except for the "." and ".." +// directories. Stores the entry in some Fuse-internal format in |buffer|. +void AddDirectoryEntry(fuse_req_t req, Buffer* buffer, const char* name, + fuse_ino_t ino) + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) { + struct stat stbuf; + memset(&stbuf, 0, sizeof(stbuf)); + + // Note: fuse_add_direntry() only uses those two entries. + stbuf.st_ino = ino; + stbuf.st_mode = GetMode(*GetInode(ino).asset.proto()); + + // Call fuse_add_direntry with null args to get the size of the entry. 
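+  // (Passing a null buffer to fuse_add_direntry() is the usual libfuse
+  // two-pass idiom: the first call only reports how many bytes the serialized
+  // entry needs, and the second call below writes it into the grown buffer.)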
+ size_t old_size = buffer->size(); + size_t entry_size = fuse_add_direntry(req, NULL, 0, name, NULL, 0); + + // Append the new entry at the end of the buffer. + buffer->resize(old_size + entry_size); + fuse_add_direntry(req, buffer->data() + old_size, buffer->size() - old_size, + name, &stbuf, static_cast(buffer->size())); +} + +void ForgetChild(fuse_ino_t ino) ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->inodes_mutex) { + Inode& inode = GetInode(ino); + assert(inode.children_nlookup > 0); + --inode.children_nlookup; + + // Maintain children_nlookup on the root, but never remove it. + if (ino == FUSE_ROOT_ID) { + return; + } + if (inode.nlookup == 0 && inode.children_nlookup == 0) { + const AssetProto* proto = inode.asset.proto(); + ForgetChild(inode.asset.parent_ino()); + ctx->inodes.erase(proto); + } +} + +void ForgetOne(fuse_ino_t ino, uint64_t nlookup) + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->inodes_mutex) { + // Supports forgetting outdated inodes - do not need to check validity. + Inode& inode = GetInode(ino); + LOG_DEBUG("Current nlookup %u to reduce by %u", inode.nlookup.load(), + nlookup); + inode.nlookup = inode.nlookup > nlookup ? inode.nlookup - nlookup : 0; + // Maintain nlookup on the root, but never remove it. + if (ino == FUSE_ROOT_ID) { + return; + } + if (inode.nlookup == 0 && inode.children_nlookup == 0) { + const AssetProto* proto = inode.asset.proto(); + ForgetChild(inode.asset.parent_ino()); + size_t count = 0; + if (!proto) { + count = ctx->invalid_inodes.erase(ino); + LOG_DEBUG("Erased invalid inode"); + } else { + count = ctx->inodes.erase(proto); + LOG_DEBUG("Erased inode"); + } + assert(count); + (void)count; + } +} + +// Returns inos of previously accessed children inodes for |asset|. +std::vector CollectLoadedChildInos(const Asset& asset) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + std::vector protos = asset.GetLoadedChildProtos(); + std::vector children; + absl::ReaderMutexLock inode_lock(&ctx->inodes_mutex); + for (const AssetProto* proto : protos) { + InodeMap::iterator it = ctx->inodes.find(proto); + if (it != ctx->inodes.end()) { + children.push_back(GetIno(*it->second.get())); + } + } + return children; +} + +// Returns true if |inode| with |ino| is valid (it was not changed by any +// manifest update). +bool ValidateInode(fuse_req_t req, Inode& inode, fuse_ino_t ino) { + if (!inode.IsValid()) { + LOG_WARNING("Ino %u was outdated after the manifest update", ino); + fuse_reply_err(req, ENOENT); + return false; + } + return true; +} +} // namespace + +// Implementation of the Fuse lookup() method. +// See include/fuse_lowlevel.h. 
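+// A successful fuse_reply_entry() hands the kernel a reference to the inode;
+// the kernel returns those references later through forget(). The bookkeeping
+// therefore pairs up roughly like this (illustrative counts only):
+//   lookup("a.txt")  -> nlookup 0 -> 1  (inode created, parent children_nlookup +1)
+//   lookup("a.txt")  -> nlookup 1 -> 2  (existing inode reused)
+//   forget(ino, 2)   -> nlookup 2 -> 0  (inode erased once children_nlookup is 0)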
+void CdcFuseLookup(fuse_req_t req, fuse_ino_t parent_ino, const char* name) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex, ctx->inodes_mutex) { + LOG_DEBUG("CdcFuseLookup, parent_ino=%u, name='%s'", parent_ino, name); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& parent = GetInode(parent_ino); + if (!ValidateInode(req, parent, parent_ino)) { + return; + } + absl::StatusOr proto = parent.asset.Lookup(name); + if (!proto.ok()) { + LOG_ERROR("Lookup of '%s' in ino %u failed: '%s'", name, parent_ino, + proto.status().ToString().c_str()); + fuse_reply_err(req, ENOENT); + return; + } + if (!*proto) { + fuse_reply_err(req, ENOENT); + return; + } + + Inode* inode; + { + absl::MutexLock inode_lock(&ctx->inodes_mutex); + inode = GetOrCreateInode(parent, *proto); + } + if (!ValidateInode(req, *inode, GetIno(*inode))) { + return; + } + + fuse_entry_param e; + memset(&e, 0, sizeof(e)); + e.attr_timeout = internal::kCdcFuseInodeTimeoutSec; + e.entry_timeout = internal::kCdcFuseInodeTimeoutSec; + e.ino = GetIno(*inode); + FillStatBuffer(e.ino, &e.attr); + fuse_reply_entry(req, &e); +} + +// Implementation of the Fuse getattr() method. +// See include/fuse_lowlevel.h. +void CdcFuseGetAttr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info* /*fi*/) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseGetAttr, ino=%u", ino); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + if (!ValidateInode(req, GetInode(ino), ino)) { + return; + } + + struct stat stbuf; + memset(&stbuf, 0, sizeof(stbuf)); + FillStatBuffer(ino, &stbuf); + fuse_reply_attr(req, &stbuf, internal::kCdcFuseInodeTimeoutSec); +} + +void CdcFuseSetAttr(fuse_req_t req, fuse_ino_t ino, struct stat* attr, + int to_set, struct fuse_file_info* fi) { + LOG_DEBUG("CdcFuseSetAttr, ino=%u to_set=%04x mode=%04o", ino, to_set, + attr->st_mode); + // TODO: Verify that the bits are already set or store the new permissions in + // a separate variable. + CdcFuseGetAttr(req, ino, fi); +} + +// Implementation of the FUSE open() method. +// See include/fuse_lowlevel.h. +void CdcFuseOpen(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseOpen, ino=%u, flags=%u", ino, fi->flags); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + if (!ValidateInode(req, inode, ino)) { + return; + } + + const AssetProto* proto = inode.asset.proto(); + if (proto->type() == AssetProto::DIRECTORY) { + fuse_reply_err(req, EISDIR); + return; + } + + // TODO: Handle links. + if (proto->type() != AssetProto::FILE) { + fuse_reply_err(req, EINVAL); + return; + } + + if ((fi->flags & 3) != O_RDONLY) { + fuse_reply_err(req, EACCES); + return; + } + + if (proto->file_size() > 0 && proto->file_chunks_size() == 0 && + proto->file_indirect_chunks_size() == 0) { + // This file has not been processed yet. Queue up the request Block until an + // updated manifest is available. + LOG_DEBUG("Request to open ino %u queued (file not ready)", ino); + absl::MutexLock lock(&ctx->queued_open_requests_mutex_); + ctx->queued_open_requests_.push_back({req, ino, fi}); + return; + } + + if (fi->flags & O_DIRECT) { + fi->keep_cache = 0; + fi->direct_io = 1; + } else { + fi->keep_cache = 1; + fi->direct_io = 0; + } + + // If the manifest was changed, open files "from scratch" to be able to get + // the updated data. 
+ if (inode.IsUpdated()) { + fi->keep_cache = 0; + inode.state = InodeState::kInitialized; + } + fuse_reply_open(req, fi); +} + +// Implementation of the FUSE read() method. +// See include/fuse_lowlevel.h. +void CdcFuseRead(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info* /*fi*/) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseRead, ino=%u, size=%u, off=%u", ino, size, off); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + if (!ValidateInode(req, inode, ino)) { + return; + } + if (inode.IsUpdated()) { + LOG_ERROR("Manifest has been updated, the file '%s' should be reopened", + inode.asset.proto()->name()); + fuse_reply_err(req, EIO); + return; + } + ctx->buffer.resize(size); + absl::StatusOr bytes_read = + inode.asset.Read(off, ctx->buffer.data(), size); + if (!bytes_read.ok()) { + LOG_ERROR("Reading %u bytes from offset %u of asset '%s' failed: '%s'", + size, off, inode.asset.proto()->name().c_str(), + bytes_read.status().ToString().c_str()); + fuse_reply_err(req, EIO); + return; + } + fuse_reply_buf(req, ctx->buffer.data(), *bytes_read); +} + +// Implementation of the FUSE release() method. +// See include/fuse_lowlevel.h. +void CdcFuseRelease(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseRelease, ino=%u", ino); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + if (!ValidateInode(req, inode, ino)) { + return; + } + + const AssetProto* proto = inode.asset.proto(); + if (proto->type() == AssetProto::DIRECTORY) { + fuse_reply_err(req, EISDIR); + return; + } + + if (proto->type() != AssetProto::FILE) { + fuse_reply_err(req, EINVAL); + return; + } + fuse_reply_err(req, 0); +} + +// Implementation of the FUSE opendir() method. +// See include/fuse_lowlevel.h. +void CdcFuseOpenDir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseOpenDir, ino=%u", ino); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + + if (!ValidateInode(req, inode, ino)) { + return; + } + + const AssetProto* proto = inode.asset.proto(); + if (proto->type() != AssetProto::DIRECTORY) { + fuse_reply_err(req, ENOTDIR); + return; + } + fuse_reply_open(req, fi); +} + +// Implementation of the FUSE readdir() method. +// See include/fuse_lowlevel.h. +void CdcFuseReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + fuse_file_info* /*fi*/) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex, ctx->inodes_mutex) { + LOG_DEBUG("CdcFuseReadDir, ino=%u, size=%u, off=%u", ino, size, off); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + if (!ValidateInode(req, inode, ino) || + !ValidateInode(req, GetInode(inode.asset.parent_ino()), + inode.asset.parent_ino())) { + return; + } + + if (inode.asset.proto()->type() != AssetProto::DIRECTORY) { + fuse_reply_err(req, ENOTDIR); + return; + } + + // TODO: This is called at least twice for each ls call. Cache buffer or + // similar. 
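+  // readdir() is offset/size based: the kernel keeps re-issuing the request
+  // with the offset it has consumed so far until an empty reply signals the
+  // end of the directory. The buffer below is therefore rebuilt in full each
+  // time and the requested [off, off + size) window is sliced out of it at the
+  // bottom of this function.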
+ Buffer buffer; + AddDirectoryEntry(req, &buffer, ".", ino); + AddDirectoryEntry(req, &buffer, "..", inode.asset.parent_ino()); + + { + absl::StatusOr> protos = + inode.asset.GetAllChildProtos(); + if (!protos.ok()) { + LOG_ERROR("ReadDir of ino %u failed: '%s'", ino, + protos.status().ToString().c_str()); + fuse_reply_err(req, EBADF); + return; + } + absl::MutexLock inode_lock(&ctx->inodes_mutex); + for (const AssetProto* child_proto : *protos) { + const Inode& child_inode = *GetOrCreateInode(inode, child_proto); + if (!child_inode.IsValid()) continue; + AddDirectoryEntry(req, &buffer, child_proto->name().c_str(), + GetIno(child_inode)); + } + } + + if (off >= static_cast(buffer.size())) { + // Out of bounds read. + fuse_reply_buf(req, nullptr, 0); + } else { + // Return the part that the caller asks for. + fuse_reply_buf(req, buffer.data() + off, + std::min(buffer.size() - off, size)); + } +} + +void CdcFuseReleaseDir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseReleaseDir, ino=%u", ino); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + Inode& inode = GetInode(ino); + if (!ValidateInode(req, inode, ino)) { + return; + } + + if (inode.asset.proto()->type() != AssetProto::DIRECTORY) { + fuse_reply_err(req, ENOTDIR); + return; + } + fuse_reply_err(req, 0); +} + +// Implementation of the FUSE forget() method. +// See include/fuse_lowlevel.h. +void CdcFuseForget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex, ctx->inodes_mutex) { + LOG_DEBUG("CdcFuseForget, ino=%u, nlookup=%u", ino, nlookup); + assert(ctx && ctx->initialized); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + absl::MutexLock ctx_lock(&ctx->inodes_mutex); + ForgetOne(ino, nlookup); + fuse_reply_none(req); +} + +// Implementation of the FUSE forget_multi() method. +// See include/fuse_lowlevel.h. +void CdcFuseForgetMulti(fuse_req_t req, size_t count, + struct fuse_forget_data* forgets) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex, ctx->inodes_mutex) { + LOG_DEBUG("CdcFuseForgetMulti, count=%u", count); + assert(forgets); + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + absl::MutexLock ctx_lock(&ctx->inodes_mutex); + for (size_t i = 0; i < count; ++i) { + ForgetOne(forgets[i].ino, forgets[i].nlookup); + } + fuse_reply_none(req); +} + +// Implementation of the FUSE access() method. +// See include/fuse_lowlevel.h. +void CdcFuseAccess(fuse_req_t req, fuse_ino_t ino, int mask) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_DEBUG("CdcFuseAccess, ino=%u, mask=%u", ino, mask); + + absl::ReaderMutexLock manifest_lock(&ctx->manifest_mutex); + struct fuse_context* context = fuse_get_context(); + // Root always has access rights. + if (context->uid == internal::kCdcFuseRootUid || + context->gid == internal::kCdcFuseRootGid) { + fuse_reply_err(req, 0); + return; + } + if (!ValidateInode(req, GetInode(ino), ino)) { + return; + } + + struct stat stbuf; + memset(&stbuf, 0, sizeof(stbuf)); + FillStatBuffer(ino, &stbuf); + + int process_permission = stbuf.st_mode & 0x7; // world + if (stbuf.st_gid == static_cast(context->gid)) { + process_permission |= stbuf.st_mode >> 3 & 0x7; // group + } + if (stbuf.st_uid == static_cast(context->uid)) { + process_permission |= stbuf.st_mode >> 6 & 0x7; // user + } + + if ((process_permission & mask) != mask) { + fuse_reply_err(req, EACCES); + return; + } + fuse_reply_err(req, 0); +} + +// Not-implemented functions for read-only FUSE. 
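+// Most of the operations below are write-path or otherwise unsupported on this
+// read-only filesystem, so they simply reply ENOSYS and the caller sees
+// "Function not implemented" (CdcFuseStatFS is the exception and mimics the
+// libfuse defaults instead). All stubs share the same shape; the helper below
+// is a minimal sketch of that pattern, for illustration only, and is not
+// referenced by the stubs themselves.
+void CdcFuseNotSupported(fuse_req_t req, const char* op, fuse_ino_t ino) {
+  LOG_WARNING("%s not implemented, ino=%u", op, ino);
+  fuse_reply_err(req, ENOSYS);
+}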
+void CdcFuseReadLink(fuse_req_t req, fuse_ino_t ino) { + LOG_WARNING("CdcFuseReadLink not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseFlush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseFlush not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseFSync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseFSync not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseFSyncDir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseFSyncDir not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseStatFS(fuse_req_t req, fuse_ino_t ino) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseStatFS not implemented, ino=%u", ino); + // Mimic the default behavior of the FUSE library. + struct statvfs buf; + buf.f_bsize = 512; + buf.f_namemax = 255; + fuse_reply_statfs(req, &buf); +} + +void CdcFuseSetXAttr(fuse_req_t req, fuse_ino_t ino, const char* name, + const char* value, size_t size, int flags) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseSetXAttr not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseGetXAttr(fuse_req_t req, fuse_ino_t ino, const char* name, + size_t size) ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseGetXAttr not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseListXAttr(fuse_req_t req, fuse_ino_t ino, size_t size) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseListXAttr not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseGetLk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi, + struct flock* lock) ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseGetLk not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseSetLk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi, + struct flock* lock, int sleep) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseSetLk not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseBMap(fuse_req_t req, fuse_ino_t ino, size_t blocksize, uint64_t idx) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseBMap not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseIoctl(fuse_req_t req, fuse_ino_t ino, int cmd, void* arg, + struct fuse_file_info* fi, unsigned flags, const void* in_buf, + size_t in_bufsz, size_t out_bufsz) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseIoctl not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFusePoll(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi, + struct fuse_pollhandle* ph) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFusePoll not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseRetrieveReply(fuse_req_t req, void* cookie, fuse_ino_t ino, + off_t offset, struct fuse_bufvec* bufv) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseRetrieveReply not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseFLock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi, + int op) ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + 
LOG_WARNING("CdcFuseFLock not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +void CdcFuseFAllocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + off_t length, struct fuse_file_info* fi) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex) { + LOG_WARNING("CdcFuseFAllocate not implemented, ino=%u", ino); + fuse_reply_err(req, ENOSYS); +} + +size_t CdcFuseGetInodeCountForTesting() ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + assert(ctx); + absl::MutexLock lock(&ctx->inodes_mutex); + return ctx->inodes.size(); +} + +size_t CdcFuseGetInvalidInodeCountForTesting() + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + assert(ctx); + absl::MutexLock lock(&ctx->inodes_mutex); + return ctx->invalid_inodes.size(); +} + +namespace cdc_fuse_fs { + +absl::Status Initialize(int argc, char** argv) { + assert(!ctx); + ctx = new CdcFuseFsContext(); + +#ifndef USE_MOCK_LIBFUSE + // Parse command line args. + ctx->args = FUSE_ARGS_INIT(argc, argv); + if (fuse_parse_cmdline(&ctx->args, &ctx->mountpoint, &ctx->multithreaded, + /*foreground=*/nullptr) == -1) { + Shutdown(); + return MakeStatus("fuse_parse_cmdline() failed"); + } + + // Initialize channel. + ctx->channel = fuse_mount(ctx->mountpoint, &ctx->args); + if (!ctx->channel) { + Shutdown(); + return MakeStatus("fuse_mount() failed"); + } + + // Initialize session. + fuse_lowlevel_ops fs_operations = {.lookup = CdcFuseLookup, + .forget = CdcFuseForget, + .getattr = CdcFuseGetAttr, + .setattr = CdcFuseSetAttr, + .readlink = CdcFuseReadLink, + // .mknod // Read-only file system + // .mkdir // Read-only file system + // .unlink // Read-only file system + // .rmdir // Read-only file system + // .symlink // Read-only file system + // .rename // Read-only file system + // .link // Read-only file system + .open = CdcFuseOpen, + .read = CdcFuseRead, + // .write // Read-only file system + .flush = CdcFuseFlush, + .release = CdcFuseRelease, + .fsync = CdcFuseFSync, + .opendir = CdcFuseOpenDir, + .readdir = CdcFuseReadDir, + .releasedir = CdcFuseReleaseDir, + .fsyncdir = CdcFuseFSyncDir, + .statfs = CdcFuseStatFS, + .setxattr = CdcFuseSetXAttr, + .getxattr = CdcFuseGetXAttr, + .listxattr = CdcFuseListXAttr, + // .removexattr // Read-only file system + .access = CdcFuseAccess, + // .create // Read-only file system + .getlk = CdcFuseGetLk, + .setlk = CdcFuseSetLk, + .bmap = CdcFuseBMap, + .ioctl = CdcFuseIoctl, + .poll = CdcFusePoll, + // .write_buf // Read-only file system + .retrieve_reply = CdcFuseRetrieveReply, + .forget_multi = CdcFuseForgetMulti, + .flock = CdcFuseFLock, + .fallocate = CdcFuseFAllocate}; + ctx->session = fuse_lowlevel_new(&ctx->args, &fs_operations, + sizeof(fs_operations), nullptr); + if (!ctx->session) { + Shutdown(); + return MakeStatus("fuse_lowlevel_new() failed"); + } + + // Set signal handlers. + if (fuse_set_signal_handlers(ctx->session) == -1) { + Shutdown(); + return MakeStatus("fuse_set_signal_handlers() failed"); + } + ctx->signal_handlers_set = true; + + fuse_session_add_chan(ctx->session, ctx->channel); + +#else + // This code is not unit tested. +#endif + + ctx->initialized = true; + return absl::OkStatus(); +} + +void Shutdown() { + assert(ctx); + +#ifndef USE_MOCK_LIBFUSE + // Exact opposite of Create(). 
+ if (ctx->signal_handlers_set) { + ctx->signal_handlers_set = false; + fuse_session_remove_chan(ctx->channel); + fuse_remove_signal_handlers(ctx->session); + } + + if (ctx->session) { + fuse_session_destroy(ctx->session); + ctx->session = nullptr; + } + + if (ctx->channel) { + fuse_unmount(ctx->mountpoint, ctx->channel); + ctx->channel = nullptr; + } + + if (ctx->mountpoint) { + free(ctx->mountpoint); + ctx->mountpoint = nullptr; + } + + fuse_opt_free_args(&ctx->args); +#else + // This code is not unit tested. +#endif + + ctx->initialized = false; + delete ctx; + ctx = nullptr; +} + +// Adds a warning message to |warnings| if |inode| does not point to +// |context_proto|. +void CheckProtoMismatch(const std::shared_ptr& inode, + const AssetProto* context_proto, + Json::Value& warnings) { + if (context_proto != inode->asset.proto()) { + LOG_WARNING("Proto mismatch %u", GetIno(*inode.get())); + Json::Value value; + value["ino"] = GetIno(*inode.get()); + value["state"] = InodeStateToString(inode->state); + value["context_proto"] = context_proto; + value["actual_proto"] = inode->asset.proto(); + warnings.append(value); + } +} + +// Adds a warning message to |warnings| if the proto of |inode| is not nullptr. +// This check is relevant for invalidated inodes (corresponding files and +// directories were removed from the manifest). +void CheckProtoNotNull(const std::shared_ptr& inode, + Json::Value& warnings) { + if (inode->asset.proto()) { + LOG_WARNING("Proto for invalidated inode is not NULL %u", + GetIno(*inode.get())); + Json::Value value; + value["ino"] = GetIno(*inode.get()); + warnings.append(value); + } +} + +Json::Value CreateWarningMessage(const Inode* inode, std::string&& message) { + Json::Value warning; + warning["ino"] = GetIno(*inode); + warning["name"] = inode->asset.proto()->name(); + warning["message"] = message; + return warning; +} + +// Adds a set of warning messages to |warnings| if inodes have wrong +// properties, for example: a non-directory asset has directory assets. +void CheckConsistencyIndividualInodes(const std::vector& inodes, + Json::Value& warnings) { + LOG_DEBUG("Checking consistency of individual inodes"); + Json::Value inodes_wrong_properties; + for (const Inode* inode : inodes) { + std::string asset_check; + if (!inode->asset.IsConsistent(&asset_check)) { + inodes_wrong_properties.append( + CreateWarningMessage(inode, std::move(asset_check))); + } + + // Inode should be referenced. + if (inode->nlookup + inode->children_nlookup == 0) { + inodes_wrong_properties.append( + CreateWarningMessage(inode, "Inode is not referenced")); + } + + if (!inodes_wrong_properties.empty()) { + warnings["inodes_wrong_properties"] = inodes_wrong_properties; + } + } +} + +// Adds a set of warning messages to |warnings| if inodes have invalid parents +// and thus cannot be reached from the updated manifest. It checks the +// consistency of tree directory structure. 
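+// The invariant being checked is that every updated inode still reaches the
+// root by following parent_ino links through valid inodes. A simplified,
+// single-inode version of that walk (ignoring the work queue, the visited set
+// and the JSON warning output) would look like:
+//
+//   bool ReachesRoot(const Inode* inode) {
+//     while (!inode->is_root) {
+//       Inode& parent = GetInode(inode->asset.parent_ino());
+//       if (!parent.IsValid()) return false;
+//       inode = &parent;
+//     }
+//     return true;
+//   }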
+void CheckConsistencyInodesHierarchy(const std::vector& inodes, + Json::Value& warnings) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->manifest_mutex) { + LOG_DEBUG("Checking consistency of inodes hierarchy"); + std::deque inodes_queue; + inodes_queue.insert(inodes_queue.end(), inodes.begin(), inodes.end()); + Json::Value inodes_wrong_parent; + std::unordered_set visited; + while (!inodes_queue.empty()) { + const Inode* inode = inodes_queue.front(); + inodes_queue.pop_front(); + if (visited.find(inode) != visited.end()) { + continue; + } + visited.insert(inode); + Inode& parent = GetInode(inode->asset.parent_ino()); + // Only valid inodes can be on the list. + if (!parent.IsValid()) { + Json::Value message; + message["ino"] = GetIno(*inode); + message["parent"] = inode->asset.parent_ino(); + message["name"] = inode->asset.proto()->name(); + message["message"] = "Invalid parent"; + inodes_wrong_parent.append(message); + continue; + } + // Add the parent to the deque, as |inodes| includes only kUpdatedProto and + // kUpdated. + if (visited.find(&parent) == visited.end()) { + inodes_queue.push_back(&parent); + } + } + if (!visited.empty() && visited.find(ctx->root.get()) == visited.end()) { + Json::Value message; + message["message"] = + "Inode hierarchy is not consistent: the root node was not reached"; + inodes_wrong_parent.append(message); + } + if (!inodes_wrong_parent.empty()) { + warnings["inodes_wrong_parent"] = inodes_wrong_parent; + } +} + +// Checks if the proto messages are reachable from ctx->manifest. +// Returns the set of inodes with unreachable protos. +std::set CheckProtoReachability(Json::Value& warnings) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + LOG_DEBUG("Checking proto reachability"); + Json::Value reachability_warning; + std::set unreachable_inodes; + if (&ctx->manifest->root_dir() != ctx->root->asset.proto()) { + Json::Value message; + message["message"] = "Root inode does not point to the manifest proto"; + reachability_warning.append(message); + unreachable_inodes.emplace(ctx->root.get()); + } + + absl::MutexLock lock(&ctx->inodes_mutex); + std::vector root_protos = + ctx->root->asset.GetLoadedChildProtos(); + std::unordered_set manifest_protos(root_protos.begin(), + root_protos.end()); + + // Start with the root node and its children, add children protos on the + // way. + std::deque collected_protos; + collected_protos.insert(collected_protos.end(), manifest_protos.begin(), + manifest_protos.end()); + // Collect all protos reachable from the manifest. + while (!collected_protos.empty()) { + const AssetProto* proto = collected_protos.front(); + collected_protos.pop_front(); + InodeMap::iterator it = ctx->inodes.find(proto); + // Collect child protos of all directories. 
+ if (it == ctx->inodes.end() || + it->second->asset.proto()->type() != AssetProto::DIRECTORY) { + continue; + } + std::vector subprotos = + it->second->asset.GetLoadedChildProtos(); + collected_protos.insert(collected_protos.end(), subprotos.begin(), + subprotos.end()); + manifest_protos.insert(subprotos.begin(), subprotos.end()); + } + + for (const auto& [proto, inode] : ctx->inodes) { + if (manifest_protos.find(proto) == manifest_protos.end()) { + Json::Value message; + message["message"] = absl::StrFormat( + "Proto for inode %i is not reachable from the manifest", + reinterpret_cast(&(*inode))); + reachability_warning.append(message); + unreachable_inodes.emplace(inode.get()); + } + } + if (!reachability_warning.empty()) { + warnings["proto_reachability"] = reachability_warning; + } + return unreachable_inodes; +} + +// Checks if the FUSE state is consistent after the manifest update. In case of +// any inconsistencies it prints out a pretty JSON string. |inodes_size| +// describes the number of inodes before the manifest was set. +void CheckFUSEConsistency(size_t inodes_size) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + LOG_DEBUG("Starting FUSE consistency check"); + + std::vector inodes_to_check; + Json::Value warnings; + // Step I. Root consistency. + LOG_DEBUG("Checking the root"); + if (!ctx->root || ctx->root->asset.parent_ino() != FUSE_ROOT_ID || + !ctx->root->IsValid() || ctx->root->IsInitialized()) { + Json::Value warning_root = ctx->root->ToJson(true); + warning_root["message"] = "The root inode is inconsistent"; + warnings.append(warning_root); + } + + // Step II. The total amount of inodes should not change. + Json::Value initialized_json; + Json::Value wrong_protos_json; + std::vector invalid_inodes; + size_t initialized_total = 0; + size_t updated_proto_total = 0; + size_t updated_total = 0; + { + LOG_DEBUG("Checking the number of inodes"); + absl::ReaderMutexLock lock(&ctx->inodes_mutex); + if (inodes_size != ctx->inodes.size() + ctx->invalid_inodes.size()) { + Json::Value warning_size; + warning_size["message"] = + absl::StrFormat("Inodes' size mismatch: expected: %u, actual: %u", + inodes_size, ctx->inodes.size()); + warnings.append(warning_size); + } + + // Step III. Consistency of ctx->inodes: inodes should point to the + // correct asset proto and asset protos should point to the right inodes. + LOG_DEBUG("Checking inode state"); + for (const auto& [context_proto, inode] : ctx->inodes) { + switch (inode->state) { + case InodeState::kInitialized: + // There must be no kInitialized inodes, all should be kUpdatedProto, + // kUpdated, or kInvalid after manifest update. 
+ initialized_json.append(inode->ToJson(true)); + ++initialized_total; + break; + case InodeState::kUpdatedProto: + CheckProtoMismatch(inode, context_proto, wrong_protos_json); + inodes_to_check.push_back(inode.get()); + ++updated_proto_total; + break; + case InodeState::kUpdated: + CheckProtoMismatch(inode, context_proto, wrong_protos_json); + inodes_to_check.push_back(inode.get()); + ++updated_total; + break; + case InodeState::kInvalid: + CheckProtoNotNull(inode, wrong_protos_json); + invalid_inodes.push_back(inode.get()); + break; + } + } + } + LOG_DEBUG("Initialized=%u, updated_proto=%u, updated=%u, invalid=%u", + initialized_total, updated_proto_total, updated_total, + invalid_inodes.size()); + + if (!initialized_json.empty()) { + warnings["initialized_inodes"] = initialized_json; + } + + if (!wrong_protos_json.empty()) { + warnings["wrong_protos_inodes"] = wrong_protos_json; + } + + // IV. Tree consistency. + CheckConsistencyInodesHierarchy(inodes_to_check, warnings); + + // V. Check reachability of all AssetProtos. + std::set unreachable_inodes = CheckProtoReachability(warnings); + inodes_to_check.push_back(ctx->root.get()); + if (!unreachable_inodes.empty()) { + LOG_WARNING("Skipping %i inodes from the consistency check", + unreachable_inodes.size()); + inodes_to_check.erase( + std::remove_if(inodes_to_check.begin(), inodes_to_check.end(), + [&unreachable_inodes](const Inode* inode) { + return unreachable_inodes.find(inode) != + unreachable_inodes.end(); + }), + inodes_to_check.end()); + } + + // VI. Consistency of individual reachable inodes. + CheckConsistencyIndividualInodes(inodes_to_check, warnings); + + Json::Value output; + if (!warnings.empty()) { + Json::Value updated_proto_json; + Json::Value updated_json; + for (const Inode* inode : inodes_to_check) { + if (inode->IsUpdated()) { + updated_json.append(inode->ToJson(true)); + } else { + assert(inode->IsUpdatedProto()); + updated_proto_json.append(inode->ToJson(true)); + } + } + Json::Value invalid_json; + for (const Inode* inode : invalid_inodes) { + invalid_json.append(inode->ToJson(false)); + } + output["updated_proto_inodes"] = updated_proto_json; + output["updated_inodes"] = updated_json; + output["invalid_inodes"] = invalid_json; + output["warnings"] = warnings; + } + + if (output.empty()) { + LOG_INFO("FUSE consistency check succeeded"); + } else { + LOG_WARNING("FUSE consistency check: %s", output.toStyledString()); + } +} + +// Recursive procedure to invalidate the inode subtree for |ino| including +// the root |ino| of the subtree. The elements cannot be directly removed as +// they might be still referenced. +void InvalidateTree(fuse_ino_t ino) + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + std::deque inos; + inos.push_back(ino); + while (!inos.empty()) { + fuse_ino_t tmp_ino = inos.front(); + Inode& inode = GetInode(tmp_ino); + if (!inode.IsValid()) { + LOG_WARNING( + "ino should be valid before invalidation. 
ino %u is already invalid", + ino); + return; + } + inode.state = InodeState::kInvalid; + if (inode.asset.proto()->type() == AssetProto::DIRECTORY) { + std::vector child_inos = CollectLoadedChildInos(inode.asset); + inos.insert(inos.end(), child_inos.begin(), child_inos.end()); + } + { + absl::MutexLock inode_lock(&ctx->inodes_mutex); + const AssetProto* outdated_proto = inode.asset.proto(); + ctx->invalid_inodes[tmp_ino] = ctx->inodes[outdated_proto]; + size_t count = ctx->inodes.erase(outdated_proto); + assert(count); + (void)count; + } + inode.asset.UpdateProto(nullptr); + inos.pop_front(); + } +} + +struct UpdateInode { + std::shared_ptr new_parent; + fuse_ino_t old_ino; +}; + +// ThreadPool task that runs the update of inodes. +class UpdateInodeTask : public Task { + public: + UpdateInodeTask(UpdateInode* inode, std::vector* result) + : update_inode_(inode), child_inodes_to_update_(result) {} + + // Task: + void ThreadRun(IsCancelledPredicate is_cancelled) override + ABSL_SHARED_LOCKS_REQUIRED(ctx->manifest_mutex) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + LOG_DEBUG("Updating inode %u", update_inode_->old_ino); + assert((ctx->manifest_mutex.AssertHeld(), true)); + + const std::shared_ptr& new_parent = update_inode_->new_parent; + Inode& old_inode = GetInode(update_inode_->old_ino); + assert(old_inode.IsValid()); + + const std::string& name = old_inode.asset.proto()->name(); + absl::StatusOr new_proto = + new_parent->asset.Lookup(name.c_str()); + + // The asset does not exist anymore. It has to be removed from the parent's + // set of children. If the node has its own children, they should be + // invalidated as well. The final removal from the inode map can only be + // done via forget() and forget_multi() calls. + if (!new_proto.ok() || !*new_proto) { + InvalidateTree(update_inode_->old_ino); + return; + } + // Asset still exists in a new proto. Its inode id should be preserved. If a + // new proto exists for the same name, but the asset has changed, an update + // is necessary, the inode id remains stable. + if (*(*new_proto) != *(old_inode.asset.proto())) { + LOG_DEBUG("Inode %u is marked for update", update_inode_->old_ino); + old_inode.state = InodeState::kUpdated; + } else { + old_inode.state = InodeState::kUpdatedProto; + } + const AssetProto* old_proto = old_inode.asset.proto(); + std::shared_ptr new_inode; + { + absl::MutexLock inode_lock(&ctx->inodes_mutex); + new_inode = ctx->inodes[*new_proto] = ctx->inodes[old_proto]; + } + // As there is an updated valid entry for the same inode in the map, + // the old one can be removed. + proto_to_remove_ = old_proto; + + std::vector child_inos = + CollectLoadedChildInos(old_inode.asset); + for (fuse_ino_t child_ino : child_inos) { + UpdateInode child_to_update; + child_to_update.new_parent = new_inode; + child_to_update.old_ino = child_ino; + child_inodes_to_update_->emplace_back(std::move(child_to_update)); + } + old_inode.asset.UpdateProto(*new_proto); + } + + const AssetProto* ProtoToRemove() const { return proto_to_remove_; } + + private: + const UpdateInode* const update_inode_; + std::vector* child_inodes_to_update_; + const AssetProto* proto_to_remove_ = nullptr; +}; + +// Recursive procedure to update the inodes contents on a level after a request +// to update the manifest id was received. 
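+// Together with the driver loop in UpdateProtosFromRoot() below, this forms a
+// breadth-first pass over the loaded part of the tree: the inodes of one level
+// are re-resolved in parallel against their already updated parents, and the
+// children they report make up the next level. A simplified, single-threaded
+// sketch of that driver (UpdateOneInode is a hypothetical stand-in for
+// queueing an UpdateInodeTask):
+//
+//   void UpdateLevels(std::vector<UpdateInode> level) {
+//     while (!level.empty()) {
+//       std::vector<UpdateInode> next;
+//       for (UpdateInode& item : level) UpdateOneInode(item, &next);
+//       level = std::move(next);
+//     }
+//   }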
+void ParallelUpdateProtosOnLevel( + Threadpool& pool, std::vector& input_inodes, + std::vector>& result, + std::vector& outdated_protos) { + LOG_DEBUG("Update asset protos in parallel on the same level"); + assert(input_inodes.size() == result.size()); + + for (unsigned int idx = 0; idx < input_inodes.size(); ++idx) { + pool.QueueTask( + std::make_unique(&input_inodes[idx], &result[idx])); + } + for (unsigned int idx = 0; idx < input_inodes.size(); ++idx) { + std::unique_ptr task = pool.GetCompletedTask(); + UpdateInodeTask* update_task = static_cast(task.get()); + if (update_task->ProtoToRemove()) { + outdated_protos.push_back(update_task->ProtoToRemove()); + } + } +} + +std::shared_ptr UpdateProtosFromRoot(const AssetProto* new_root_proto) + ABSL_LOCKS_EXCLUDED(ctx->inodes_mutex) { + LOG_DEBUG("Updating inode hierarchy starting from the root"); + assert((ctx->manifest_mutex.AssertHeld(), true)); + + // Create the new root. Make sure to preserve the lookup counts! + std::shared_ptr new_root = std::make_shared(); + new_root->asset.Initialize(FUSE_ROOT_ID, ctx->data_store_reader, + new_root_proto); + new_root->nlookup = ctx->root->nlookup.load(); + new_root->children_nlookup = ctx->root->children_nlookup.load(); + new_root->state = ctx->root->state.load(); + new_root->is_root = true; + + std::shared_ptr old_root = ctx->root; + std::vector children = CollectLoadedChildInos(old_root->asset); + std::vector inos_to_update; + inos_to_update.reserve(children.size()); + for (fuse_ino_t child : children) { + UpdateInode to_update; + to_update.new_parent = new_root; + to_update.old_ino = child; + inos_to_update.emplace_back(std::move(to_update)); + } + + // Outdated AssetProto(s) can be removed at the end, as they have a duplicated + // updated entry in inodes. Only updated (not removed) inodes are included. + std::vector outdated_protos; + Threadpool pool(std::thread::hardware_concurrency()); + while (!inos_to_update.empty()) { + std::vector> level_result( + inos_to_update.size(), std::vector()); + ParallelUpdateProtosOnLevel(pool, inos_to_update, level_result, + outdated_protos); + inos_to_update.clear(); + for (unsigned int idx = 0; idx < level_result.size(); ++idx) { + for (unsigned int jdx = 0; jdx < level_result[idx].size(); ++jdx) { + inos_to_update.push_back(level_result[idx][jdx]); + } + } + } + + // Inodes should not be removed, just the map entries with old protos. 
+ absl::MutexLock inode_lock(&ctx->inodes_mutex); + for (size_t idx = outdated_protos.size(); idx > 0; --idx) { + assert(outdated_protos[idx - 1]); + size_t count = ctx->inodes.erase(outdated_protos[idx - 1]); + assert(count); + (void)count; + } + + return new_root; +} + +absl::Status SetManifest(const ContentIdProto& manifest_id) + ABSL_LOCKS_EXCLUDED(ctx->manifest_mutex, ctx->inodes_mutex) { + LOG_DEBUG("Setting manifest '%s' in FUSE", + ContentId::ToHexString(manifest_id)); + assert(ctx && ctx->initialized && ctx->data_store_reader); + + { + absl::WriterMutexLock manifest_lock(&ctx->manifest_mutex); + size_t old_inodes_size; + { + absl::MutexLock inodes_lock(&ctx->inodes_mutex); + old_inodes_size = ctx->inodes.size() + ctx->invalid_inodes.size(); + } + std::unique_ptr new_manifest = + std::make_unique(); + absl::Status status = + ctx->data_store_reader->GetProto(manifest_id, new_manifest.get()); + if (!status.ok()) { + LOG_ERROR("Failed to get manifest '%s'", + ContentId::ToHexString(manifest_id)); + return WrapStatus(status, "Failed to get manifest '%s'", + ContentId::ToHexString(manifest_id)); + } + ctx->root = UpdateProtosFromRoot(&new_manifest->root_dir()); + if (ctx->manifest->root_dir() != new_manifest->root_dir()) { + ctx->root->state = InodeState::kUpdated; + } else { + ctx->root->state = InodeState::kUpdatedProto; + } + ctx->manifest.swap(new_manifest); + if (ctx->consistency_check) { + CheckFUSEConsistency(old_inodes_size); + } + + absl::MutexLock inodes_lock(&ctx->inodes_mutex); + for (const auto& [proto, inode] : ctx->inodes) { + // Reset kUpdatedProto to kInitialized. The state was only used for + // validation. kUpdated is still needed for clearing kernel caches when a + // file is opened. + assert(inode->IsValid()); + if (inode->IsUpdatedProto() || + inode->asset.proto()->type() == AssetProto::DIRECTORY) { + inode->state = InodeState::kInitialized; + } + } + ctx->root->state = InodeState::kInitialized; + } + + // Process outstanding open requests. Be sure to move the vector because + // CdcFuseOpen() might requeue requests. + std::vector requests; + { + absl::MutexLock lock(&ctx->queued_open_requests_mutex_); + requests.swap(ctx->queued_open_requests_); + } + for (const OpenRequest request : requests) { + LOG_DEBUG("Resuming request to open ino %u", request.ino); + CdcFuseOpen(request.req, request.ino, request.fi); + } + +#ifndef USE_MOCK_LIBFUSE + // Acknowledge that the manifest id was received and FUSE was updated. + absl::Status status = + ctx->config_stream_client_->SendManifestAck(manifest_id); + if (!status.ok()) { + LOG_ERROR("Failed to send ack for manifest '%s'", + ContentId::ToHexString(manifest_id)); + return WrapStatus(status, "Failed to send ack for manifest '%s'", + ContentId::ToHexString(manifest_id)); + } +#endif + + return absl::OkStatus(); +} + +absl::Status StartConfigClient(std::string instance, + std::shared_ptr channel) { + LOG_DEBUG("Starting configuration client"); + assert(ctx && ctx->initialized); + if (ctx->config_stream_client_) { + ctx->config_stream_client_.reset(); + } + ctx->config_stream_client_ = std::make_unique( + std::move(instance), std::move(channel)); + return absl::OkStatus(); +} + +// Initializes FUSE with a manifest for an empty directory: +// The user will be able to check the empty folder before the first update +// of the manifest id is received. 
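+// In proto terms the "empty" manifest is just a root directory asset with no
+// children, roughly (illustrative):
+//
+//   ManifestProto empty;
+//   empty.mutable_root_dir()->set_type(AssetProto::DIRECTORY);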
+void InitializeRootManifest() { + absl::MutexLock lock(&ctx->manifest_mutex); + assert(ctx && ctx->root); + ctx->manifest->mutable_root_dir()->set_type(AssetProto::DIRECTORY); + ctx->root->asset.Initialize(FUSE_ROOT_ID, ctx->data_store_reader, + &ctx->manifest->root_dir()); + ctx->root->is_root = true; + ctx->root->nlookup = 1; +} + +absl::Status Run(DataStoreReader* data_store_reader, bool consistency_check) { + assert(ctx && ctx->initialized && data_store_reader); + ctx->consistency_check = consistency_check; + ctx->data_store_reader = data_store_reader; + InitializeRootManifest(); +#ifndef USE_MOCK_LIBFUSE + RETURN_IF_ERROR(ctx->config_stream_client_->StartListeningToManifestUpdates( + [](const ContentIdProto& id) { return SetManifest(id); }), + "Failed to listen to manifest updates"); + + LOG_INFO("Starting session loop (mt = '%s')", + ctx->multithreaded ? "true" : "false"); + int res = ctx->multithreaded ? fuse_session_loop_mt(ctx->session) + : fuse_session_loop(ctx->session); + if (res == -1) return MakeStatus("Session loop failed"); + LOG_INFO("Session loop finished."); + + ctx->config_stream_client_->Shutdown(); +#else + // This code is not unit tested. +#endif + return absl::OkStatus(); +} + +} // namespace cdc_fuse_fs +} // namespace cdc_ft diff --git a/cdc_fuse_fs/cdc_fuse_fs.h b/cdc_fuse_fs/cdc_fuse_fs.h new file mode 100644 index 0000000..97ee2be --- /dev/null +++ b/cdc_fuse_fs/cdc_fuse_fs.h @@ -0,0 +1,86 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_FUSE_FS_CDC_FUSE_FS_H_ +#define CDC_FUSE_FS_CDC_FUSE_FS_H_ + +#ifndef R_OK +#define R_OK 4 +#endif +#ifndef W_OK +#define W_OK 2 +#endif +#ifndef X_OK +#define X_OK 1 +#endif + +#include + +#include "absl/status/status.h" +#include "cdc_fuse_fs/config_stream_client.h" +#include "grpcpp/channel.h" +#include "manifest/manifest_proto_defs.h" + +namespace cdc_ft { + +class DataStoreReader; + +// CdcFuse filesystem constants, exposed for testing. +namespace internal { +// Number of hardlinks is not important since the fs is read-only (I think). +constexpr int kCdcFuseDefaultNLink = 1; + +// Cloudcast user and group id. +constexpr int kCdcFuseCloudcastUid = 1000; +constexpr int kCdcFuseCloudcastGid = 1000; + +// Root user and group id. +constexpr int kCdcFuseRootUid = 0; +constexpr int kCdcFuseRootGid = 0; + +// Default timeout after which the kernel will assume inodes are stale. +constexpr double kCdcFuseInodeTimeoutSec = 1.0; +} // namespace internal + +namespace cdc_fuse_fs { + +// Initializes the CDC FUSE filesystem. Parses the command line, sets up a +// channel and a session, and optionally forks the process. For valid arguments +// see fuse_common.h. +absl::Status Initialize(int argc, char** argv); + +// Starts a client to read configuration updates over gRPC |channel|. +// |instance| is the gamelet instance id. 
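+// The client keeps a reference to the channel for the lifetime of the stream;
+// a typical caller creates the channel up front, e.g. (illustrative only, the
+// target address is hypothetical):
+//
+//   std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel(
+//       "localhost:44432", grpc::InsecureChannelCredentials());
+//   absl::Status status = cdc_fuse_fs::StartConfigClient(instance, channel);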
+absl::Status StartConfigClient(std::string instance, + std::shared_ptr channel); + +// Sets the |data_store_reader| to load data from, initializes FUSE with a +// manifest for an empty directory, and starts the filesystem. The call does +// not return until the filesystem finishes running. +// |consistency_check| defines whether FUSE consistency should be inspected +// after each manifest update. +absl::Status Run(DataStoreReader* data_store_reader, bool consistency_check); + +// Releases resources. Should be called when the filesystem finished running. +void Shutdown(); + +// Sets |manifest_id| as a CDC FUSE root. +absl::Status SetManifest(const ContentIdProto& manifest_id); + +} // namespace cdc_fuse_fs +} // namespace cdc_ft + +#endif // CDC_FUSE_FS_CDC_FUSE_FS_H_ diff --git a/cdc_fuse_fs/cdc_fuse_fs.vcxproj b/cdc_fuse_fs/cdc_fuse_fs.vcxproj new file mode 100644 index 0000000..b60e370 --- /dev/null +++ b/cdc_fuse_fs/cdc_fuse_fs.vcxproj @@ -0,0 +1,61 @@ + + + + + Debug + GGP + + + Release + GGP + + + + {a537310c-0571-43d5-b7fe-c867f702294f} + cdc_fuse_fs + + + + Makefile + true + + + Makefile + false + + + $(SolutionDir)bazel-out\k8-dbg\bin\cdc_fuse_fs\ + $(NMakePreprocessorDefinitions) + /std:c++17 + + + $(SolutionDir)bazel-out\k8-opt\bin\cdc_fuse_fs\ + $(NMakePreprocessorDefinitions) + /std:c++17 + + + + + + + + + + + + + + + + + + //cdc_fuse_fs + cdc_fuse_fs + ..\;..\third_party\absl;..\third_party\blake3\c;..\third_party\googletest\googletest\include;..\third_party\protobuf\src;..\third_party\grpc\include;$(AdditionalIncludeDirectories) + ..\/ + + + + + + \ No newline at end of file diff --git a/cdc_fuse_fs/cdc_fuse_fs.vcxproj.filters b/cdc_fuse_fs/cdc_fuse_fs.vcxproj.filters new file mode 100644 index 0000000..9cd8510 --- /dev/null +++ b/cdc_fuse_fs/cdc_fuse_fs.vcxproj.filters @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/cdc_fuse_fs/cdc_fuse_fs_test.cc b/cdc_fuse_fs/cdc_fuse_fs_test.cc new file mode 100644 index 0000000..73b1df0 --- /dev/null +++ b/cdc_fuse_fs/cdc_fuse_fs_test.cc @@ -0,0 +1,1146 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/cdc_fuse_fs.h" + +#include +#include + +#include "cdc_fuse_fs/mock_libfuse.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status_test_macros.h" +#include "data_store/mem_data_store.h" +#include "gtest/gtest.h" +#include "manifest/fake_manifest_builder.h" + +namespace cdc_ft { + +// FUSE callback methods. Declared here since they depend on Fuse types that +// should not be exposed in cdc_fuse_fs.h. 
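+// The tests drive these callbacks directly against MockLibFuse, which records
+// what the fuse_reply_*() calls would have sent to the kernel. A typical
+// interaction therefore looks like (mirroring the tests below):
+//
+//   CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name);
+//   ASSERT_EQ(fuse_.entries.size(), 1);
+//   CdcFuseOpen(req_, fuse_.entries[0].ino, &fi);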
+void CdcFuseForget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); +void CdcFuseForgetMulti(fuse_req_t req, size_t count, + struct fuse_forget_data* forgets); +void CdcFuseGetAttr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi); +void CdcFuseLookup(fuse_req_t req, fuse_ino_t parent_ino, const char* name); +void CdcFuseOpen(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi); +void CdcFuseOpenDir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi); +void CdcFuseRead(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info* fi); +void CdcFuseReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + fuse_file_info* fi); +void CdcFuseRelease(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi); +void CdcFuseReleaseDir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info* fi); +size_t CdcFuseGetInodeCountForTesting(); +size_t CdcFuseGetInvalidInodeCountForTesting(); +void CdcFuseAccess(fuse_req_t req, fuse_ino_t ino, int mask); + +namespace { + +class FuseLog : public ConsoleLog { + public: + explicit FuseLog(LogLevel log_level) : ConsoleLog(log_level) {} + + const std::string& LastMessage() const { return last_message_; } + + protected: + void WriteLogMessage(LogLevel level, const char* file, int line, + const char* func, const char* message) override { + ConsoleLog::WriteLogMessage(level, file, line, func, message); + last_message_ = message; + } + + private: + std::string last_message_; +}; + +class CdcFuseFsTest : public ::testing::Test { + protected: + static constexpr char kFile1Name[] = "file1.txt"; + static constexpr uint32_t kFile1Perm = path::MODE_IRUSR; + static constexpr int64_t kFile1Mtime = 1ull << 40; + const std::vector kFile1Data = {'f', 'i', 'l', 'e', '1'}; + + static constexpr char kFile2Name[] = "file2.txt"; + static constexpr uint32_t kFile2Perm = path::MODE_IRWXU; + static constexpr int64_t kFile2Mtime = -kFile1Mtime; + const std::vector kFile2Data = {'H', 'e', 'l', 'l', 'o', ' ', + 'W', 'o', 'r', 'l', 'd', '!'}; + + static constexpr char kSubdirName[] = "subdir"; + static constexpr uint32_t kSubdirPerm = path::MODE_IRUSR | path::MODE_IXUSR; + static constexpr int64_t kSubdirMtime = 0; + + static constexpr char kWorldFile[] = "world_file.txt"; + static constexpr char kGroupFile[] = "group_file.txt"; + static constexpr char kUserFile[] = "user_file.txt"; + + public: + CdcFuseFsTest() : builder_(&cache_) { + cdc_fuse_fs::Initialize(0, nullptr).IgnoreError(); + Log::Initialize(std::make_unique(LogLevel::kInfo)); + } + ~CdcFuseFsTest() { + Log::Shutdown(); + cdc_fuse_fs::Shutdown(); + } + + void SetUp() override { + // Set up an in-memory directory structure for testing. + // - file1.txt + // - subdir + // | + // - file2.txt + builder_.AddFile(builder_.Root(), kFile1Name, kFile1Mtime, kFile1Perm, + kFile1Data); + AssetProto* subdir = builder_.AddDirectory(builder_.Root(), kSubdirName, + kSubdirMtime, kSubdirPerm); + builder_.AddFile(subdir, kFile2Name, kFile2Mtime, kFile2Perm, kFile2Data); + + manifest_id_ = cache_.AddProto(*builder_.Manifest()); + fuse_.SetUid(internal::kCdcFuseCloudcastUid); + fuse_.SetGid(internal::kCdcFuseCloudcastGid); + + // Note: Run(&cache_) immediately exits after setting the provider/id on + // Windows. 
+ EXPECT_OK(cdc_fuse_fs::Run(&cache_, true)); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + } + + void TearDown() override {} + + protected: + void ExpectAttr(const struct stat& attr, uint32_t mode, uint64_t size, + int64_t mtime) { + EXPECT_NE(attr.st_ino, 0); + EXPECT_EQ(attr.st_mode, mode); + EXPECT_EQ(attr.st_size, size); + EXPECT_EQ(attr.st_nlink, internal::kCdcFuseDefaultNLink); + EXPECT_EQ(attr.st_mtime, mtime); + EXPECT_EQ(attr.st_uid, internal::kCdcFuseCloudcastUid); + EXPECT_EQ(attr.st_gid, internal::kCdcFuseCloudcastGid); + } + + void ExpectAccessError(int mask, int exp_error) { + size_t num_errors = fuse_.errors.size(); + for (size_t it = 0; it < fuse_.entries.size(); ++it, ++num_errors) { + CdcFuseAccess(req_, fuse_.entries[it].ino, mask); + ASSERT_EQ(fuse_.errors.size(), num_errors + 1); + EXPECT_EQ(fuse_.errors[num_errors], exp_error); + } + } + + void ExpectAccessSucceeds() { + // Each file should allow read access. + ExpectAccessError(R_OK, 0 /*error*/); + + // Each file should allow write access. + ExpectAccessError(W_OK, 0 /*error*/); + + // Each file should allow exec access. + ExpectAccessError(X_OK, 0 /*error*/); + + // All files should provide all types of access. + ExpectAccessError(R_OK | W_OK | X_OK, 0 /*error*/); + } + + // Wipes chunks for |kFile1Name| to simulate an intermediate manifest, i.e. + // the manifest that contains all assets, but misses file chunks. + // Returns the intermediate manifest id. + ContentIdProto CreateIntermediateManifestId() { + ManifestProto manifest; + EXPECT_OK(cache_.GetProto(manifest_id_, &manifest)); + EXPECT_GT(manifest.root_dir().dir_assets_size(), 0); + if (manifest.root_dir().dir_assets_size() == 0) return ContentIdProto(); + AssetProto* file1 = manifest.mutable_root_dir()->mutable_dir_assets(0); + EXPECT_EQ(file1->name(), kFile1Name); + file1->clear_file_chunks(); + return cache_.AddProto(manifest); + } + + MemDataStore cache_; + MockLibFuse fuse_; + fuse_req_t req_ = nullptr; + ContentIdProto manifest_id_; + FakeManifestBuilder builder_; + FuseLog* Log() const { return static_cast(Log::Instance()); } +}; + +TEST_F(CdcFuseFsTest, LookupSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + EXPECT_NE(fuse_.entries[0].ino, 0); + ExpectAttr(fuse_.entries[0].attr, kFile1Perm | path::MODE_IFREG, + kFile1Data.size(), kFile1Mtime); +} + +TEST_F(CdcFuseFsTest, LookupFailsNotADirectory) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + EXPECT_EQ(fuse_.errors.size(), 0); + + // The ino refers to a file, but Lookup() wants a directory. 
+ CdcFuseLookup(req_, fuse_.entries[0].ino, kFile1Name); + EXPECT_EQ(fuse_.entries.size(), 1); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOENT); +} + +TEST_F(CdcFuseFsTest, LookupFailsDoesNotExist) { + CdcFuseLookup(req_, FUSE_ROOT_ID, "does_not_exist"); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOENT); +} + +TEST_F(CdcFuseFsTest, GetAttrSucceedsRootDir) { + fuse_file_info fi; + CdcFuseGetAttr(req_, FUSE_ROOT_ID, &fi); + ASSERT_EQ(fuse_.attrs.size(), 1); + EXPECT_EQ(fuse_.attrs[0].timeout, internal::kCdcFuseInodeTimeoutSec); + ExpectAttr(fuse_.attrs[0].value, + FakeManifestBuilder::kRootDirPerms | path::MODE_IFDIR, 0, 0); +} + +TEST_F(CdcFuseFsTest, GetAttrSucceedsFile) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + fuse_file_info fi; + CdcFuseGetAttr(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.attrs.size(), 1); + EXPECT_EQ(fuse_.attrs[0].timeout, internal::kCdcFuseInodeTimeoutSec); + ExpectAttr(fuse_.attrs[0].value, kFile1Perm | path::MODE_IFREG, + kFile1Data.size(), kFile1Mtime); +} + +TEST_F(CdcFuseFsTest, OpenSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + fuse_file_info fi; + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.open_files.size(), 1); +} + +TEST_F(CdcFuseFsTest, OpenRespectsODirect) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + fuse_file_info fi; + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + fi.flags = O_DIRECT; + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + + ASSERT_EQ(fuse_.open_files.size(), 2); + + ASSERT_FALSE(fuse_.open_files[0].direct_io); + ASSERT_TRUE(fuse_.open_files[1].direct_io); + + ASSERT_TRUE(fuse_.open_files[0].keep_cache); + ASSERT_FALSE(fuse_.open_files[1].keep_cache); +} + +TEST_F(CdcFuseFsTest, OpenFailsDirectory) { + fuse_file_info fi; + CdcFuseOpen(req_, FUSE_ROOT_ID, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EISDIR); +} + +TEST_F(CdcFuseFsTest, OpenFailsWriteAccess) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + fuse_file_info fi(O_RDWR); + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EACCES); +} + +TEST_F(CdcFuseFsTest, OpenQueuedForIntermediateManifest) { + ContentIdProto intermediate_manifest_id = CreateIntermediateManifestId(); + EXPECT_OK(cdc_fuse_fs::SetManifest(intermediate_manifest_id)); + + // Opening file1 should be queued as it contains no chunks. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + fuse_file_info fi; + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.open_files.size(), 0); + + // Setting the final manifest should fulfill queued open requests. + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + ASSERT_EQ(fuse_.open_files.size(), 1); +} + +TEST_F(CdcFuseFsTest, OpenQueuedRequestsRequeue) { + ContentIdProto intermediate_manifest_id = CreateIntermediateManifestId(); + EXPECT_OK(cdc_fuse_fs::SetManifest(intermediate_manifest_id)); + + // Opening file1 should be queued as it contains no chunks. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + fuse_file_info fi; + CdcFuseOpen(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.open_files.size(), 0); + + // Setting the same incomplete manifest again should requeue the request. 
+ EXPECT_OK(cdc_fuse_fs::SetManifest(intermediate_manifest_id)); + ASSERT_EQ(fuse_.open_files.size(), 0); + + // Setting the final manifest should fulfill queued open requests. + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + ASSERT_EQ(fuse_.open_files.size(), 1); +} + +TEST_F(CdcFuseFsTest, ReadSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + // Read everything from file1 except the first and the last byte. + fuse_file_info fi; + CdcFuseRead(req_, fuse_.entries[0].ino, kFile1Data.size() - 2, 1, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + std::vector data(kFile1Data.begin() + 1, kFile1Data.end() - 1); + EXPECT_EQ(fuse_.buffers[0], data); +} + +TEST_F(CdcFuseFsTest, ReadFailsNotAFile) { + fuse_file_info fi; + CdcFuseRead(req_, FUSE_ROOT_ID, kFile1Data.size(), 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EIO); +} + +TEST_F(CdcFuseFsTest, ReadDirSucceeds) { + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + fuse_file_info fi; + CdcFuseReadDir(req_, FUSE_ROOT_ID, kEntrySize * 10, 0, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + ASSERT_EQ(fuse_.buffers[0].size(), kEntrySize * 4); // ., .., file1, subdir + MockLibFuse::DirEntry* entries = + reinterpret_cast(fuse_.buffers[0].data()); + + // Get inos for "file1.txt" and "subdir". + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 2); + + std::unordered_map expected; + expected["."] = {FUSE_ROOT_ID, + FakeManifestBuilder::kRootDirPerms | path::MODE_IFDIR, + {0}, + 0}; + expected[".."] = {FUSE_ROOT_ID, + FakeManifestBuilder::kRootDirPerms | path::MODE_IFDIR, + {0}, + 0}; + expected[kFile1Name] = { + fuse_.entries[0].ino & 0xFFFF, kFile1Perm | path::MODE_IFREG, {0}, 0}; + expected[kSubdirName] = { + fuse_.entries[1].ino & 0xFFFF, kSubdirPerm | path::MODE_IFDIR, {0}, 0}; + + // A couple of things to note: + // - ReadDir() only fills the ino and mode attr entries. This is expected + // and matches what libfuse uses. The filesystem actually GetAttr() on every + // entry to get the full attributes. This has been fixed by ReadDirPlus() + // in LibFuse 3. + // - ".." is assigned the |FUSE_ROOT_ID| (this works fine, trust me!). + // - Unfortunately, on Windows stat::st_ino is 16 bit and there seems to be + // no good way to make it 64 bit. On Linux, we assert 64 bits, though. 
+ + std::unordered_set unique_assets; + for (size_t i = 0; i < 4; ++i) { + auto it = expected.find(entries[i].name); + ASSERT_NE(it, expected.end()); + EXPECT_EQ(entries[i].ino & 0xFFFF, it->second.ino); + EXPECT_EQ(entries[i].mode, it->second.mode); + unique_assets.insert(entries[i].name); + } + EXPECT_EQ(unique_assets.size(), expected.size()); +} + +TEST_F(CdcFuseFsTest, ReadDirWithOffsetSizeSucceeds) { + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + fuse_file_info fi; + CdcFuseReadDir(req_, FUSE_ROOT_ID, kEntrySize * 2, kEntrySize, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + ASSERT_EQ(fuse_.buffers[0].size(), kEntrySize * 2); + MockLibFuse::DirEntry* entries = + reinterpret_cast(fuse_.buffers[0].data()); + + std::unordered_set known_assets; + known_assets.insert("."); + known_assets.insert(".."); + known_assets.insert(kFile1Name); + known_assets.insert(kSubdirName); + for (size_t i = 0; i < 2; ++i) + EXPECT_NE(known_assets.find(entries[i].name), known_assets.end()) + << "Could not find " << entries[i].name; +} + +TEST_F(CdcFuseFsTest, ReadDirBeyondEofSucceeds) { + // Start reading at entry 7, but there are only 6 entries. + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + fuse_file_info fi; + CdcFuseReadDir(req_, FUSE_ROOT_ID, kEntrySize * 14, kEntrySize * 7, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + EXPECT_TRUE(fuse_.buffers[0].empty()); +} + +TEST_F(CdcFuseFsTest, ReadDirFailsNotADirectory) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + fuse_file_info fi; + CdcFuseReadDir(req_, fuse_.entries[0].ino, 1, 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOTDIR); +} + +TEST_F(CdcFuseFsTest, ReadDirFailsInvalidIndirectAssetList) { + FakeManifestBuilder builder(&cache_); + ContentIdProto invalid_id; + *builder.Root()->add_dir_indirect_assets() = invalid_id; + ContentIdProto root_id = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(root_id)); + + fuse_file_info fi; + CdcFuseReadDir(req_, FUSE_ROOT_ID, 1, 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EBADF); +} + +TEST_F(CdcFuseFsTest, Forget) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + EXPECT_EQ(fuse_.entries.size(), 1u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + // No new entry should be created as forget() finishes with + // fuse_reply_none(). 
+ EXPECT_EQ(fuse_.entries.size(), 1u); + EXPECT_TRUE(fuse_.errors.empty()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(fuse_.none_counter, 1u); +} + +TEST_F(CdcFuseFsTest, ForgetMulti) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 2); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + fuse_forget_data nodes_to_forget[2]; + nodes_to_forget[0].ino = fuse_.entries[0].ino; + nodes_to_forget[0].nlookup = 1; + nodes_to_forget[1].ino = fuse_.entries[1].ino; + nodes_to_forget[1].nlookup = 1; + CdcFuseForgetMulti(req_, 2u, &nodes_to_forget[0]); + + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(fuse_.none_counter, 1u); +} + +TEST_F(CdcFuseFsTest, DoNotForgetRoot) { + CdcFuseForget(req_, FUSE_ROOT_ID, 1u); + EXPECT_EQ(fuse_.none_counter, 1u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); +} + +TEST_F(CdcFuseFsTest, DoNotForgetParent) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + + CdcFuseLookup(req_, fuse_.entries[0].ino, kFile2Name); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + ASSERT_EQ(fuse_.entries.size(), 2); + + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 1u); + + // The inode for kFile2Name still holds a reference to its parent. + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + // Unreal scenario: second forget for parent. + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 2u); + + // The inode for kFile2Name still holds a reference to its parent. + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); +} + +TEST_F(CdcFuseFsTest, ForgetAllParentsExceptRoot) { + FakeManifestBuilder builder(&cache_); + builder.AddDirectory(builder.Root(), kFile1Name, kFile1Mtime, kFile1Perm); + AssetProto* subdir1 = builder.AddDirectory(builder.Root(), kSubdirName, + kSubdirMtime, kSubdirPerm); + AssetProto* subdir2 = + builder.AddDirectory(subdir1, kSubdirName, kSubdirMtime, kSubdirPerm); + AssetProto* subdir3 = + builder.AddDirectory(subdir2, kSubdirName, kSubdirMtime, kSubdirPerm); + builder.AddFile(subdir3, kFile2Name, kFile2Mtime, kFile2Perm, kFile2Data); + EXPECT_OK(cdc_fuse_fs::SetManifest(cache_.AddProto(*builder.Manifest()))); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 1); + CdcFuseLookup(req_, fuse_.entries[0].ino, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 2); + CdcFuseLookup(req_, fuse_.entries[1].ino, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 3); + CdcFuseLookup(req_, fuse_.entries[2].ino, kFile2Name); + ASSERT_EQ(fuse_.entries.size(), 4); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 4u); + + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 1u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 4u); + + CdcFuseForget(req_, fuse_.entries[1].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 2u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 4u); + + CdcFuseForget(req_, fuse_.entries[2].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 3u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 4u); + + CdcFuseForget(req_, fuse_.entries[3].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 4u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); +} + +TEST_F(CdcFuseFsTest, AccessToReadSucceeds) { + FakeManifestBuilder builder(&cache_); + + // Only read access for a specific rights collection. 
+ builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRUSR, + kFile1Data); + builder.AddFile(builder.Root(), kGroupFile, kFile1Mtime, path::MODE_IRGRP, + kFile1Data); + builder.AddFile(builder.Root(), kWorldFile, kFile1Mtime, path::MODE_IROTH, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + CdcFuseLookup(req_, FUSE_ROOT_ID, kWorldFile); + EXPECT_EQ(fuse_.entries.size(), 3u); + + // Each file should allow read access. + ExpectAccessError(R_OK, 0 /*error*/); + + // No file should provide write access. + ExpectAccessError(W_OK, EACCES /*error*/); + + // No file should provide exec access. + ExpectAccessError(X_OK, EACCES /*error*/); + + // No file should provide all types of access. + ExpectAccessError(R_OK | W_OK | X_OK, EACCES /*error*/); +} + +TEST_F(CdcFuseFsTest, AccessToAllRightsSucceeds) { + FakeManifestBuilder builder(&cache_); + + // All access rights for a specific rights collection. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + builder.AddFile(builder.Root(), kGroupFile, kFile1Mtime, path::MODE_IRWXG, + kFile1Data); + builder.AddFile(builder.Root(), kWorldFile, kFile1Mtime, path::MODE_IRWXO, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + CdcFuseLookup(req_, FUSE_ROOT_ID, kWorldFile); + EXPECT_EQ(fuse_.entries.size(), 3u); + + ExpectAccessSucceeds(); +} + +TEST_F(CdcFuseFsTest, AccessFailsWrongUser) { + FakeManifestBuilder builder(&cache_); + + // Only the default user has all rights. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + // The user does not have rights to access the file. + // Only root and default users have access. + fuse_.SetUid(100); + + ExpectAccessError(R_OK, EACCES /*error*/); +} + +TEST_F(CdcFuseFsTest, AccessFailsWrongGroup) { + FakeManifestBuilder builder(&cache_); + + // Only the users of the default group have all rights. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXG, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + fuse_.SetGid(100); + + // The user does not have rights to access the file. + ExpectAccessError(R_OK, EACCES /*error*/); +} + +TEST_F(CdcFuseFsTest, AccessAsRootUserSucceeds) { + FakeManifestBuilder builder(&cache_); + + // No rights are set. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, 0, kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + fuse_.SetUid(internal::kCdcFuseRootUid); + + ExpectAccessSucceeds(); +} + +TEST_F(CdcFuseFsTest, AccessAsRootGroupSucceeds) { + FakeManifestBuilder builder(&cache_); + + // No rights are set. 
+ builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, 0, kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + fuse_.SetGid(internal::kCdcFuseRootGid); + + ExpectAccessSucceeds(); +} + +TEST_F(CdcFuseFsTest, SetInvalidManifestFails) { + ContentIdProto invalid_manifest_id; + absl::Status status = cdc_fuse_fs::SetManifest(invalid_manifest_id); + EXPECT_NOT_OK(status); + + // The old manifest still should be valid: its files should be requestable. + ExpectAccessSucceeds(); +} + +TEST_F(CdcFuseFsTest, AddFileUpdateManifestSucceeds) { + FakeManifestBuilder builder(&cache_); + + // All access rights for a specific rights collection. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + // The file does not exists -> error. + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOENT); + + // Add the missing file and update the manifest id. + builder.AddFile(builder.Root(), kGroupFile, kFile1Mtime, path::MODE_IRWXG, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + // Can get access to the first file. + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 2u); + + // Can get access to the new file. + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + EXPECT_EQ(fuse_.entries.size(), 3u); +} + +TEST_F(CdcFuseFsTest, CompletelyUpdatedManifestSucceeds) { + FakeManifestBuilder builder(&cache_); + + // All access rights for a specific rights collection. + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + FakeManifestBuilder builder2(&cache_); + // Add the missing file and update the manifest id. + builder2.AddFile(builder2.Root(), kGroupFile, kFile1Mtime, path::MODE_IRWXG, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder2.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + // Cannot get access to the old file. + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOENT); + + // Can get access to the new file. + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + EXPECT_EQ(fuse_.entries.size(), 2u); +} + +TEST_F(CdcFuseFsTest, CompletelyUpdatedManifestForgetOldFileSucceeds) { + FakeManifestBuilder builder(&cache_); + + builder.AddFile(builder.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 1u); + + FakeManifestBuilder builder2(&cache_); + // Add file and update the manifest id. 
+ builder2.AddFile(builder2.Root(), kGroupFile, kFile1Mtime, path::MODE_IRWXG, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder2.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 1u); + + // Cannot get access to the old file. + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + EXPECT_EQ(fuse_.none_counter, 1u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 0u); + + // Can get access to the new file. + CdcFuseLookup(req_, FUSE_ROOT_ID, kGroupFile); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + EXPECT_EQ(fuse_.entries.size(), 2u); +} + +TEST_F(CdcFuseFsTest, AddFileUpdateManifestOldInodesValid) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + // Update manifest while adding a new file. + builder_.AddFile(builder_.Root(), kUserFile, kFile1Mtime, path::MODE_IRWXU, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder_.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // New file should be accessible. + CdcFuseLookup(req_, FUSE_ROOT_ID, kUserFile); + EXPECT_EQ(fuse_.entries.size(), 2u); + + // inode for kFile1Name should be valid. + fuse_file_info fi; + CdcFuseRead(req_, fuse_.entries[0].ino, kFile1Data.size(), 0, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + EXPECT_EQ(fuse_.buffers[0], kFile1Data); +} + +TEST_F(CdcFuseFsTest, ModifyFileUpdateManifestOldInodesValid) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + // Update manifest by modifying a file. + builder_.ModifyFile(builder_.Root(), kFile1Name, kFile2Mtime, kFile2Perm, + kFile2Data); + manifest_id_ = cache_.AddProto(*builder_.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 0u); + + // inode for kFile1Name should be valid, but the content should be new -> + // reload is required. + fuse_file_info fi; + CdcFuseRead(req_, fuse_.entries[0].ino, kFile2Data.size(), 0, &fi); + EXPECT_TRUE(fuse_.buffers.empty()); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EIO); + + // Forget the inode, lookup + read it again -> should succeed. + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(fuse_.none_counter, 1u); + + // Read it again. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ExpectAttr(fuse_.entries[1].attr, kFile2Perm | path::MODE_IFREG, + kFile2Data.size(), kFile2Mtime); + CdcFuseRead(req_, fuse_.entries[1].ino, kFile2Data.size(), 0, &fi); + ASSERT_EQ(fuse_.buffers.size(), 1); + EXPECT_EQ(fuse_.buffers[0], kFile2Data); +} + +TEST_F(CdcFuseFsTest, + LookupFileInSubfolderRemoveSubfolderUpdateManifestReadFile) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + CdcFuseLookup(req_, fuse_.entries[1].ino, kFile2Name); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 3u); + ASSERT_EQ(fuse_.entries.size(), 3); + + // Update manifest: it has only 1 file. 
+ FakeManifestBuilder builder(&cache_); + builder.AddFile(builder.Root(), kFile1Name, kFile1Mtime, kFile1Perm, + kFile1Data); + + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // The total amount of valid and invalid inodes should stay the same. + // As the old inodes could be potentially accessed by the system and should be + // forgotten with forget() or forget_multi(). + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 2u); + + fuse_file_info fi; + CdcFuseRead(req_, fuse_.entries[2].ino, kFile2Data.size(), 0, &fi); + EXPECT_TRUE(fuse_.buffers.empty()); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOENT); + + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + CdcFuseReadDir(req_, fuse_.entries[1].ino, kEntrySize * 10, 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 2u); + EXPECT_EQ(fuse_.errors[1], ENOENT); +} + +TEST_F(CdcFuseFsTest, FileToFolderUpdateManifest) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1); + + // Read everything from file1 -> fill internal asset's structures. + fuse_file_info file_info; + CdcFuseRead(req_, fuse_.entries[0].ino, kFile1Data.size(), 0, &file_info); + ASSERT_EQ(fuse_.buffers.size(), 1); + EXPECT_EQ(fuse_.buffers[0], kFile1Data); + + // Change file1.txt to folder. + FakeManifestBuilder builder(&cache_); + builder.AddDirectory(builder.Root(), kFile1Name, kFile1Mtime, kFile1Perm); + AssetProto* subdir = builder.AddDirectory(builder.Root(), kSubdirName, + kSubdirMtime, kSubdirPerm); + builder.AddFile(subdir, kFile2Name, kFile2Mtime, kFile2Perm, kFile2Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // Read directory should succeed. + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + fuse_file_info dir_info; + CdcFuseReadDir(req_, fuse_.entries[0].ino, kEntrySize * 10, 0, &dir_info); + ASSERT_EQ(fuse_.buffers.size(), 2); + ASSERT_EQ(fuse_.buffers[1].size(), kEntrySize * 2); // ., .. + MockLibFuse::DirEntry* entries = + reinterpret_cast(fuse_.buffers[1].data()); + EXPECT_STREQ(entries[0].name, "."); + EXPECT_STREQ(entries[1].name, ".."); +} + +TEST_F(CdcFuseFsTest, FolderToFileUpdateManifest) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + + // Read directory to fill internal asset's structures. + const size_t kEntrySize = sizeof(MockLibFuse::DirEntry); + fuse_file_info dir_info; + CdcFuseReadDir(req_, fuse_.entries[0].ino, kEntrySize * 10, 0, &dir_info); + ASSERT_EQ(fuse_.buffers.size(), 1); + ASSERT_EQ(fuse_.buffers[0].size(), kEntrySize * 3); // ., .., file2.txt + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + // Get inos for "file2.txt". + CdcFuseLookup(req_, fuse_.entries[0].ino, kFile2Name); + ASSERT_EQ(fuse_.entries.size(), 2); + + // Change subfolder to file. + FakeManifestBuilder builder(&cache_); + builder.AddFile(builder.Root(), kSubdirName, kSubdirMtime, kSubdirPerm, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), + 1u); // the number of inodes should not change. 
+ + // Reading file should fail as the proto has been changed. + fuse_file_info fi; + CdcFuseRead(req_, fuse_.entries[0].ino, kFile1Data.size(), 0, &fi); + EXPECT_EQ(fuse_.buffers.size(), 1); // should not change. + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], EIO); + + // Forget the inode, lookup + read it again -> should succeed. + CdcFuseForget(req_, fuse_.entries[0].ino, 1u); + CdcFuseForget(req_, fuse_.entries[1].ino, 2u); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(fuse_.none_counter, 2u); + + // Read it again. + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + CdcFuseRead(req_, fuse_.entries[2].ino, kFile1Data.size(), 0, &fi); + ASSERT_EQ(fuse_.buffers.size(), 2); + EXPECT_EQ(fuse_.buffers[1], kFile1Data); +} + +TEST_F(CdcFuseFsTest, ModifyFileUpdateManifestTwiceOldInodesValid) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + // Update manifest by modifying a file. + builder_.ModifyFile(builder_.Root(), kFile1Name, kFile2Mtime, kFile2Perm, + kFile2Data); + manifest_id_ = cache_.AddProto(*builder_.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // Lookup should return the same ino, but different attributes. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 2u); + EXPECT_EQ(fuse_.entries[1].ino, fuse_.entries[0].ino); + ExpectAttr(fuse_.entries[1].attr, kFile2Perm | path::MODE_IFREG, + kFile2Data.size(), kFile2Mtime); + + // Update the file and manifest second time. + builder_.ModifyFile(builder_.Root(), kFile1Name, kFile1Mtime, kFile1Perm, + kFile1Data); + manifest_id_ = cache_.AddProto(*builder_.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // Lookup should return the same ino, but different attributes. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 3u); + EXPECT_EQ(fuse_.entries[2].ino, fuse_.entries[1].ino); + ExpectAttr(fuse_.entries[2].attr, kFile1Perm | path::MODE_IFREG, + kFile1Data.size(), kFile1Mtime); +} + +TEST_F(CdcFuseFsTest, RemoveFolderUpdateManifest) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + CdcFuseLookup(req_, fuse_.entries[0].ino, kFile2Name); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + // Remove subfolder. + FakeManifestBuilder builder(&cache_); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 2u); +} + +TEST_F(CdcFuseFsTest, InvalidateSubSubDirInvalidateSubDirSucceeds) { + // Create a subdir in a subdir of the root. + FakeManifestBuilder builder(&cache_); + AssetProto* subdir = builder.AddDirectory(builder.Root(), kSubdirName, + kSubdirMtime, kSubdirPerm); + builder.AddDirectory(subdir, kSubdirName, kSubdirMtime, kSubdirPerm); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + // Both subdirs are in inodes. + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + CdcFuseLookup(req_, fuse_.entries[0].ino, kSubdirName); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 2u); + + // Update manifest while removing subsubdir. 
+ FakeManifestBuilder builder1(&cache_); + builder1.AddDirectory(builder1.Root(), kSubdirName, kSubdirMtime, + kSubdirPerm); + manifest_id_ = cache_.AddProto(*builder1.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 1u); + + // Update manifest while removing subdir. + FakeManifestBuilder builder2(&cache_); + manifest_id_ = cache_.AddProto(*builder2.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 0u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 2u); + + fuse_file_info fi; + CdcFuseReadDir(req_, fuse_.entries[0].ino, 1, 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 1); + EXPECT_EQ(fuse_.errors[0], ENOENT); + + CdcFuseReadDir(req_, fuse_.entries[1].ino, 1, 0, &fi); + ASSERT_EQ(fuse_.errors.size(), 2); + EXPECT_EQ(fuse_.errors[1], ENOENT); +} + +TEST_F(CdcFuseFsTest, OpenDirSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseOpenDir(req_, fuse_.entries[0].ino, &fi); + EXPECT_TRUE(fuse_.errors.empty()); + ASSERT_EQ(fuse_.open_files.size(), 1u); +} + +TEST_F(CdcFuseFsTest, OpenDirFailsNotADirectory) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseOpenDir(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOTDIR); +} + +TEST_F(CdcFuseFsTest, OpenDirFailsInvalidDir) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 1u); + + // Remove subdir. + FakeManifestBuilder builder(&cache_); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + fuse_file_info fi; + CdcFuseOpenDir(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOENT); +} + +TEST_F(CdcFuseFsTest, ReleaseSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseRelease(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], 0u); +} + +TEST_F(CdcFuseFsTest, ReleaseFailsForDirectory) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseRelease(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], EISDIR); +} + +TEST_F(CdcFuseFsTest, ReleaseFailsInvalidFile) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + + // Remove file. 
+ FakeManifestBuilder builder(&cache_); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + fuse_file_info fi; + CdcFuseRelease(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOENT); +} + +TEST_F(CdcFuseFsTest, ReleaseDirSucceeds) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseRelease(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], 0u); +} + +TEST_F(CdcFuseFsTest, ReleaseDirFailsNotADirectory) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + ASSERT_EQ(fuse_.entries.size(), 1u); + + fuse_file_info fi; + CdcFuseReleaseDir(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOTDIR); +} + +TEST_F(CdcFuseFsTest, ReleaseDirFailsInvalidDirectory) { + // Get inode. + CdcFuseLookup(req_, FUSE_ROOT_ID, kSubdirName); + + // Remove subdir. + FakeManifestBuilder builder(&cache_); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + + fuse_file_info fi; + CdcFuseReleaseDir(req_, fuse_.entries[0].ino, &fi); + ASSERT_EQ(fuse_.errors.size(), 1u); + EXPECT_EQ(fuse_.errors[0], ENOENT); +} + +TEST_F(CdcFuseFsTest, UpdateManifestEmptyFile) { + CdcFuseLookup(req_, FUSE_ROOT_ID, kFile1Name); + EXPECT_EQ(fuse_.entries.size(), 1u); + FakeManifestBuilder builder(&cache_); + + builder.AddFile(builder.Root(), kFile1Name, kFile1Mtime, path::MODE_IRWXU, + {}); + manifest_id_ = cache_.AddProto(*builder.Manifest()); + EXPECT_OK(cdc_fuse_fs::SetManifest(manifest_id_)); + + EXPECT_EQ("FUSE consistency check succeeded", Log()->LastMessage()); + EXPECT_EQ(CdcFuseGetInodeCountForTesting(), 1u); + EXPECT_EQ(CdcFuseGetInvalidInodeCountForTesting(), 0u); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_fuse_fs/config_stream_client.cc b/cdc_fuse_fs/config_stream_client.cc new file mode 100644 index 0000000..c80561c --- /dev/null +++ b/cdc_fuse_fs/config_stream_client.cc @@ -0,0 +1,122 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/config_stream_client.h" + +#include + +#include "common/grpc_status.h" +#include "common/log.h" +#include "manifest/content_id.h" + +namespace cdc_ft { + +using GetManifestIdRequest = proto::GetManifestIdRequest; +using GetManifestIdResponse = proto::GetManifestIdResponse; +using AckManifestIdReceivedRequest = proto::AckManifestIdReceivedRequest; +using AckManifestIdReceivedResponse = proto::AckManifestIdReceivedResponse; +using ConfigStreamService = proto::ConfigStreamService; + +// Asynchronous gRPC streaming client for streaming configuration changes to +// gamelets. 
The client runs inside the CDC FUSE and requests updated manifest
+// from the workstation.
+class ManifestIdReader {
+ public:
+  ManifestIdReader(ConfigStreamService::Stub* stub) : stub_(stub) {}
+
+  // Starts a GetManifestId() request and listens to the stream of manifest ids
+  // sent from the workstation. Calls |callback| on every manifest id received.
+  absl::Status StartListeningToManifestUpdates(
+      std::function<absl::Status(const ContentIdProto&)> callback) {
+    callback_ = callback;
+
+    GetManifestIdRequest request;
+    assert(!reader_);
+    reader_ = stub_->GetManifestId(&context_, request);
+    if (!reader_)
+      return absl::UnavailableError("Failed to create manifest id reader");
+
+    reader_thread_ =
+        std::make_unique<std::thread>([this]() { ReadThreadMain(); });
+    return absl::OkStatus();
+  }
+
+  // Thread that reads manifest ids from the GetManifestId() response stream.
+  void ReadThreadMain() {
+    GetManifestIdResponse response;
+    LOG_INFO("Started manifest id reader thread");
+    for (;;) {
+      LOG_INFO("Waiting for manifest id update");
+      if (!reader_->Read(&response)) break;
+
+      LOG_INFO("Received new manifest id '%s'",
+               ContentId::ToHexString(response.id()));
+      absl::Status status = callback_(response.id());
+      if (!status.ok()) {
+        LOG_ERROR("Failed to execute callback for manifest update '%s': '%s'",
+                  ContentId::ToHexString(response.id()), status.message());
+      }
+    }
+    // This should happen if the server shuts down.
+    LOG_INFO("Stopped manifest id reader thread");
+  }
+
+  void Shutdown() {
+    if (!reader_thread_) return;
+
+    context_.TryCancel();
+    if (reader_thread_->joinable()) reader_thread_->join();
+    reader_thread_.reset();
+  }
+
+ private:
+  ConfigStreamService::Stub* stub_;
+  grpc::ClientContext context_;
+  std::unique_ptr<grpc::ClientReader<GetManifestIdResponse>> reader_;
+  std::function<absl::Status(const ContentIdProto&)> callback_;
+  std::unique_ptr<std::thread> reader_thread_;
+};
+
+ConfigStreamClient::ConfigStreamClient(std::string instance,
+                                       std::shared_ptr<grpc::Channel> channel)
+    : instance_(std::move(instance)),
+      stub_(ConfigStreamService::NewStub(std::move(channel))),
+      read_client_(std::make_unique<ManifestIdReader>(stub_.get())) {}
+
+ConfigStreamClient::~ConfigStreamClient() = default;
+
+absl::Status ConfigStreamClient::StartListeningToManifestUpdates(
+    std::function<absl::Status(const ContentIdProto&)> callback) {
+  LOG_INFO("Starting to listen to manifest updates");
+  return read_client_->StartListeningToManifestUpdates(callback);
+}
+
+absl::Status ConfigStreamClient::SendManifestAck(ContentIdProto manifest_id) {
+  AckManifestIdReceivedRequest request;
+  request.set_gamelet_id(instance_);
+  *request.mutable_manifest_id() = std::move(manifest_id);
+
+  grpc::ClientContext context_;
+  AckManifestIdReceivedResponse response;
+  RETURN_ABSL_IF_ERROR(
+      stub_->AckManifestIdReceived(&context_, request, &response));
+  return absl::OkStatus();
+}
+
+void ConfigStreamClient::Shutdown() {
+  LOG_INFO("Stopping to listen to manifest updates");
+  read_client_->Shutdown();
+}
+
+}  // namespace cdc_ft
diff --git a/cdc_fuse_fs/config_stream_client.h b/cdc_fuse_fs/config_stream_client.h
new file mode 100644
index 0000000..fa25b1a
--- /dev/null
+++ b/cdc_fuse_fs/config_stream_client.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CDC_FUSE_FS_CONFIG_STREAM_CLIENT_H_
+#define CDC_FUSE_FS_CONFIG_STREAM_CLIENT_H_
+
+#include <memory>
+
+#include "absl/status/status.h"
+#include "grpcpp/grpcpp.h"
+#include "manifest/manifest_proto_defs.h"
+#include "proto/asset_stream_service.grpc.pb.h"
+
+namespace grpc_impl {
+class Channel;
+}
+
+namespace cdc_ft {
+
+class ManifestIdReader;
+
+class ConfigStreamClient {
+ public:
+  // |instance| is the id of the gamelet.
+  // |channel| is a gRPC channel to use.
+  ConfigStreamClient(std::string instance,
+                     std::shared_ptr<grpc::Channel> channel);
+  ~ConfigStreamClient();
+
+  // Sends a request to get a stream of manifest id updates. |callback| is
+  // called from a background thread for every manifest id received.
+  // Returns immediately without waiting for the first manifest id.
+  absl::Status StartListeningToManifestUpdates(
+      std::function<absl::Status(const ContentIdProto&)> callback);
+
+  // Sends a message to indicate that the |manifest_id| was received and FUSE
+  // has been updated to use the new manifest.
+  absl::Status SendManifestAck(ContentIdProto manifest_id);
+
+  // Stops listening for manifest updates.
+  void Shutdown();
+
+ private:
+  using ConfigStreamService = proto::ConfigStreamService;
+
+  const std::string instance_;
+  const std::unique_ptr<ConfigStreamService::Stub> stub_;
+
+  std::unique_ptr<ManifestIdReader> read_client_;
+};
+
+}  // namespace cdc_ft
+
+#endif  // CDC_FUSE_FS_CONFIG_STREAM_CLIENT_H_
diff --git a/cdc_fuse_fs/constants.h b/cdc_fuse_fs/constants.h
new file mode 100644
index 0000000..b52d873
--- /dev/null
+++ b/cdc_fuse_fs/constants.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CDC_FUSE_FS_CONSTANTS_H_
+#define CDC_FUSE_FS_CONSTANTS_H_
+
+namespace cdc_ft {
+
+// FUSE prints this to stdout when the binary timestamp and file size match the
+// file on the workstation.
+static constexpr char kFuseUpToDate[] = "cdc_fuse_fs is up-to-date";
+
+// FUSE prints this to stdout when the binary timestamp or file size does not
+// match the file on the workstation. It indicates that the binary has to be
+// redeployed.
+static constexpr char kFuseNotUpToDate[] = "cdc_fuse_fs is not up-to-date";
+
+}  // namespace cdc_ft
+
+#endif  // CDC_FUSE_FS_CONSTANTS_H_
diff --git a/cdc_fuse_fs/main.cc b/cdc_fuse_fs/main.cc
new file mode 100644
index 0000000..673a795
--- /dev/null
+++ b/cdc_fuse_fs/main.cc
@@ -0,0 +1,202 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl_helper/jedec_size_flag.h" +#include "cdc_fuse_fs/cdc_fuse_fs.h" +#include "cdc_fuse_fs/constants.h" +#include "common/gamelet_component.h" +#include "common/log.h" +#include "common/path.h" +#include "data_store/data_provider.h" +#include "data_store/disk_data_store.h" +#include "data_store/grpc_reader.h" +#include "grpcpp/channel.h" +#include "grpcpp/create_channel.h" +#include "grpcpp/support/channel_arguments.h" + +namespace cdc_ft { +namespace { + +constexpr char kFuseFilename[] = "cdc_fuse_fs"; +constexpr char kLibFuseFilename[] = "libfuse.so"; + +absl::StatusOr IsUpToDate(const std::string& components_arg) { + // Components are expected to reside in the same dir as the executable. + std::string component_dir; + RETURN_IF_ERROR(path::GetExeDir(&component_dir)); + + std::vector components = + GameletComponent::FromCommandLineArgs(components_arg); + std::vector our_components; + absl::Status status = + GameletComponent::Get({path::Join(component_dir, kFuseFilename), + path::Join(component_dir, kLibFuseFilename)}, + &our_components); + if (!status.ok() || components != our_components) { + return false; + } + + return true; +} + +} // namespace +} // namespace cdc_ft + +ABSL_FLAG(std::string, instance, "", "Gamelet instance id"); +ABSL_FLAG( + std::string, components, "", + "Whitespace-separated triples filename, size and timestamp of the " + "workstation version of this binary and dependencies. Used for a fast " + "up-to-date check."); +ABSL_FLAG(uint16_t, port, 0, "Port to connect to on localhost"); +ABSL_FLAG(cdc_ft::JedecSize, prefetch_size, cdc_ft::JedecSize(512 << 10), + "Additional data to request from the server when a FUSE read of " + "maximum size is detected. This amount is added to the original " + "request. Supports common unit suffixes K, M, G"); +ABSL_FLAG(std::string, cache_dir, "/var/cache/asset_streaming", + "Cache directory to store data chunks."); +ABSL_FLAG(int, cache_dir_levels, 2, + "Fanout of sub-directories to create within the cache directory."); +ABSL_FLAG(int, verbosity, 0, "Log verbosity"); +ABSL_FLAG(bool, stats, false, "Enable statistics"); +ABSL_FLAG(bool, check, false, "Execute consistency check"); +ABSL_FLAG(cdc_ft::JedecSize, cache_capacity, + cdc_ft::JedecSize(cdc_ft::DiskDataStore::kDefaultCapacity), + "Cache capacity. Supports common unit suffixes K, M, G."); +ABSL_FLAG(uint32_t, cleanup_timeout, cdc_ft::DataProvider::kCleanupTimeoutSec, + "Period in seconds at which instance cache cleanups are run"); +ABSL_FLAG(uint32_t, access_idle_timeout, cdc_ft::DataProvider::kAccessIdleSec, + "Do not run instance cache cleanups for this many seconds after the " + "last file access"); + +static_assert(static_cast(absl::StatusCode::kOk) == 0, "kOk != 0"); + +// Usage: cdc_fuse_fs -- mount_dir [-d|-s|..] +// Any args after -- are FUSE args, search third_party/fuse for FUSE_OPT_KEY or +// FUSE_LIB_OPT (there doesn't seem to be a place where they're all described). +int main(int argc, char* argv[]) { + // Parse absl flags. 
+  std::vector<char*> mount_args = absl::ParseCommandLine(argc, argv);
+  std::string instance = absl::GetFlag(FLAGS_instance);
+  std::string components = absl::GetFlag(FLAGS_components);
+  uint16_t port = absl::GetFlag(FLAGS_port);
+  std::string cache_dir = absl::GetFlag(FLAGS_cache_dir);
+  int cache_dir_levels = absl::GetFlag(FLAGS_cache_dir_levels);
+  int verbosity = absl::GetFlag(FLAGS_verbosity);
+  bool stats = absl::GetFlag(FLAGS_stats);
+  bool consistency_check = absl::GetFlag(FLAGS_check);
+  uint64_t cache_capacity = absl::GetFlag(FLAGS_cache_capacity).Size();
+  unsigned int dp_cleanup_timeout = absl::GetFlag(FLAGS_cleanup_timeout);
+  unsigned int dp_access_idle_timeout =
+      absl::GetFlag(FLAGS_access_idle_timeout);
+
+  // Log to console. Logs are streamed back to the workstation through the SSH
+  // session.
+  cdc_ft::Log::Initialize(std::make_unique<cdc_ft::ConsoleLog>(
+      cdc_ft::Log::VerbosityToLogLevel(verbosity)));
+
+  // Perform up-to-date check.
+  absl::StatusOr<bool> is_up_to_date = cdc_ft::IsUpToDate(components);
+  if (!is_up_to_date.ok()) {
+    LOG_ERROR("Failed to check file system freshness: %s",
+              is_up_to_date.status().ToString());
+    return static_cast<int>(is_up_to_date.status().code());
+  }
+  if (!*is_up_to_date) {
+    printf("%s\n", cdc_ft::kFuseNotUpToDate);
+    return 0;
+  }
+  printf("%s\n", cdc_ft::kFuseUpToDate);
+  fflush(stdout);
+
+  // Create fs. The rest of the flags are mount flags, so pass them along.
+  absl::Status status = cdc_ft::cdc_fuse_fs::Initialize(
+      static_cast<int>(mount_args.size()), mount_args.data());
+  if (!status.ok()) {
+    LOG_ERROR("Failed to initialize file system: %s", status.ToString());
+    return static_cast<int>(status.code());
+  }
+
+  // Create disk data store.
+  absl::StatusOr<std::unique_ptr<cdc_ft::DiskDataStore>> store =
+      cdc_ft::DiskDataStore::Create(cache_dir_levels, cache_dir, false);
+  if (!store.ok()) {
+    LOG_ERROR("Failed to initialize the chunk cache in directory '%s': %s",
+              absl::GetFlag(FLAGS_cache_dir), store.status().ToString());
+    return 1;
+  }
+  LOG_INFO("Setting cache capacity to '%u'", cache_capacity);
+  store.value()->SetCapacity(cache_capacity);
+  LOG_INFO("Caching chunks in '%s'", store.value()->RootDir());
+
+  // Start a gRPC client.
+  std::string client_address = absl::StrFormat("localhost:%u", port);
+  grpc::ChannelArguments channel_args;
+  channel_args.SetMaxReceiveMessageSize(-1);
+  std::shared_ptr<grpc::Channel> grpc_channel = grpc::CreateCustomChannel(
+      client_address, grpc::InsecureChannelCredentials(), channel_args);
+  std::vector<std::unique_ptr<cdc_ft::DataStoreReader>> readers;
+  readers.emplace_back(
+      std::make_unique<cdc_ft::GrpcReader>(grpc_channel, stats));
+  cdc_ft::GrpcReader* grpc_reader =
+      static_cast<cdc_ft::GrpcReader*>(readers[0].get());
+
+  // Send all cached content ids to the client if statistics are enabled.
+  if (stats) {
+    LOG_INFO("Sending all cached content ids");
+    absl::StatusOr<std::vector<cdc_ft::ContentIdProto>> ids =
+        store.value()->List();
+    if (!ids.ok()) {
+      LOG_ERROR("Failed to get all cached content ids: %s",
+                ids.status().ToString());
+      return 1;
+    }
+    status = grpc_reader->SendCachedContentIds(*ids);
+    if (!status.ok()) {
+      LOG_ERROR("Failed to send all cached content ids: %s",
+                status.ToString());
+      return 1;
+    }
+  }
+
+  // Create data provider.
+  size_t prefetch_size = absl::GetFlag(FLAGS_prefetch_size).Size();
+  cdc_ft::DataProvider data_provider(std::move(*store), std::move(readers),
+                                     prefetch_size, dp_cleanup_timeout,
+                                     dp_access_idle_timeout);
+
+  if (!cdc_ft::cdc_fuse_fs::StartConfigClient(instance, grpc_channel).ok()) {
+    LOG_ERROR("Could not start reading configuration updates");
+    return 1;
+  }
+
+  // Run FUSE.
+ LOG_INFO("Running filesystem"); + status = cdc_ft::cdc_fuse_fs::Run(&data_provider, consistency_check); + if (!status.ok()) { + LOG_ERROR("Filesystem stopped with error: %s", status.ToString()); + } + LOG_INFO("Filesystem ran successfully and shuts down"); + + data_provider.Shutdown(); + cdc_ft::cdc_fuse_fs::Shutdown(); + cdc_ft::Log::Shutdown(); + + static_assert(static_cast(absl::StatusCode::kOk) == 0, "kOk != 0"); + return static_cast(status.code()); +} diff --git a/cdc_fuse_fs/mock_libfuse.cc b/cdc_fuse_fs/mock_libfuse.cc new file mode 100644 index 0000000..33828e2 --- /dev/null +++ b/cdc_fuse_fs/mock_libfuse.cc @@ -0,0 +1,111 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_fuse_fs/mock_libfuse.h" + +#include +#include + +namespace cdc_ft { +namespace { +MockLibFuse* g_fuse; +} + +MockLibFuse::MockLibFuse() { + assert(!g_fuse); + g_fuse = this; +} + +MockLibFuse::~MockLibFuse() { + assert(g_fuse == this); + g_fuse = nullptr; +} + +void MockLibFuse::SetUid(int uid) { + assert(g_fuse == this); + g_fuse->context.uid = uid; +} + +void MockLibFuse::SetGid(int gid) { + assert(g_fuse == this); + g_fuse->context.gid = gid; +} + +size_t fuse_add_direntry(fuse_req_t req, char* buf, size_t bufsize, + const char* name, const struct stat* stbuf, + off_t off) { + assert(g_fuse); + if (bufsize >= sizeof(MockLibFuse::DirEntry)) { + assert(stbuf); + auto* entry = reinterpret_cast(buf); + strncpy(entry->name, name, sizeof(entry->name)); + entry->name[sizeof(entry->name) - 1] = 0; + entry->ino = stbuf->st_ino; + entry->mode = stbuf->st_mode; + entry->off = off; + } + return sizeof(MockLibFuse::DirEntry); +} + +int fuse_reply_attr(fuse_req_t req, const struct stat* attr, + double attr_timeout) { + assert(g_fuse); + assert(attr); + g_fuse->attrs.emplace_back(*attr, attr_timeout); + return 0; +} + +int fuse_reply_buf(fuse_req_t req, const char* buf, size_t size) { + assert(g_fuse); + std::vector data; + if (buf && size > 0) { + data.insert(data.end(), buf, buf + size); + } + g_fuse->buffers.push_back(std::move(data)); + return 0; +} + +int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param* e) { + assert(g_fuse); + assert(e); + g_fuse->entries.push_back(*e); + return 0; +} + +int fuse_reply_err(fuse_req_t req, int err) { + assert(g_fuse); + g_fuse->errors.push_back(err); + return 0; +} + +int fuse_reply_open(fuse_req_t req, const struct fuse_file_info* fi) { + assert(g_fuse); + assert(fi); + g_fuse->open_files.push_back(*fi); + return 0; +} + +void fuse_reply_none(fuse_req_t req) { + assert(g_fuse); + ++g_fuse->none_counter; +} + +int fuse_reply_statfs(fuse_req_t req, const struct statvfs* stbuf) { return 0; } + +struct fuse_context* fuse_get_context() { + assert(g_fuse); + return &g_fuse->context; +} + +} // namespace cdc_ft diff --git a/cdc_fuse_fs/mock_libfuse.h b/cdc_fuse_fs/mock_libfuse.h new file mode 100644 index 0000000..21abf7b --- /dev/null +++ b/cdc_fuse_fs/mock_libfuse.h @@ -0,0 +1,123 @@ +/* + * Copyright 2022 Google LLC + 
*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CDC_FUSE_FS_MOCK_LIBFUSE_H_
+#define CDC_FUSE_FS_MOCK_LIBFUSE_H_
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace cdc_ft {
+
+//
+// The interface below mimics the part of the FUSE low level interface we need.
+// See include/fuse_lowlevel.h for more information.
+//
+
+// Definitions.
+using fuse_ino_t = uint64_t;
+using fuse_req_t = void*;
+using nlink_t = uint64_t;
+
+constexpr fuse_ino_t FUSE_ROOT_ID = 1;
+#ifndef O_DIRECT
+constexpr uint32_t O_DIRECT = 040000;
+#endif
+
+struct fuse_entry_param {
+  fuse_ino_t ino;
+  struct stat attr;
+  double attr_timeout;
+  double entry_timeout;
+};
+
+struct fuse_file_info {
+  int flags = O_RDONLY;
+  unsigned int direct_io : 1;
+  unsigned int keep_cache : 1;
+
+  fuse_file_info() : direct_io(0), keep_cache(0) {}
+  explicit fuse_file_info(int flags)
+      : flags(flags), direct_io(0), keep_cache(0) {}
+};
+
+struct fuse_forget_data {
+  uint64_t ino;
+  uint64_t nlookup;
+};
+
+struct fuse_context {
+  int uid;
+  int gid;
+};
+
+struct statvfs {
+  uint32_t f_bsize;
+  uint32_t f_namemax;
+};
+
+// FUSE reply/action functions.
+size_t fuse_add_direntry(fuse_req_t req, char* buf, size_t bufsize,
+                         const char* name, const struct stat* stbuf, off_t off);
+int fuse_reply_attr(fuse_req_t req, const struct stat* attr,
+                    double attr_timeout);
+int fuse_reply_buf(fuse_req_t req, const char* buf, size_t size);
+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param* e);
+int fuse_reply_err(fuse_req_t req, int err);
+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info* fi);
+void fuse_reply_none(fuse_req_t req);
+int fuse_reply_statfs(fuse_req_t req, const struct statvfs* stbuf);
+struct fuse_context* fuse_get_context();
+
+// FUSE mocking class. Basically just a recorder for the fuse_* callbacks above.
+struct MockLibFuse {
+ public:
+  MockLibFuse();
+  ~MockLibFuse();
+
+  struct Attr {
+    struct stat value;
+    double timeout;
+    Attr(struct stat value, double timeout)
+        : value(std::move(value)), timeout(timeout) {}
+  };
+  void SetUid(int uid);
+  void SetGid(int gid);
+
+  // Struct stored in the buffer |buf| by fuse_add_direntry().
+  // Uses a maximum name size for simplicity.
+ struct DirEntry { + fuse_ino_t ino; + uint32_t mode; + char name[32]; + off_t off; + }; + + std::vector entries; + std::vector attrs; + std::vector errors; + std::vector open_files; + std::vector> buffers; + unsigned int none_counter = 0; + fuse_context context; +}; + +} // namespace cdc_ft + +#endif // CDC_FUSE_FS_MOCK_LIBFUSE_H_ diff --git a/cdc_indexer/BUILD b/cdc_indexer/BUILD new file mode 100644 index 0000000..93f4db4 --- /dev/null +++ b/cdc_indexer/BUILD @@ -0,0 +1,35 @@ +package(default_visibility = ["//visibility:public"]) + +cc_binary( + name = "cdc_indexer", + srcs = ["main.cc"], + deps = [ + ":indexer_lib", + "//absl_helper:jedec_size_flag", + "//common:path", + "@com_google_absl//absl/flags:config", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", + "@com_google_absl//absl/flags:usage", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "indexer_lib", + srcs = ["indexer.cc"], + hdrs = ["indexer.h"], + deps = [ + "//common:dir_iter", + "//common:path", + "//common:status_macros", + "//fastcdc", + "@com_github_blake3//:blake3", + "@com_google_absl//absl/functional:bind_front", + "@com_google_absl//absl/random", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + ], +) diff --git a/cdc_indexer/README.md b/cdc_indexer/README.md new file mode 100644 index 0000000..3a57b1c --- /dev/null +++ b/cdc_indexer/README.md @@ -0,0 +1,72 @@ +# CDC Indexer + +This directory contains a CDC indexer based on our implementation of +[FastCDC](https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf). + +Run the sample with Bazel: + +``` +bazel run -c opt //cdc_indexer -- --inputs '/path/to/files' +``` + +The CDC algorithm can be tweaked with a few compile-time constants for +experimentation. See the file `indexer.h` for preprocessor macros that can be +enabled, for example: + +``` +bazel build -c opt --copt=-DCDC_GEAR_TABLE=1 //cdc_indexer +``` + +At the end of the operation, the indexer outputs a summary of the results such +as the following: + +``` +00:02 7.44 GB in 2 files processed at 3.1 GB/s, 50% deduplication +Operation succeeded. + +Chunk size (min/avg/max): 128 KB / 256 KB / 1024 KB | Threads: 12 +gear_table: 64 bit | mask_s: 0x49249249249249 | mask_l: 0x1249249249 + Duration: 00:03 + Total files: 2 + Total chunks: 39203 + Unique chunks: 20692 + Total data: 9.25 GB + Unique data: 4.88 GB + Throughput: 3.07 GB/s + Avg. chunk size: 247 KB + Deduplication: 47.2% + + 160 KB ######### 1419 ( 7%) + 192 KB ######## 1268 ( 6%) + 224 KB ################### 2996 (14%) + 256 KB ######################################## 6353 (31%) + 288 KB ###################### 3466 (17%) + 320 KB ########################## 4102 (20%) + 352 KB ###### 946 ( 5%) + 384 KB 75 ( 0%) + 416 KB 27 ( 0%) + 448 KB 7 ( 0%) + 480 KB 5 ( 0%) + 512 KB 1 ( 0%) + 544 KB 4 ( 0%) + 576 KB 2 ( 0%) + 608 KB 3 ( 0%) + 640 KB 3 ( 0%) + 672 KB 3 ( 0%) + 704 KB 2 ( 0%) + 736 KB 0 ( 0%) + 768 KB 0 ( 0%) + 800 KB 1 ( 0%) + 832 KB 0 ( 0%) + 864 KB 0 ( 0%) + 896 KB 0 ( 0%) + 928 KB 0 ( 0%) + 960 KB 0 ( 0%) + 992 KB 0 ( 0%) +1024 KB 9 ( 0%) +``` + +For testing multiple combinations and comparing the results, the indexer also +features a flag `--results_file="results.csv"` which appends the raw data to the +given file in CSV format. Combine this flag with `--description` to label each +experiment with additional columns. 
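+
+For example, a labeled run that appends its raw numbers to a shared CSV file
+could look like this (the `--description` value shown here is purely
+illustrative):
+
+```
+bazel run -c opt //cdc_indexer -- \
+    --inputs '/path/to/files' \
+    --results_file="results.csv" \
+    --description="avg 256 KB, blake3"
+```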
diff --git a/cdc_indexer/indexer.cc b/cdc_indexer/indexer.cc new file mode 100644 index 0000000..c210ba7 --- /dev/null +++ b/cdc_indexer/indexer.cc @@ -0,0 +1,434 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_indexer/indexer.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/functional/bind_front.h" +#include "absl/strings/str_format.h" +#include "absl/time/clock.h" +#include "blake3.h" +#include "common/dir_iter.h" +#include "common/errno_mapping.h" +#include "common/path.h" +#include "common/status_macros.h" + +namespace cdc_ft { + +struct IndexerJob { + std::string filepath; +}; + +class Indexer::Impl { + public: + Impl(const IndexerConfig& cfg, const std::vector& inputs); + const IndexerConfig& Config() const; + + // Calls the given `progress` function periodically until `SetDone(true)` is + // called. + void TriggerProgress(ProgressFn fn); + bool GetNextJob(IndexerJob* job); + + bool HasError() const; + absl::Status Error() const; + void SetError(absl::Status err); + + void SetDone(bool done); + + inline const IndexerConfig& Cfg() const { return cfg_; } + inline Indexer::OpStats Stats() const; + inline Indexer::ChunkSizeMap ChunkSizes() const; + void AddChunk(const uint8_t* data, size_t len); + void AddFile(); + + private: + friend class Indexer; + // Calculates a hash value for the given data. + inline hash_t Hash(const uint8_t* data, size_t len); + inline hash_t HashBlake3(const uint8_t* data, size_t len); + inline hash_t HashXxhash(const uint8_t* data, size_t len); + // Finds the smallest power of 2 such that the result is <= size. If size is > + // 2^31, then UINT64_MAX is returned. + inline size_t SizeBucket(size_t size) const; + + IndexerConfig cfg_; + bool done_; + // The following members are all guarded by jobs_mutex_. + std::queue inputs_; + DirectoryIterator dir_iter_; + std::mutex jobs_mutex_; + // Guarded by chunks_mutex_ + Indexer::ChunkMap chunks_; + std::mutex chunks_mutex_; + // Guarded by stats_mutex_. + Indexer::OpStats stats_; + mutable std::mutex stats_mutex_; + // Guarded by chunk_sizes_mutex_; + Indexer::ChunkSizeMap chunk_sizes_; + mutable std::mutex chunk_sizes_mutex_; + // Guarded by result_mutex_ + absl::Status result_; + mutable std::mutex result_mutex_; +}; + +class Indexer::Worker { + public: + Worker(Impl* impl); + void Run(); + + private: + absl::Status IndexFile(const std::string& filepath); + + Impl* impl_; + absl::Cord buf_; + const fastcdc::Config cdc_cfg_; +}; + +// This class holds a `Worker` object and the associated `std::thread` object +// that executes it. 
+class Indexer::WorkerThread {
+ public:
+  WorkerThread() : worker(nullptr), thrd(nullptr) {}
+  ~WorkerThread() {
+    if (thrd) {
+      if (thrd->joinable()) thrd->join();
+      delete thrd;
+    }
+    if (worker) {
+      delete worker;
+    }
+  }
+  Worker* worker;
+  std::thread* thrd;
+};
+
+Indexer::Impl::Impl(const IndexerConfig& cfg,
+                    const std::vector& inputs)
+    : cfg_(cfg), done_(false) {
+  // Perform some sanity checks on the config.
+  if (cfg_.num_threads == 0)
+    cfg_.num_threads = std::thread::hardware_concurrency();
+  if (cfg_.read_block_size == 0) cfg_.read_block_size = 4 << 10;
+  if (cfg_.avg_chunk_size == 0) cfg_.avg_chunk_size = 256 << 10;
+  if (cfg_.min_chunk_size == 0 || cfg_.min_chunk_size > cfg_.avg_chunk_size)
+    cfg_.min_chunk_size = cfg_.avg_chunk_size >> 1;
+  if (cfg_.max_chunk_size == 0 || cfg_.max_chunk_size < cfg_.avg_chunk_size)
+    cfg_.max_chunk_size = cfg_.avg_chunk_size << 1;
+  if (cfg_.max_chunk_size_step == 0)
+    cfg_.max_chunk_size_step =
+        cfg_.min_chunk_size > 0 ? cfg_.min_chunk_size : 128u;
+  // Populate the CDC bitmasks which the Chunker creates. This is only done
+  // here so that they can be written to the output; setting them in the
+  // IndexerConfig has no effect.
+  fastcdc::Config ccfg(cfg_.min_chunk_size, cfg_.avg_chunk_size,
+                       cfg_.max_chunk_size);
+  Indexer::Chunker chunker(ccfg, nullptr);
+  cfg_.mask_s = chunker.Stage(0).mask;
+  cfg_.mask_l = chunker.Stage(chunker.StagesCount() - 1).mask;
+  // Collect inputs.
+  for (auto it = inputs.begin(); it != inputs.end(); ++it) {
+    inputs_.push(*it);
+  }
+}
+
+const IndexerConfig& Indexer::Impl::Config() const { return cfg_; }
+
+// Executes the `progress` function in a loop, approximately every 200ms. Call
+// `SetDone(true)` to stop this function.
+void Indexer::Impl::TriggerProgress(Indexer::ProgressFn fn) {
+  if (!fn) return;
+  const int64_t interval = 200;
+  absl::Time started = absl::Now();
+  // Keep going until we're done or an error occurred.
+  while (!done_ && !HasError()) {
+    absl::Time loop_started = absl::Now();
+    stats_mutex_.lock();
+    stats_.elapsed = loop_started - started;
+    stats_mutex_.unlock();
+
+    fn(Stats());
+    // Aim for one update every interval.
+    auto loop_elapsed = absl::ToInt64Milliseconds(absl::Now() - loop_started);
+    if (loop_elapsed < interval)
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(interval - loop_elapsed));
+  }
+}
+
+bool Indexer::Impl::GetNextJob(IndexerJob* job) {
+  // Stop if an error occurred.
+  if (HasError()) return false;
+  const std::lock_guard lock(jobs_mutex_);
+
+  DirectoryEntry dent;
+  while (!dent.Valid()) {
+    // Open the next directory, if needed.
+    if (!dir_iter_.Valid()) {
+      if (inputs_.empty()) {
+        // We are done.
+        return false;
+      } else {
+        std::string input = inputs_.front();
+        std::string uinput = path::ToUnix(input);
+        inputs_.pop();
+        // Return files as jobs.
+        if (path::FileExists(uinput)) {
+          job->filepath = uinput;
+          return true;
+        }
+        // Otherwise read the directory.
+        if (!dir_iter_.Open(input, DirectorySearchFlags::kFiles)) {
+          // Ignore permission errors.
+ if (absl::IsPermissionDenied(dir_iter_.Status())) { + continue; + } + if (!dir_iter_.Status().ok()) { + SetError(dir_iter_.Status()); + } + return false; + } + } + } + if (dir_iter_.NextEntry(&dent)) { + break; + } else if (!dir_iter_.Status().ok()) { + SetError(dir_iter_.Status()); + return false; + } + } + + path::Join(&job->filepath, dir_iter_.Path(), dent.RelPathName()); + return true; +} + +void Indexer::Impl::SetDone(bool done) { done_ = done; } + +inline size_t Indexer::Impl::SizeBucket(size_t size) const { + size_t bucket = 1024; + // Go in steps of powers of two until min. chunk size is reached. + while (bucket < size && bucket < cfg_.min_chunk_size && bucket < (1llu << 63)) + bucket <<= 1; + // Go in steps of the configurable step size afterwards. + while (bucket < size && bucket < (1llu << 63)) + bucket += cfg_.max_chunk_size_step; + return bucket >= size ? bucket : UINT64_MAX; +} + +inline Indexer::OpStats Indexer::Impl::Stats() const { + const std::lock_guard lock(stats_mutex_); + return stats_; +} + +inline Indexer::ChunkSizeMap Indexer::Impl::ChunkSizes() const { + const std::lock_guard lock(chunk_sizes_mutex_); + return chunk_sizes_; +} + +Indexer::hash_t Indexer::Impl::HashBlake3(const uint8_t* data, size_t len) { + blake3_hasher state; + uint8_t out[BLAKE3_OUT_LEN]; + blake3_hasher_init(&state); + blake3_hasher_update(&state, data, len); + blake3_hasher_finalize(&state, out, BLAKE3_OUT_LEN); + return Indexer::hash_t(reinterpret_cast(out), BLAKE3_OUT_LEN); +} + +Indexer::hash_t Indexer::Impl::Hash(const uint8_t* data, size_t len) { + switch (cfg_.hash_type) { + case IndexerConfig::HashType::kNull: + return hash_t(); + case IndexerConfig::HashType::kBlake3: + return HashBlake3(data, len); + case IndexerConfig::HashType::kUndefined: + break; + } + std::cerr << "Unknown hash type" << std::endl; + return std::string(); +} + +void Indexer::Impl::AddChunk(const uint8_t* data, size_t len) { + std::string hash = Hash(data, len); + // See if the chunk already exists, insert it if not. + chunks_mutex_.lock(); + bool new_chunk = chunks_.find(hash) == chunks_.end(); + if (new_chunk) { + chunks_.emplace(hash, Chunk{hash, len}); + } + chunks_mutex_.unlock(); + + // Update the stats. + stats_mutex_.lock(); + stats_.total_bytes += len; + ++stats_.total_chunks; + if (new_chunk) { + stats_.unique_bytes += len; + ++stats_.unique_chunks; + } + stats_mutex_.unlock(); + + // Update chunk sizes distribution. + if (new_chunk) { + size_t bucket = SizeBucket(len); + chunk_sizes_mutex_.lock(); + chunk_sizes_[bucket]++; + chunk_sizes_mutex_.unlock(); + } +} + +void Indexer::Impl::AddFile() { + const std::lock_guard lock(stats_mutex_); + ++stats_.total_files; +} + +bool Indexer::Impl::HasError() const { + const std::lock_guard lock(result_mutex_); + return !result_.ok(); +} + +absl::Status Indexer::Impl::Error() const { + const std::lock_guard lock(result_mutex_); + return result_; +} + +void Indexer::Impl::SetError(absl::Status err) { + // Ignore attempts to set a non-error. + if (err.ok()) return; + const std::lock_guard lock(result_mutex_); + // Don't overwrite any previous error. 
+ if (result_.ok()) result_ = err; +} + +Indexer::Worker::Worker(Indexer::Impl* impl) + : impl_(impl), + cdc_cfg_(impl_->Cfg().min_chunk_size, impl_->Cfg().avg_chunk_size, + impl_->Cfg().max_chunk_size) {} + +void Indexer::Worker::Run() { + IndexerJob job; + while (impl_->GetNextJob(&job)) { + absl::Status err = IndexFile(job.filepath); + if (!err.ok()) { + impl_->SetError(err); + return; + } + } +} + +absl::Status Indexer::Worker::IndexFile(const std::string& filepath) { + std::FILE* fin = std::fopen(filepath.c_str(), "rb"); + if (!fin) { + return ErrnoToCanonicalStatus( + errno, absl::StrFormat("failed to open file '%s'", filepath)); + } + path::FileCloser closer(fin); + std::fseek(fin, 0, SEEK_SET); + + auto hdlr = absl::bind_front(&Indexer::Impl::AddChunk, impl_); + Indexer::Chunker chunker(cdc_cfg_, hdlr); + + std::vector buf(impl_->Cfg().read_block_size, 0); + int err = 0; + while (!std::feof(fin)) { + size_t cnt = std::fread(buf.data(), sizeof(uint8_t), buf.size(), fin); + err = std::ferror(fin); + if (err) { + return ErrnoToCanonicalStatus( + err, absl::StrFormat("failed to read from file '%s'", filepath)); + } + if (cnt) { + chunker.Process(buf.data(), cnt); + } + } + chunker.Finalize(); + impl_->AddFile(); + + return absl::OkStatus(); +} + +IndexerConfig::IndexerConfig() + : read_block_size(32 << 10), + min_chunk_size(0), + avg_chunk_size(0), + max_chunk_size(0), + max_chunk_size_step(0), + num_threads(0), + mask_s(0), + mask_l(0) {} + +Indexer::Indexer() : impl_(nullptr) {} + +Indexer::~Indexer() { + if (impl_) delete impl_; +} + +absl::Status Indexer::Run(const IndexerConfig& cfg, + const std::vector& inputs, + Indexer::ProgressFn fn) { + if (impl_) delete impl_; + impl_ = new Impl(cfg, inputs); + + // Start the file creation workers. + std::vector workers(impl_->Config().num_threads); + for (auto it = workers.begin(); it != workers.end(); ++it) { + auto worker = new Worker(impl_); + it->worker = worker; + it->thrd = new std::thread(&Worker::Run, worker); + } + // Start the progress function worker. + std::thread prog(&Impl::TriggerProgress, impl_, fn); + + // Wait for the workers to finish. + for (auto it = workers.begin(); it != workers.end(); ++it) { + it->thrd->join(); + } + // Wait for the progress worker to finish. + impl_->SetDone(true); + prog.join(); + + return Error(); +} + +absl::Status Indexer::Error() const { + return impl_ ? impl_->Error() : absl::Status(); +} + +IndexerConfig Indexer::Config() const { + if (impl_) return impl_->Cfg(); + return IndexerConfig(); +} + +Indexer::OpStats Indexer::Stats() const { + if (impl_) return impl_->Stats(); + return Stats(); +} + +Indexer::ChunkSizeMap Indexer::ChunkSizes() const { + if (impl_) return impl_->ChunkSizes(); + return Indexer::ChunkSizeMap(); +} + +inline Indexer::OpStats::OpStats() + : total_files(0), + total_chunks(0), + unique_chunks(0), + total_bytes(0), + unique_bytes(0) {} + +}; // namespace cdc_ft diff --git a/cdc_indexer/indexer.h b/cdc_indexer/indexer.h new file mode 100644 index 0000000..2a10a86 --- /dev/null +++ b/cdc_indexer/indexer.h @@ -0,0 +1,145 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_INDEXER_INDEXER_H_ +#define CDC_INDEXER_INDEXER_H_ + +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/time/time.h" +#include "fastcdc/fastcdc.h" + +// Compile-time parameters for the FastCDC algorithm. +#define CDC_GEAR_32BIT 1 +#define CDC_GEAR_64BIT 2 +#ifndef CDC_GEAR_TABLE +#define CDC_GEAR_TABLE CDC_GEAR_64BIT +#endif +#ifndef CDC_MASK_STAGES +#define CDC_MASK_STAGES 7 +#endif +#ifndef CDC_MASK_BIT_LSHIFT_AMOUNT +#define CDC_MASK_BIT_LSHIFT_AMOUNT 3 +#endif + +namespace cdc_ft { + +struct IndexerConfig { + // The hash function to use. + enum class HashType { + kUndefined = 0, + // No hashing performed, always return an empty string. + kNull, + // Use BLAKE3 (cryptographic) + kBlake3, + }; + IndexerConfig(); + // Read file contents in the given block size from disk, defaults to 4K. + size_t read_block_size; + // The minimum allowed chunk size, defaults to avg_chunk_size/2. + size_t min_chunk_size; + // The target average chunk size. + size_t avg_chunk_size; + // The maximum allowed chunk size, defaults to 2*avg_chunk_size. + size_t max_chunk_size; + // Max. step size for bucketing the chunk size distribution. + size_t max_chunk_size_step; + // How many operations to run in parallel. If this value is zero, then + // `std::thread::hardware_concurrency()` is used. + uint32_t num_threads; + // Which hash function to use. + HashType hash_type; + // The masks will be populated by the indexer, setting them here has no + // effect. They are in this struct so that they can be conveniently accessed + // when printing the operation summary (and since they are derived from the + // configuration, they are technically part of it). + uint64_t mask_s; + uint64_t mask_l; +}; + +class Indexer { + public: + using hash_t = std::string; +#if CDC_GEAR_TABLE == CDC_GEAR_32BIT + typedef fastcdc::Chunker32 + Chunker; +#elif CDC_GEAR_TABLE == CDC_GEAR_64BIT + typedef fastcdc::Chunker64 + Chunker; +#else +#error "Unknown gear table" +#endif + + // Represents a chunk. + struct Chunk { + hash_t hash; + size_t size; + }; + + // Chunk storage, keyed by hash. The hash value must be mapped to a uint64_t + // value here, which is only acceptable for an experimental program like this. + typedef std::unordered_map ChunkMap; + // Used for counting number of chunks in size buckets. + typedef std::unordered_map ChunkSizeMap; + + // Statistics about the current operation. + struct OpStats { + OpStats(); + size_t total_files; + size_t total_chunks; + size_t unique_chunks; + size_t total_bytes; + size_t unique_bytes; + absl::Duration elapsed; + }; + + // Defines a callback function that can be used to display progress updates + // while the Indexer is busy. + typedef void(ProgressFn)(const OpStats& stats); + + Indexer(); + ~Indexer(); + + // Starts the indexing operation for the given configuration `cfg` and + // `inputs`. The optional callback function `fn` is called periodically with + // statistics about the ongoing operation. 
+  absl::Status Run(const IndexerConfig& cfg,
+                   const std::vector& inputs, ProgressFn fn);
+  // Returns the status of the ongoing or completed operation.
+  absl::Status Error() const;
+  // Returns the configuration that was passed to Run().
+  IndexerConfig Config() const;
+  // Returns the statistics about the ongoing or completed operation.
+  OpStats Stats() const;
+  // Returns a map of chunk sizes to the number of occurrences. The sizes are
+  // combined to buckets according to the given `IndexerConfig` of the Run()
+  // operation.
+  ChunkSizeMap ChunkSizes() const;
+
+ private:
+  class Impl;
+  class Worker;
+  class WorkerThread;
+  Impl* impl_;
+};
+
+};  // namespace cdc_ft
+
+#endif  // CDC_INDEXER_INDEXER_H_
diff --git a/cdc_indexer/main.cc b/cdc_indexer/main.cc
new file mode 100644
index 0000000..1a8b378
--- /dev/null
+++ b/cdc_indexer/main.cc
@@ -0,0 +1,435 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/flags/usage.h"
+#include "absl/flags/usage_config.h"
+#include "absl/random/random.h"
+#include "absl/status/status.h"
+#include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
+#include "absl_helper/jedec_size_flag.h"
+#include "cdc_indexer/indexer.h"
+#include "common/errno_mapping.h"
+#include "common/path.h"
+
+ABSL_FLAG(std::vector, inputs, std::vector(),
+          "List of input files or directories to read from.");
+ABSL_FLAG(uint32_t, num_threads, 0,
+          "How many threads should read files in parallel, use 0 to "
+          "auto-determine the best concurrency for this machine.");
+ABSL_FLAG(cdc_ft::JedecSize, min_chunk_size, cdc_ft::JedecSize(0),
+          "The minimum chunk size to split the files into. Defaults to half "
+          "of the average chunk size. Supports common unit suffixes K, M, G.");
+ABSL_FLAG(cdc_ft::JedecSize, avg_chunk_size, cdc_ft::JedecSize(256 << 10),
+          "The average chunk size to split the files into. Supports common "
+          "unit suffixes K, M, G.");
+ABSL_FLAG(cdc_ft::JedecSize, max_chunk_size, cdc_ft::JedecSize(0),
+          "The maximum chunk size to split the files into. Defaults to twice "
+          "the average chunk size. Supports common unit suffixes K, M, G.");
+ABSL_FLAG(cdc_ft::JedecSize, read_block_size, cdc_ft::JedecSize(0),
+          "The block size to read the input file(s) from disk. Defaults to the "
+          "value of --max_chunk_size. Supports common unit suffixes K, M, G.");
+ABSL_FLAG(std::string, hash, "blake3",
+          "Which hash function to use. Supported values are \"blake3\" and "
+          "\"null\".");
+ABSL_FLAG(std::string, results_file, "",
+          "File name to append results to in CSV format.");
+ABSL_FLAG(std::string, description, "",
+          "A descriptive string of the experiment that was run. If given, this "
+          "will be prepended literally to each row in the results file. "
+          "Multiple columns can be separated with commas.");
+
+namespace cdc_ft {
+namespace {
+
+const char* GearTable() {
+  // The following macros are defined in indexer.h.
+#if CDC_GEAR_TABLE == CDC_GEAR_32BIT + return "32 bit"; +#elif CDC_GEAR_TABLE == CDC_GEAR_64BIT + return "64 bit"; +#else +#error "Unknown gear table" + return "unknown"; +#endif +} + +void SetupFlagsHelp() { + absl::SetProgramUsageMessage( + "CDC indexer to measure and report data redundancy."); + absl::FlagsUsageConfig fuc; + // Filter flags to show when the --help flag is set. + fuc.contains_help_flags = [](absl::string_view f) { + return absl::EndsWith(f, "main.cc"); + }; + absl::SetFlagsUsageConfig(fuc); +} + +// Prints a human-readable representation of the given size, such as "4 KB". +template +std::string HumanBytes(T size, int precision = 0) { + const size_t threshold = 2048; + if (size < 1024) + return absl::StrFormat("%d bytes", static_cast(size)); + double s = static_cast(size) / 1024; + std::string units = "KB"; + if (s > threshold) { + s /= 1024; + units = "MB"; + } + if (s > threshold) { + s /= 1024; + units = "GB"; + } + if (s > threshold) { + s /= 1024; + units = "TB"; + } + if (s > threshold) { + s /= 1024; + units = "PB"; + } + return absl::StrFormat("%.*f %s", precision, s, units); +} + +// Prints a human-readable representation of a duration as minutes and seconds +// in the format "m:ss". +std::string HumanDuration(const absl::Duration& d) { + auto sec = absl::ToInt64Seconds(d); + return absl::StrFormat("%02d:%02d", sec / 60, std::abs(sec) % 60); +} + +std::string HashTypeToString(IndexerConfig::HashType type) { + switch (type) { + case IndexerConfig::HashType::kNull: + return "(no hashing)"; + case IndexerConfig::HashType::kBlake3: + return "BLAKE3"; + default: + return "unknown"; + } +} + +// Prints progress information on stdout. +void ShowProgress(const Indexer::OpStats& stats) { + static absl::Time op_start = absl::Now(); + static absl::Time last_progress = op_start; + static size_t last_total_bytes = 0; + + auto now = absl::Now(); + auto elapsed = now - last_progress; + if (elapsed < absl::Milliseconds(500)) return; + + double bps = + (stats.total_bytes - last_total_bytes) / absl::ToDoubleSeconds(elapsed); + double dedup_pct = (stats.total_bytes - stats.unique_bytes) / + static_cast(stats.total_bytes) * 100.0; + std::cout << '\r' << HumanDuration(now - op_start) << " " << std::setw(2) + << HumanBytes(stats.total_bytes, 2) << " in " << stats.total_files + << " files processed at " << HumanBytes(bps, 1) << "/s" + << ", " << static_cast(dedup_pct) << "% deduplication" + << std::flush; + last_progress = now; + last_total_bytes = stats.total_bytes; +} + +void ShowSummary(const IndexerConfig& cfg, const Indexer::OpStats& stats, + absl::Duration elapsed) { + const int title_w = 20; + const int num_w = 16; + double dedup_pct = (stats.total_bytes - stats.unique_bytes) / + static_cast(stats.total_bytes) * 100.0; + double bps = stats.total_bytes / absl::ToDoubleSeconds(elapsed); + std::cout << "Chunk size (min/avg/max): " << HumanBytes(cfg.min_chunk_size) + << " / " << HumanBytes(cfg.avg_chunk_size) << " / " + << HumanBytes(cfg.max_chunk_size) + << " | Hash: " << HashTypeToString(cfg.hash_type) + << " | Threads: " << cfg.num_threads << std::endl; + std::cout << "gear_table: " << GearTable() << " | mask_s: 0x" << std::hex + << cfg.mask_s << " | mask_l: 0x" << cfg.mask_l << std::dec + << std::endl; + std::cout << std::setw(title_w) << "Duration:" << std::setw(num_w) + << HumanDuration(elapsed) << std::endl; + std::cout << std::setw(title_w) << "Total files:" << std::setw(num_w) + << stats.total_files << std::endl; + std::cout << std::setw(title_w) << "Total chunks:" << 
std::setw(num_w) + << stats.total_chunks << std::endl; + std::cout << std::setw(title_w) << "Unique chunks:" << std::setw(num_w) + << stats.unique_chunks << std::endl; + std::cout << std::setw(title_w) << "Total data:" << std::setw(num_w) + << HumanBytes(stats.total_bytes, 2) << std::endl; + std::cout << std::setw(title_w) << "Unique data:" << std::setw(num_w) + << HumanBytes(stats.unique_bytes, 2) << std::endl; + std::cout << std::setw(title_w) << "Throughput:" << std::setw(num_w - 2) + << HumanBytes(bps, 2) << "/s" << std::endl; + std::cout << std::setw(title_w) << "Avg. chunk size:" << std::setw(num_w) + << HumanBytes(static_cast(stats.unique_bytes) / + stats.unique_chunks) + << std::endl; + std::cout << std::setw(title_w) << "Deduplication:" << std::setw(num_w - 1) + << std::setprecision(4) << dedup_pct << "%" << std::endl; +} + +void ShowChunkSize(size_t size, uint64_t cnt, uint64_t max_count, + uint64_t total_count) { + const int key_w = 7; + const int hbar_w = 40; + const int num_w = 10; + const int pct_w = 2; + + double pct = 100.0 * static_cast(cnt) / total_count; + double hscale = static_cast(cnt) / max_count; + int blocks = round(hscale * hbar_w); + + std::cout << std::setw(key_w) << HumanBytes(size) << " "; + for (int i = 0; i < blocks; i++) std::cout << "#"; + for (int i = hbar_w - blocks; i > 0; i--) std::cout << " "; + std::cout << " " << std::setw(num_w) << cnt << " (" << std::setw(pct_w) + << round(pct) << "%)" << std::endl; +} + +std::vector ChunkSizeBuckets(const IndexerConfig& cfg, + const Indexer::ChunkSizeMap& sizes, + size_t fixed_min_size, + size_t fixed_max_size, + uint64_t* max_count_out, + uint64_t* total_count_out) { + size_t min_size = 1u << 31; + size_t max_size = 0; + uint64_t max_count = 0; + uint64_t total_count = 0, found_count = 0; + uint64_t outside_min_max_count = 0; + std::vector buckets; + // Find out min/max chunk sizes + for (auto [chunk_size, count] : sizes) { + if (chunk_size < min_size) min_size = chunk_size; + if (chunk_size > max_size) max_size = chunk_size; + if (count > max_count) max_count = count; + if (chunk_size < fixed_min_size) outside_min_max_count += count; + if (fixed_max_size > 0 && chunk_size > fixed_max_size) + outside_min_max_count += count; + total_count += count; + } + if (fixed_min_size > 0) min_size = fixed_min_size; + // Use steps of powers of two until min. chunk size is reached. + uint64_t size; + uint64_t pow_end_size = std::min(cfg.min_chunk_size, max_size); + for (size = min_size; size < pow_end_size; size <<= 1) { + buckets.push_back(size); + auto it = sizes.find(size); + if (it != sizes.end()) found_count += it->second; + } + if (fixed_max_size > max_size) max_size = fixed_max_size; + // Use step increments of max_chunk_size_step afterwards. + for (; size <= max_size; size += cfg.max_chunk_size_step) { + buckets.push_back(size); + auto it = sizes.find(size); + if (it != sizes.end()) found_count += it->second; + } + // Make sure we found every bucket. + assert(total_count == found_count + outside_min_max_count); + if (max_count_out) *max_count_out = max_count; + if (total_count_out) *total_count_out = total_count; + return buckets; +} + +void ShowChunkSizes(const IndexerConfig& cfg, + const Indexer::ChunkSizeMap& sizes) { + uint64_t max_count = 0; + uint64_t total_count = 0; + auto buckets = ChunkSizeBuckets(cfg, sizes, 0, 0, &max_count, &total_count); + for (auto size : buckets) { + auto it = sizes.find(size); + uint64_t cnt = it != sizes.end() ? 
it->second : 0; + ShowChunkSize(size, cnt, max_count, total_count); + } +} + +absl::Status WriteResultsFile(const std::string& filepath, + const std::string& description, + const IndexerConfig& cfg, + const Indexer::OpStats& stats, + const Indexer::ChunkSizeMap& sizes) { + bool exists = path::FileExists(filepath); + std::FILE* fout = std::fopen(filepath.c_str(), "a"); + if (!fout) { + return ErrnoToCanonicalStatus( + errno, absl::StrFormat("Couldn't write to file '%s'", filepath)); + } + + path::FileCloser closer(fout); + + static constexpr int num_columns = 15; + static const char* columns[num_columns] = { + "gear_table", + "mask_s", + "mask_l", + "Min chunk size [KiB]", + "Avg chunk size [KiB]", + "Max chunk size [KiB]", + "Read speed [MiB/s]", + "Files", + "Total chunks", + "Unique chunks", + "Total size [MiB]", + "Unique size [MiB]", + "Dedup size [MiB]", + "Dedup ratio", + "Res avg chunk size [KiB]", + }; + + auto buckets = ChunkSizeBuckets(cfg, sizes, cfg.min_chunk_size, + cfg.max_chunk_size, nullptr, nullptr); + // Write column headers this is a new file. + if (!exists) { + // Write empty columns corresponding to the no. of given columns. + int desc_cols = description.empty() ? 0 : 1; + desc_cols += std::count(description.begin(), description.end(), ','); + for (int i = 0; i < desc_cols; i++) { + std::fprintf(fout, i == 0 ? "Description," : ","); + } + // Write fixed column headers. + for (int i = 0; i < num_columns; i++) { + std::fprintf(fout, "%s,", columns[i]); + } + // Write chunk distribution column headers + for (auto size : buckets) { + std::fprintf(fout, "%s,", HumanBytes(size).c_str()); + } + std::fprintf(fout, "\n"); + } + + // Count allow chunks below min_chunk_size and above max_chunk_size as they + // won't be included in the buckets list automatically. + uint64_t below_min_cnt = 0, above_max_cnt = 0; + for (auto [chunk_size, count] : sizes) { + if (chunk_size < cfg.min_chunk_size) below_min_cnt += count; + if (chunk_size > cfg.max_chunk_size) above_max_cnt += count; + } + + static constexpr double mib = static_cast(1 << 20); + + // Write user-supplied description + if (!description.empty()) std::fprintf(fout, "%s,", description.c_str()); + // Write chunking params. + std::fprintf(fout, "%s,0x%zx,0x%zx,", GearTable(), cfg.mask_s, cfg.mask_l); + std::fprintf(fout, "%zu,%zu,%zu,", cfg.min_chunk_size >> 10, + cfg.avg_chunk_size >> 10, cfg.max_chunk_size >> 10); + // Write speed, files, chunks. + double mibps = + (stats.total_bytes / mib) / absl::ToDoubleSeconds(stats.elapsed); + std::fprintf(fout, "%f,%zu,%zu,%zu,", mibps, stats.total_files, + stats.total_chunks, stats.unique_chunks); + // Write total and unique sizes. + std::fprintf(fout, "%f,%f,%f,", stats.total_bytes / mib, + stats.unique_bytes / mib, + (stats.total_bytes - stats.unique_bytes) / mib); + // Write dedup ratio and avg. chunk size. + double dedup_ratio = (stats.total_bytes - stats.unique_bytes) / + static_cast(stats.total_bytes); + size_t avg_size = stats.unique_bytes / stats.unique_chunks; + std::fprintf(fout, "%f,%zu,", dedup_ratio, avg_size >> 10); + // Write chunk distribution + size_t index = 0; + for (auto size : buckets) { + auto it = sizes.find(size); + uint64_t cnt = it != sizes.end() ? 
it->second : 0; + if (index == 0) { + cnt += below_min_cnt; + } else if (index + 1 == buckets.size()) { + cnt += above_max_cnt; + } + ++index; + std::fprintf(fout, "%f,", static_cast(cnt) / stats.unique_chunks); + } + std::fprintf(fout, "\n"); + return absl::OkStatus(); +} + +IndexerConfig::HashType GetHashType(const std::string name) { + if (name == "null") return IndexerConfig::HashType::kNull; + if (name == "blake3") return IndexerConfig::HashType::kBlake3; + std::cerr << "Unknown hash type: \"" << name << "\"" << std::endl; + return IndexerConfig::HashType::kUndefined; +} + +} // namespace +} // namespace cdc_ft + +int main(int argc, char* argv[]) { + cdc_ft::SetupFlagsHelp(); + absl::ParseCommandLine(argc, argv); + + std::vector inputs = absl::GetFlag(FLAGS_inputs); + + if (inputs.empty()) { + std::cout << "Execute the following command to get help on the usage:" + << std::endl + << argv[0] << " --help" << std::endl; + return 0; + } + + cdc_ft::IndexerConfig cfg; + cfg.num_threads = absl::GetFlag(FLAGS_num_threads); + cfg.min_chunk_size = absl::GetFlag(FLAGS_min_chunk_size).Size(); + cfg.avg_chunk_size = absl::GetFlag(FLAGS_avg_chunk_size).Size(); + cfg.max_chunk_size = absl::GetFlag(FLAGS_max_chunk_size).Size(); + cfg.read_block_size = absl::GetFlag(FLAGS_read_block_size).Size(); + cfg.hash_type = cdc_ft::GetHashType(absl::GetFlag(FLAGS_hash)); + + if (!cfg.min_chunk_size) cfg.min_chunk_size = cfg.avg_chunk_size >> 1; + if (!cfg.max_chunk_size) cfg.max_chunk_size = cfg.avg_chunk_size << 1; + if (!cfg.read_block_size) cfg.read_block_size = cfg.max_chunk_size; + cfg.max_chunk_size_step = std::max(cfg.min_chunk_size >> 2, 1024u); + assert(cfg.avg_chunk_size > 0); + assert(cfg.avg_chunk_size > cfg.min_chunk_size); + assert(cfg.avg_chunk_size < cfg.max_chunk_size); + assert(cfg.hash_type != cdc_ft::IndexerConfig::HashType::kUndefined); + + cdc_ft::Indexer idx; + std::cout << "Starting indexer on " << inputs.size() << " inputs." + << std::endl; + static absl::Time start = absl::Now(); + absl::Status res = idx.Run(cfg, inputs, cdc_ft::ShowProgress); + auto elapsed = absl::Now() - start; + std::cout << std::endl; + if (res.ok()) { + std::cout << "Operation succeeded." 
<< std::endl << std::endl; + cdc_ft::ShowSummary(idx.Config(), idx.Stats(), elapsed); + std::cout << std::endl; + cdc_ft::ShowChunkSizes(idx.Config(), idx.ChunkSizes()); + std::string results_file = absl::GetFlag(FLAGS_results_file); + if (!results_file.empty()) { + res = cdc_ft::WriteResultsFile( + results_file, absl::GetFlag(FLAGS_description), idx.Config(), + idx.Stats(), idx.ChunkSizes()); + if (!res.ok()) + std::cerr << "Failed to write results to '" << results_file + << "': " << res.message() << std::endl; + } + } else { + std::cerr << "Error: (" << res.code() << ") " << res.message() << std::endl; + } + + return static_cast(res.code()); +} diff --git a/cdc_rsync/.gitignore b/cdc_rsync/.gitignore new file mode 100644 index 0000000..922e0ca --- /dev/null +++ b/cdc_rsync/.gitignore @@ -0,0 +1,4 @@ +x64/* +generated_protos +*.log +*.user \ No newline at end of file diff --git a/cdc_rsync/BUILD b/cdc_rsync/BUILD new file mode 100644 index 0000000..1c01242 --- /dev/null +++ b/cdc_rsync/BUILD @@ -0,0 +1,191 @@ +load( + "//tools:windows_cc_library.bzl", + "cc_windows_shared_library", +) + +package(default_visibility = [ + "//:__subpackages__", +]) + +cc_library( + name = "client_file_info", + hdrs = ["client_file_info.h"], +) + +cc_library( + name = "client_socket", + srcs = ["client_socket.cc"], + hdrs = ["client_socket.h"], + target_compatible_with = ["@platforms//os:windows"], + deps = [ + "//cdc_rsync/base:socket", + "//common:log", + "//common:status", + "//common:util", + ], +) + +cc_library( + name = "file_finder_and_sender", + srcs = ["file_finder_and_sender.cc"], + hdrs = ["file_finder_and_sender.h"], + target_compatible_with = ["@platforms//os:windows"], + deps = [ + ":client_file_info", + "//cdc_rsync/base:message_pump", + "//cdc_rsync/protos:messages_cc_proto", + "//common:log", + "//common:path", + "//common:path_filter", + "//common:platform", + "//common:util", + ], +) + +cc_test( + name = "file_finder_and_sender_test", + srcs = ["file_finder_and_sender_test.cc"], + data = ["testdata/root.txt"] + glob(["testdata/file_finder_and_sender/**"]), + deps = [ + ":file_finder_and_sender", + "//cdc_rsync/base:fake_socket", + "//cdc_rsync/protos:messages_cc_proto", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + "@com_google_protobuf//:protobuf_lite", + ], +) + +cc_windows_shared_library( + name = "cdc_rsync", + srcs = [ + "cdc_rsync.cc", + "cdc_rsync_client.cc", + "dllmain.cc", + ], + hdrs = [ + "cdc_rsync.h", + "cdc_rsync_client.h", + "error_messages.h", + ], + linkopts = select({ + "//tools:windows": [ + "/DEFAULTLIB:Ws2_32.lib", # Sockets, e.g. recv, send, WSA*. 
+ ], + "//conditions:default": [], + }), + local_defines = ["COMPILING_DLL"], + target_compatible_with = ["@platforms//os:windows"], + deps = [ + ":client_socket", + ":file_finder_and_sender", + ":parallel_file_opener", + ":progress_tracker", + ":zstd_stream", + "//cdc_rsync/base:cdc_interface", + "//cdc_rsync/base:message_pump", + "//cdc_rsync/base:server_exit_code", + "//cdc_rsync/base:socket", + "//cdc_rsync/protos:messages_cc_proto", + "//common:gamelet_component", + "//common:log", + "//common:path", + "//common:path_filter", + "//common:platform", + "//common:port_manager", + "//common:process", + "//common:remote_util", + "//common:sdk_util", + "//common:status", + "//common:status_macros", + "//common:threadpool", + "//common:util", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "parallel_file_opener", + srcs = ["parallel_file_opener.cc"], + hdrs = ["parallel_file_opener.h"], + data = ["testdata/root.txt"] + glob(["testdata/parallel_file_opener/**"]), + deps = [ + ":client_file_info", + "//common:path", + "//common:platform", + "//common:threadpool", + ], +) + +cc_test( + name = "parallel_file_opener_test", + srcs = ["parallel_file_opener_test.cc"], + deps = [ + ":parallel_file_opener", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "progress_tracker", + srcs = ["progress_tracker.cc"], + hdrs = ["progress_tracker.h"], + deps = [ + ":file_finder_and_sender", + "//cdc_rsync/base:cdc_interface", + "//common:stopwatch", + "@com_github_jsoncpp//:jsoncpp", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_test( + name = "progress_tracker_test", + srcs = ["progress_tracker_test.cc"], + deps = [ + ":progress_tracker", + "//cdc_rsync/protos:messages_cc_proto", + "//common:test_main", + "//common:testing_clock", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "zstd_stream", + srcs = ["zstd_stream.cc"], + hdrs = ["zstd_stream.h"], + deps = [ + ":client_socket", + "//common:buffer", + "//common:status", + "//common:status_macros", + "//common:stopwatch", + "@com_github_zstd//:zstd", + ], +) + +cc_test( + name = "zstd_stream_test", + srcs = ["zstd_stream_test.cc"], + deps = [ + ":zstd_stream", + "//cdc_rsync/base:fake_socket", + "//cdc_rsync_server:unzstd_stream", + "//common:status_test_macros", + "//common:test_main", + "@com_github_zstd//:zstd", + ], +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) + +filegroup( + name = "all_test_data", + srcs = glob(["testdata/**"]), +) diff --git a/cdc_rsync/README.md b/cdc_rsync/README.md new file mode 100644 index 0000000..5ed9428 --- /dev/null +++ b/cdc_rsync/README.md @@ -0,0 +1,5 @@ +# CDC RSync + +CDC RSync is a command line tool / library for uploading files to a remote machine in an rsync-like +fashion. It quickly skips files with matching timestamp and size, and only transfers deltas for +existing files. 
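+
+The command line interface is not documented in this README. As a rough sketch,
+an invocation is assumed to follow rsync conventions (local source first, then
+`user@host:destination`, plus recursive/verbose switches such as `-r`/`-v`);
+the paths and host below are placeholders:
+
+```
+cdc_rsync C:\src\assets\* user@example.com:~/assets -rv
+```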
diff --git a/cdc_rsync/base/BUILD b/cdc_rsync/base/BUILD new file mode 100644 index 0000000..6692651 --- /dev/null +++ b/cdc_rsync/base/BUILD @@ -0,0 +1,92 @@ +package(default_visibility = [ + "//:__subpackages__", +]) + +cc_library( + name = "cdc_interface", + srcs = ["cdc_interface.cc"], + hdrs = ["cdc_interface.h"], + deps = [ + ":message_pump", + "//cdc_rsync/protos:messages_cc_proto", + "//common:buffer", + "//common:log", + "//common:path", + "//common:status", + "//common:threadpool", + "//fastcdc", + "@com_github_blake3//:blake3", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_test( + name = "cdc_interface_test", + srcs = ["cdc_interface_test.cc"], + data = ["testdata/root.txt"] + glob(["testdata/cdc_interface/**"]), + deps = [ + ":cdc_interface", + ":fake_socket", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "fake_socket", + srcs = ["fake_socket.cc"], + hdrs = ["fake_socket.h"], + deps = [ + "//cdc_rsync/base:socket", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "message_pump", + srcs = ["message_pump.cc"], + hdrs = ["message_pump.h"], + deps = [ + ":socket", + "//common:buffer", + "//common:log", + "//common:status", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@com_google_protobuf//:protobuf_lite", + ], +) + +cc_test( + name = "message_pump_test", + srcs = ["message_pump_test.cc"], + deps = [ + ":fake_socket", + ":message_pump", + "//cdc_rsync/protos:messages_cc_proto", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "server_exit_code", + hdrs = ["server_exit_code.h"], +) + +cc_library( + name = "socket", + hdrs = ["socket.h"], +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) + +filegroup( + name = "all_test_data", + srcs = glob(["testdata/**"]), +) diff --git a/cdc_rsync/base/cdc_interface.cc b/cdc_rsync/base/cdc_interface.cc new file mode 100644 index 0000000..2618582 --- /dev/null +++ b/cdc_rsync/base/cdc_interface.cc @@ -0,0 +1,670 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/base/cdc_interface.h" + +#include + +#include "absl/strings/str_format.h" +#include "blake3.h" +#include "cdc_rsync/base/message_pump.h" +#include "cdc_rsync/protos/messages.pb.h" +#include "common/buffer.h" +#include "common/path.h" +#include "common/status.h" +#include "common/util.h" +#include "fastcdc/fastcdc.h" + +#if PLATFORM_LINUX +#include +#endif + +namespace cdc_ft { +namespace { + +// The average chunk size should be as low as possible, but not too low. +// Lower sizes mean better delta-encoding and hence less data uploads. +// However, chunking becomes slower for lower sizes. At 8 KB, a gamelet can +// still process close to 700 MB/sec, which matches hard drive speed. +// Signature data rate is another factor. 
The gamelet generates signature data +// at a rate of 700 MB/sec / kAvgChunkSize * sizeof(Chunk) = 1.7 MB/sec for 8 KB +// chunks. That means, the client needs at least 16 MBit download bandwidth to +// stream signatures or else this part becomes slower. 4 KB chunks would require +// a 32 MBit connection. +constexpr size_t kAvgChunkSize = 8 * 1024; +constexpr size_t kMinChunkSize = kAvgChunkSize / 2; +constexpr size_t kMaxChunkSize = kAvgChunkSize * 4; + +// This number was found by experimentally optimizing chunking throughput. +constexpr size_t kFileIoBufferSize = kMaxChunkSize * 4; + +// Limits the size of contiguous patch chunks where data is copied from the +// basis file. Necessary since the server copies chunks in one go and doesn't +// split them up (would be possible, but unnecessarily complicates code). +constexpr size_t kCombinedChunkSizeThreshold = 64 * 1024; + +// Number of hashing tasks in flight at a given point of time. +constexpr size_t kMaxNumHashTasks = 64; + +#pragma pack(push, 1) +// 16 byte hashes guarantee a sufficiently low chance of hash collisions. For +// 8 byte the chance of a hash collision is actually quite high for large files +// 0.0004% for a 100 GB file and 8 KB chunks. +struct Hash { + uint64_t low; + uint64_t high; + + bool operator==(const Hash& other) const { + return low == other.low && high == other.high; + } + bool operator!=(const Hash& other) const { return !(*this == other); } +}; +#pragma pack(pop) + +static_assert(sizeof(Hash) <= BLAKE3_OUT_LEN, ""); + +} // namespace +} // namespace cdc_ft + +namespace std { + +template <> +struct hash { + size_t operator()(const cdc_ft::Hash& hash) const { return hash.low; } +}; + +} // namespace std + +namespace cdc_ft { +namespace { + +// Send a batch of signatures every 8 MB of processed data (~90 packets per +// second at 700 MB/sec processing rate). The size of each signature batch is +// kMinNumChunksPerBatch * sizeof(Chunk), e.g. 20 KB for an avg chunk size of +// 8 KB. +constexpr int kMinSigBatchDataSize = 8 * 1024 * 1024; +constexpr int kMinNumChunksPerBatch = kMinSigBatchDataSize / kAvgChunkSize; + +// Send patch commands in batches of at least that size for efficiency. +constexpr int kPatchRequestSizeThreshold = 65536; + +// 16 bytes hash, 4 bytes size = 20 bytes. +struct Chunk { + Hash hash; + uint32_t size = 0; + Chunk(const Hash& hash, uint32_t size) : hash(hash), size(size) {} +}; + +Hash ComputeHash(const void* data, size_t size) { + assert(data); + Hash hash; + blake3_hasher hasher; + blake3_hasher_init(&hasher); + blake3_hasher_update(&hasher, data, size); + blake3_hasher_finalize(&hasher, reinterpret_cast(&hash), + sizeof(hash)); + return hash; +} + +// Task that computes hashes for a single chunk and adds the result to +// AddSignaturesResponse. +class HashTask : public Task { + public: + HashTask() {} + ~HashTask() {} + + HashTask(const HashTask& other) = delete; + HashTask& operator=(HashTask&) = delete; + + // Sets the data to compute the hash of. + // Should be called before queuing the task. + void SetData(const void* data, size_t size) { + buffer_.reserve(size); + buffer_.resize(size); + memcpy(buffer_.data(), data, size); + } + + // Appends the computed hash to |response|. + // Should be called once the task is finished. 
+ void AppendHash(AddSignaturesResponse* response) const { + response->add_sizes(static_cast(buffer_.size())); + std::string* hashes = response->mutable_hashes(); + hashes->append(reinterpret_cast(&hash_), sizeof(hash_)); + } + + void ThreadRun(IsCancelledPredicate is_cancelled) override { + hash_ = ComputeHash(buffer_.data(), buffer_.size()); + } + + private: + Buffer buffer_; + struct Hash hash_ = {0}; +}; + +class ServerChunkReceiver { + public: + explicit ServerChunkReceiver(MessagePump* message_pump) + : message_pump_(message_pump) { + assert(message_pump_); + } + + // Receives server signature packets and places the data into a map + // (chunk hash) -> (server-side file offset). + // If |block| is false, returns immediately if no data is available. + // If |block| is true, blocks until some data is available. + // |num_server_bytes_processed| is set to the total size of the chunks + // received. + absl::Status Receive(bool block, uint64_t* num_server_bytes_processed) { + assert(num_server_bytes_processed); + *num_server_bytes_processed = 0; + + // Already all server chunks received? + if (all_chunks_received_) { + return absl::OkStatus(); + } + + // If no data is available, early out (unless blocking is requested). + if (!block && !message_pump_->CanReceive()) { + return absl::OkStatus(); + } + + // Receive signatures. + AddSignaturesResponse response; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kAddSignatures, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive AddSignaturesResponse"); + } + + // Validate size of packed hashes, just in case. + const int num_chunks = response.sizes_size(); + if (response.hashes().size() != num_chunks * sizeof(Hash)) { + return MakeStatus("Bad hashes size. Expected %u. Actual %u.", + num_chunks * sizeof(Hash), response.hashes().size()); + } + + // An empty packet marks the end of the server chunks. + if (num_chunks == 0) { + all_chunks_received_ = true; + return absl::OkStatus(); + } + + // Copy the data over to |server_chunk_offsets|. + const Hash* hashes = + reinterpret_cast(response.hashes().data()); + for (int n = 0; n < num_chunks; ++n) { + uint32_t size = response.sizes(n); + chunk_offsets_.insert({hashes[n], curr_offset_}); + curr_offset_ += size; + *num_server_bytes_processed += size; + } + + return absl::OkStatus(); + } + + // True if all server chunks have been received. + bool AllChunksReceived() const { return all_chunks_received_; } + + // Returns a map (server chunk hash) -> (offset of that chunk in server file). + const std::unordered_map& ChunkOffsets() const { + return chunk_offsets_; + } + + private: + MessagePump* message_pump_; + + // Maps server chunk hashes to the file offset in the server file. + std::unordered_map chunk_offsets_; + + // Current server file offset. + uint64_t curr_offset_ = 0; + + // Whether all server files have been received. + bool all_chunks_received_ = false; +}; + +class PatchSender { + // 1 byte for source, 8 bytes for offset and 4 bytes for size. + static constexpr size_t kPatchMetadataSize = + sizeof(uint8_t) + sizeof(uint64_t) + sizeof(uint32_t); + + public: + PatchSender(FILE* file, MessagePump* message_pump) + : file_(file), message_pump_(message_pump) {} + + // Tries to send patch data for the next chunk in |client_chunks|. The class + // keeps an internal counter for the current chunk index. Patch data is not + // sent if the current client chunk is not found among the server chunks and + // there are outstanding server chunks. 
In that case, the method returns + // with an OK status and should be called later as soon as additional server + // chunks have been received. + // |num_client_bytes_processed| is set to the total size of the chunks added. + absl::Status TryAddChunks(const std::vector& client_chunks, + const ServerChunkReceiver& server_chunk_receiver, + uint64_t* num_client_bytes_processed) { + assert(num_client_bytes_processed); + *num_client_bytes_processed = 0; + + while (curr_chunk_idx_ < client_chunks.size()) { + const Chunk& chunk = client_chunks[curr_chunk_idx_]; + auto it = server_chunk_receiver.ChunkOffsets().find(chunk.hash); + bool exists = it != server_chunk_receiver.ChunkOffsets().end(); + + // If there are outstanding server chunks and the client hash is not + // found, do not send the patch data yet. A future server chunk might + // contain the data. + if (!exists && !server_chunk_receiver.AllChunksReceived()) { + return absl::OkStatus(); + } + + absl::Status status = exists ? AddExistingChunk(it->second, chunk.size) + : AddNewChunk(chunk.size); + if (!status.ok()) { + return WrapStatus(status, "Failed to add chunk"); + } + + ++curr_chunk_idx_; + *num_client_bytes_processed += chunk.size; + + // Break loop if all server chunks are received. Otherwise, progress + // reporting is blocked. + if (server_chunk_receiver.AllChunksReceived()) { + break; + } + } + + return absl::OkStatus(); + } + + // Sends the remaining patch commands and an EOF marker. + absl::Status Flush() { + if (request_size_ > 0) { + absl::Status status = + message_pump_->SendMessage(PacketType::kAddPatchCommands, request_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send final patch commands"); + } + total_request_size_ += request_size_; + request_.Clear(); + } + + // Send an empty patch commands request as EOF marker. + absl::Status status = + message_pump_->SendMessage(PacketType::kAddPatchCommands, request_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send patch commands EOF marker"); + } + + return absl::OkStatus(); + } + + // Returns the (estimated) total size of all patch data sent. + uint64_t GetTotalRequestSize() const { return total_request_size_; } + + // Index of the next client chunk. + size_t CurrChunkIdx() const { return curr_chunk_idx_; } + + private: + // Adds patch data for a client chunk that has a matching server chunk of + // given |size| at given |offset| in the server file. + absl::Status AddExistingChunk(uint64_t offset, uint32_t size) { + int last_idx = request_.sources_size() - 1; + if (last_idx >= 0 && + request_.sources(last_idx) == + AddPatchCommandsRequest::SOURCE_BASIS_FILE && + request_.offsets(last_idx) + request_.sizes(last_idx) == offset && + request_.sizes(last_idx) < kCombinedChunkSizeThreshold) { + // Same source and contiguous data -> Append to last entry. + request_.set_sizes(last_idx, request_.sizes(last_idx) + size); + } else { + // Different source or first chunk -> Create new entry. + request_.add_sources(AddPatchCommandsRequest::SOURCE_BASIS_FILE); + request_.add_offsets(offset); + request_.add_sizes(size); + request_size_ += kPatchMetadataSize; + } + + return OnChunkAdded(size); + } + + absl::Status AddNewChunk(uint32_t size) { + std::string* data = request_.mutable_data(); + int last_idx = request_.sources_size() - 1; + if (last_idx >= 0 && + request_.sources(last_idx) == AddPatchCommandsRequest::SOURCE_DATA) { + // Same source -> Append to last entry. 
+ request_.set_sizes(last_idx, request_.sizes(last_idx) + size); + } else { + // Different source or first chunk -> Create new entry. + request_.add_sources(AddPatchCommandsRequest::SOURCE_DATA); + request_.add_offsets(data->size()); + request_.add_sizes(size); + request_size_ += kPatchMetadataSize; + } + + // Read data from client file into |data|. Be sure to restore the previous + // file offset as the chunker might still be processing the file. + size_t prev_size = data->size(); + data->resize(prev_size + size); + int64_t prev_offset = ftell64(file_); + if (fseek64(file_, file_offset_, SEEK_SET) != 0 || + fread(&(*data)[prev_size], 1, size, file_) != size || + fseek64(file_, prev_offset, SEEK_SET) != 0) { + return MakeStatus("Failed to read %u bytes at offset %u", size, + file_offset_); + } + request_size_ += size; + + return OnChunkAdded(size); + } + + absl::Status OnChunkAdded(uint32_t size) { + file_offset_ += size; + + // Send patch commands if there's enough data. + if (request_size_ > kPatchRequestSizeThreshold) { + absl::Status status = + message_pump_->SendMessage(PacketType::kAddPatchCommands, request_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send patch commands"); + } + total_request_size_ += request_size_; + request_size_ = 0; + request_.Clear(); + } + + return absl::OkStatus(); + } + + FILE* file_; + MessagePump* message_pump_; + + AddPatchCommandsRequest request_; + size_t request_size_ = 0; + size_t total_request_size_ = 0; + uint64_t file_offset_ = 0; + size_t curr_chunk_idx_ = 0; +}; + +} // namespace + +CdcInterface::CdcInterface(MessagePump* message_pump) + : message_pump_(message_pump) {} + +absl::Status CdcInterface::CreateAndSendSignature(const std::string& filepath) { + absl::StatusOr file = path::OpenFile(filepath, "rb"); + if (!file.ok()) { + return file.status(); + } +#if PLATFORM_LINUX + // Tell the kernel we'll load the file sequentially (improves IO bandwidth). + posix_fadvise(fileno(*file), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + // Use a background thread for computing hashes on the server. + // Allocate lazily since it is not needed on the client. + // MUST NOT use more than 1 worker thread since the order of finished tasks + // would then not necessarily match the pushing order. However, the order is + // important for computing offsets. + if (!hash_pool_) hash_pool_ = std::make_unique(1); + + // |chunk_handler| is called for each CDC chunk. It pushes a hash task to the + // pool. Tasks are "recycled" from |free_tasks_|, so that buffers don't have + // to reallocated constantly. + size_t num_hash_tasks = 0; + auto chunk_handler = [pool = hash_pool_.get(), &num_hash_tasks, + free_tasks = &free_tasks_](const void* data, + size_t size) { + ++num_hash_tasks; + if (free_tasks->empty()) { + free_tasks->push_back(std::make_unique()); + } + std::unique_ptr task = std::move(free_tasks->back()); + free_tasks->pop_back(); + static_cast(task.get())->SetData(data, size); + pool->QueueTask(std::move(task)); + }; + + fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize); + fastcdc::Chunker chunker(config, chunk_handler); + + AddSignaturesResponse response; + auto read_handler = [&chunker, &response, pool = hash_pool_.get(), + &num_hash_tasks, free_tasks = &free_tasks_, + message_pump = message_pump_](const void* data, + size_t size) { + chunker.Process(static_cast(data), size); + + // Finish hashing tasks. Block if there are too many of them in flight. + for (;;) { + std::unique_ptr task = num_hash_tasks >= kMaxNumHashTasks + ? 
pool->GetCompletedTask() + : pool->TryGetCompletedTask(); + if (!task) break; + num_hash_tasks--; + static_cast(task.get())->AppendHash(&response); + free_tasks->push_back(std::move(task)); + } + + // Send data if we have enough chunks. + if (response.sizes_size() >= kMinNumChunksPerBatch) { + absl::Status status = + message_pump->SendMessage(PacketType::kAddSignatures, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send signatures"); + } + response.Clear(); + } + + return absl::OkStatus(); + }; + + absl::Status status = + path::StreamReadFileContents(*file, kFileIoBufferSize, read_handler); + fclose(*file); + if (!status.ok()) { + return WrapStatus(status, "Failed to compute signatures"); + } + chunker.Finalize(); + + // Finish hashing tasks. + hash_pool_->Wait(); + std::unique_ptr task = hash_pool_->TryGetCompletedTask(); + while (task) { + static_cast(task.get())->AppendHash(&response); + free_tasks_.push_back(std::move(task)); + task = hash_pool_->TryGetCompletedTask(); + } + + // Send the remaining chunks, if any. + if (response.sizes_size() > 0) { + status = message_pump_->SendMessage(PacketType::kAddSignatures, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send final signatures"); + } + response.Clear(); + } + + // Send an empty response as EOF marker. + status = message_pump_->SendMessage(PacketType::kAddSignatures, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send signatures EOF marker"); + } + + return absl::OkStatus(); +} + +absl::Status CdcInterface::ReceiveSignatureAndCreateAndSendDiff( + FILE* file, ReportCdcProgress* progress) { + // + // Compute signatures from client |file| and send patches while receiving + // server signatures. + // + std::vector client_chunks; + ServerChunkReceiver server_chunk_receiver(message_pump_); + PatchSender patch_sender(file, message_pump_); + + auto chunk_handler = [&client_chunks](const void* data, size_t size) { + client_chunks.emplace_back(ComputeHash(data, size), + static_cast(size)); + }; + + fastcdc::Config config(kMinChunkSize, kAvgChunkSize, kMaxChunkSize); + fastcdc::Chunker chunker(config, chunk_handler); + + uint64_t file_size = 0; + auto read_handler = [&chunker, &client_chunks, &server_chunk_receiver, + &file_size, progress, + &patch_sender](const void* data, size_t size) { + // Process client chunks for the data read. + chunker.Process(static_cast(data), size); + file_size += size; + + const bool all_client_chunks_read = data == nullptr; + if (all_client_chunks_read) { + chunker.Finalize(); + } + + do { + // Receive any server chunks available. + uint64_t num_server_bytes_processed = 0; + absl::Status status = server_chunk_receiver.Receive( + /*block=*/all_client_chunks_read, &num_server_bytes_processed); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive server chunks"); + } + + // Try to send patch data. 
+ uint64_t num_client_bytes_processed = 0; + status = patch_sender.TryAddChunks(client_chunks, server_chunk_receiver, + &num_client_bytes_processed); + if (!status.ok()) { + return WrapStatus(status, "Failed to send patch data"); + } + + progress->ReportSyncProgress(num_client_bytes_processed, + num_server_bytes_processed); + } while (all_client_chunks_read && + (!server_chunk_receiver.AllChunksReceived() || + patch_sender.CurrChunkIdx() < client_chunks.size())); + + return absl::OkStatus(); + }; + + absl::Status status = + path::StreamReadFileContents(file, kFileIoBufferSize, read_handler); + if (!status.ok()) { + return WrapStatus(status, "Failed to stream file"); + } + + // Should have sent all client chunks by now. + assert(patch_sender.CurrChunkIdx() == client_chunks.size()); + + // Flush remaining patches. + status = patch_sender.Flush(); + if (!status.ok()) { + return WrapStatus(status, "Failed to flush patches"); + } + + return absl::OkStatus(); +} + +absl::Status CdcInterface::ReceiveDiffAndPatch( + const std::string& basis_filepath, FILE* patched_file, + bool* is_executable) { + Buffer buffer; + *is_executable = false; + + absl::StatusOr basis_file = path::OpenFile(basis_filepath, "rb"); + if (!basis_file.ok()) { + return basis_file.status(); + } +#if PLATFORM_LINUX + // Tell the kernel we'll load the file sequentially (improves IO bandwidth). + // It is not strictly true that the basis file is accessed sequentially, but + // for larger parts of this file this should be the case. + posix_fadvise(fileno(*basis_file), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + bool first_chunk = true; + for (;;) { + AddPatchCommandsRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kAddPatchCommands, &request); + if (!status.ok()) { + fclose(*basis_file); + return WrapStatus(status, "Failed to receive AddPatchCommandsRequest"); + } + + // All arrays must be of the same size. + int num_chunks = request.sources_size(); + if (num_chunks != request.offsets_size() || + num_chunks != request.sizes_size()) { + fclose(*basis_file); + return MakeStatus( + "Corrupted patch command arrays: Expected sizes %i. Actual %i/%i.", + num_chunks, request.offsets_size(), request.sizes_size()); + } + + if (num_chunks == 0) { + // A zero-size request marks the end of patch commands. + break; + } + + for (int n = 0; n < num_chunks; ++n) { + AddPatchCommandsRequest::Source source = request.sources(n); + uint64_t chunk_offset = request.offsets(n); + uint32_t chunk_size = request.sizes(n); + + const char* chunk_data = nullptr; + if (source == AddPatchCommandsRequest::SOURCE_BASIS_FILE) { + // Copy [chunk_offset, chunk_offset + chunk_size) from |basis_file|. + buffer.resize(chunk_size); + if (fseek64(*basis_file, chunk_offset, SEEK_SET) != 0 || + fread(buffer.data(), 1, chunk_size, *basis_file) != chunk_size) { + fclose(*basis_file); + return MakeStatus( + "Failed to read %u bytes at offset %u from basis file", + chunk_size, chunk_offset); + } + chunk_data = buffer.data(); + } else { + // Write [chunk_offset, chunk_offset + chunk_size) from request data. + assert(source == AddPatchCommandsRequest::SOURCE_DATA); + if (request.data().size() < chunk_offset + chunk_size) { + fclose(*basis_file); + return MakeStatus( + "Insufficient data in patch commands. Required %u. 
Actual %u.", + chunk_offset + chunk_size, request.data().size()); + } + chunk_data = &request.data()[chunk_offset]; + } + + if (first_chunk && chunk_size > 0) { + first_chunk = false; + *is_executable = Util::IsExecutable(chunk_data, chunk_size); + } + if (fwrite(chunk_data, 1, chunk_size, patched_file) != chunk_size) { + fclose(*basis_file); + return MakeStatus("Failed to write %u bytes to patched file", + chunk_size); + } + } + } + fclose(*basis_file); + + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/base/cdc_interface.h b/cdc_rsync/base/cdc_interface.h new file mode 100644 index 0000000..cbe0350 --- /dev/null +++ b/cdc_rsync/base/cdc_interface.h @@ -0,0 +1,73 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_BASE_CDC_INTERFACE_H_ +#define CDC_RSYNC_BASE_CDC_INTERFACE_H_ + +#include + +#include "absl/status/status.h" +#include "common/threadpool.h" + +namespace cdc_ft { + +class MessagePump; + +class ReportCdcProgress { + public: + virtual ~ReportCdcProgress() = default; + virtual void ReportSyncProgress(size_t num_client_bytes_processed, + size_t num_server_bytes_processed) = 0; +}; + +// Creates signatures, diffs and patches files. Abstraction layer for fastcdc +// chunking and blake3 hashing. +class CdcInterface { + public: + explicit CdcInterface(MessagePump* message_pump); + + // Creates the signature of the file at |filepath| and sends it to the socket. + // Typically called on the server. + absl::Status CreateAndSendSignature(const std::string& filepath); + + // Receives the server-side signature of |file| from the socket, creates diff + // data using the signature and the file, and sends the diffs to the socket. + // Typically called on the client. + absl::Status ReceiveSignatureAndCreateAndSendDiff( + FILE* file, ReportCdcProgress* progress); + + // Receives diffs from the socket and patches the file at |basis_filepath|. + // The patched data is written to |patched_file|, which must be open in "wb" + // mode. Sets |is_executable| to true if the patched file is an executable + // (based on magic headers). + // Typically called on the server. + absl::Status ReceiveDiffAndPatch(const std::string& basis_filepath, + FILE* patched_file, bool* is_executable); + + private: + MessagePump* const message_pump_; + + // Thread pool for computing chunk hashes. + std::unique_ptr hash_pool_; + + // List of unused hash computation tasks. Tasks are reused by the hash pool + // in order to prevent buffer reallocation. + std::vector> free_tasks_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_BASE_CDC_INTERFACE_H_ diff --git a/cdc_rsync/base/cdc_interface_test.cc b/cdc_rsync/base/cdc_interface_test.cc new file mode 100644 index 0000000..b8a480f --- /dev/null +++ b/cdc_rsync/base/cdc_interface_test.cc @@ -0,0 +1,118 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/base/cdc_interface.h" + +#include +#include + +#include "cdc_rsync/base/fake_socket.h" +#include "cdc_rsync/base/message_pump.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status_test_macros.h" +#include "common/test_main.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +class FakeCdcProgress : public ReportCdcProgress { + public: + void ReportSyncProgress(uint64_t num_client_bytes_processed, + uint64_t num_server_bytes_processed) override { + total_client_bytes_processed += num_client_bytes_processed; + total_server_bytes_processed += num_server_bytes_processed; + } + + uint64_t total_client_bytes_processed = 0; + uint64_t total_server_bytes_processed = 0; +}; + +class CdcInterfaceTest : public ::testing::Test { + public: + void SetUp() override { + Log::Initialize(std::make_unique(LogLevel::kInfo)); + message_pump_.StartMessagePump(); + } + + void TearDown() override { + socket_.ShutdownSendingEnd(); + message_pump_.StopMessagePump(); + Log::Shutdown(); + } + + protected: + FakeSocket socket_; + MessagePump message_pump_{&socket_, MessagePump::PacketReceivedDelegate()}; + + std::string base_dir_ = GetTestDataDir("cdc_interface"); +}; + +TEST_F(CdcInterfaceTest, SyncTest) { + CdcInterface cdc(&message_pump_); + FakeCdcProgress progress; + + const std::string old_filepath = path::Join(base_dir_, "old_file.txt"); + const std::string new_filepath = path::Join(base_dir_, "new_file.txt"); + const std::string patched_filepath = + path::Join(base_dir_, "patched_file.txt"); + + path::Stats old_stats; + EXPECT_OK(path::GetStats(old_filepath, &old_stats)); + + path::Stats new_stats; + EXPECT_OK(path::GetStats(new_filepath, &new_stats)); + + // Create signature of old file and send it to the fake socket (it'll just + // send it to itself). + EXPECT_OK(cdc.CreateAndSendSignature(old_filepath)); + + // Receive the signature from the fake socket, generate the diff to the file + // at |new_filepath| and send it to the socket again. + absl::StatusOr new_file = path::OpenFile(new_filepath, "rb"); + EXPECT_OK(new_file); + EXPECT_OK(cdc.ReceiveSignatureAndCreateAndSendDiff(*new_file, &progress)); + fclose(*new_file); + + // Receive the diff from the fake socket and create a patched file. + std::FILE* patched_file = std::tmpfile(); + ASSERT_TRUE(patched_file != nullptr); + bool is_executable = false; + EXPECT_OK( + cdc.ReceiveDiffAndPatch(old_filepath, patched_file, &is_executable)); + EXPECT_FALSE(is_executable); + + // Read new file. + std::ifstream new_file_stream(new_filepath.c_str(), std::ios::binary); + std::vector new_file_data( + std::istreambuf_iterator(new_file_stream), {}); + + // Read patched file. + fseek(patched_file, 0, SEEK_END); + std::vector patched_file_data(ftell(patched_file)); + fseek(patched_file, 0, SEEK_SET); + fread(patched_file_data.data(), 1, patched_file_data.size(), patched_file); + + // New and patched file should be equal now. + EXPECT_EQ(patched_file_data, new_file_data); + fclose(patched_file); + + // Verify progress tracker. 
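+ // The server side hashed |old_file| to build the signature and the client
+ // side chunked |new_file| to diff against it, so the reported byte counts
+ // should equal the respective file sizes.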
+ EXPECT_EQ(progress.total_server_bytes_processed, old_stats.size); + EXPECT_EQ(progress.total_client_bytes_processed, new_stats.size); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync/base/fake_socket.cc b/cdc_rsync/base/fake_socket.cc new file mode 100644 index 0000000..cbd42be --- /dev/null +++ b/cdc_rsync/base/fake_socket.cc @@ -0,0 +1,70 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/base/fake_socket.h" + +namespace cdc_ft { + +FakeSocket::FakeSocket() = default; + +FakeSocket::~FakeSocket() = default; + +absl::Status FakeSocket::Send(const void* buffer, size_t size) { + // Wait until we can send again. + std::unique_lock suspend_lock(suspend_mutex_); + suspend_cv_.wait(suspend_lock, [this]() { return !sending_suspended_; }); + suspend_lock.unlock(); + + std::unique_lock lock(data_mutex_); + data_.append(static_cast(buffer), size); + lock.unlock(); + data_cv_.notify_all(); + return absl::OkStatus(); +} + +absl::Status FakeSocket::Receive(void* buffer, size_t size, + bool allow_partial_read, + size_t* bytes_received) { + *bytes_received = 0; + std::unique_lock lock(data_mutex_); + data_cv_.wait(lock, [this, size, allow_partial_read]() { + return allow_partial_read || data_.size() >= size || shutdown_; + }); + if (shutdown_) { + return absl::UnavailableError("Pipe is shut down"); + } + size_t to_copy = std::min(size, data_.size()); + memcpy(buffer, data_.data(), to_copy); + *bytes_received = to_copy; + + // This is horribly inefficent, but should be OK in a fake. + data_.erase(0, to_copy); + return absl::OkStatus(); +} + +void FakeSocket::ShutdownSendingEnd() { + std::unique_lock lock(data_mutex_); + shutdown_ = true; + lock.unlock(); + data_cv_.notify_all(); +} + +void FakeSocket::SuspendSending(bool suspended) { + std::unique_lock lock(suspend_mutex_); + sending_suspended_ = suspended; + lock.unlock(); + suspend_cv_.notify_all(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/base/fake_socket.h b/cdc_rsync/base/fake_socket.h new file mode 100644 index 0000000..9ef3f53 --- /dev/null +++ b/cdc_rsync/base/fake_socket.h @@ -0,0 +1,57 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_BASE_FAKE_SOCKET_H_ +#define CDC_RSYNC_BASE_FAKE_SOCKET_H_ + +#include +#include + +#include "absl/status/status.h" +#include "cdc_rsync/base/socket.h" + +namespace cdc_ft { + +// Fake socket that receives the same data it sends. 
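+// Everything written with Send() can be read back via Receive() on the same
+// object, so tests can drive both ends of a connection with a single socket.
+// Typical usage (sketch, mirroring the unit tests in this folder):
+//
+//   FakeSocket socket;
+//   MessagePump pump(&socket, MessagePump::PacketReceivedDelegate());
+//   pump.StartMessagePump();  // Packets sent by |pump| are received by it.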
+class FakeSocket : public Socket { + public: + FakeSocket(); + ~FakeSocket(); + + // Socket: + absl::Status Send(const void* buffer, size_t size) override; // thread-safe + absl::Status Receive(void* buffer, size_t size, bool allow_partial_read, + size_t* bytes_received) override; // thread-safe + + void ShutdownSendingEnd(); + + // If set to true, blocks on Send() until it is set to false again. + void SuspendSending(bool suspended); + + private: + std::mutex data_mutex_; + std::condition_variable data_cv_; + std::string data_; + bool shutdown_ = false; + + bool sending_suspended_ = false; + std::mutex suspend_mutex_; + std::condition_variable suspend_cv_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_BASE_FAKE_SOCKET_H_ diff --git a/cdc_rsync/base/message_pump.cc b/cdc_rsync/base/message_pump.cc new file mode 100644 index 0000000..08436f1 --- /dev/null +++ b/cdc_rsync/base/message_pump.cc @@ -0,0 +1,473 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/base/message_pump.h" + +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "cdc_rsync/base/socket.h" +#include "common/buffer.h" +#include "common/log.h" +#include "common/status.h" +#include "google/protobuf/message_lite.h" + +namespace cdc_ft { +namespace { + +// Max total size of messages in the packet queues. +// If exdeeded, Send/Receive methods start blocking. +uint64_t kInOutBufferSize = 1024 * 1024 * 8; + +// Header is 1 byte type, 3 bytes size. +constexpr size_t kHeaderSize = 4; + +// Size is compressed to 3 bytes. +constexpr uint32_t kMaxPacketSize = 256 * 256 * 256 - 1; + +// Creates a packet of size |kHeaderSize| + |size| and sets the header. +absl::Status CreateSerializedPacket(PacketType type, size_t size, + Buffer* serialized_packet) { + if (size > kMaxPacketSize) { + return MakeStatus("Max packet size exceeded: %u", size); + } + + serialized_packet->clear(); + serialized_packet->reserve(kHeaderSize + size); + + // Header is 1 byte type, 3 bytes size. 
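+ // Resulting wire layout (payload size stored little-endian), decoded again
+ // in ThreadDoReceivePacket():
+ //   byte 0: packet type
+ //   byte 1: size bits 0..7
+ //   byte 2: size bits 8..15
+ //   byte 3: size bits 16..23
+ //   then |size| payload bytes (raw data or a serialized proto).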
+ static_assert(static_cast(PacketType::kCount) <= 256, ""); + static_assert(kMaxPacketSize < 256 * 256 * 256, ""); + static_assert(kHeaderSize == 4, ""); + + uint8_t header[] = {static_cast(type), + static_cast(size & 0xFF), + static_cast((size >> 8) & 0xFF), + static_cast((size >> 16) & 0xFF)}; + serialized_packet->append(header, sizeof(header)); + return absl::OkStatus(); +} + +#define HANDLE_PACKET_TYPE(type) \ + case PacketType::type: \ + return #type; + +const char* PacketTypeName(PacketType type) { + if (type > PacketType::kCount) { + return ""; + } + + switch (type) { + HANDLE_PACKET_TYPE(kRawData) + HANDLE_PACKET_TYPE(kTest) + HANDLE_PACKET_TYPE(kSetOptions) + HANDLE_PACKET_TYPE(kToggleCompression) + HANDLE_PACKET_TYPE(kAddFiles) + HANDLE_PACKET_TYPE(kSendFileStats) + HANDLE_PACKET_TYPE(kAddFileIndices) + HANDLE_PACKET_TYPE(kSendMissingFileData) + HANDLE_PACKET_TYPE(kAddSignatures) + HANDLE_PACKET_TYPE(kAddPatchCommands) + HANDLE_PACKET_TYPE(kAddDeletedFiles) + HANDLE_PACKET_TYPE(kShutdown) + HANDLE_PACKET_TYPE(kCount) + } + + return ""; +} + +#undef HANDLE_PACKET_TYPE + +} // namespace + +MessagePump::MessagePump(Socket* socket, PacketReceivedDelegate packet_received) + : socket_(socket), + packet_received_(packet_received), + creation_thread_id_(std::this_thread::get_id()) { + assert(socket_ != nullptr); +} + +MessagePump::~MessagePump() { StopMessagePump(); } + +void MessagePump::StartMessagePump() { + assert(creation_thread_id_ == std::this_thread::get_id()); + + message_sender_thread_ = std::thread([this]() { ThreadSenderMain(); }); + message_receiver_thread_ = std::thread([this]() { ThreadReceiverMain(); }); +} + +void MessagePump::StopMessagePump() { + assert(creation_thread_id_ == std::this_thread::get_id()); + + if (shutdown_) { + return; + } + + FlushOutgoingQueue(); + + { + absl::MutexLock outgoing_lock(&outgoing_mutex_); + absl::MutexLock incoming_lock(&incoming_mutex_); + shutdown_ = true; + } + + if (message_sender_thread_.joinable()) { + message_sender_thread_.join(); + } + + if (message_receiver_thread_.joinable()) { + message_receiver_thread_.join(); + } +} + +absl::Status MessagePump::SendRawData(const void* data, size_t size) { + Buffer serialized_packet; + absl::Status status = + CreateSerializedPacket(PacketType::kRawData, size, &serialized_packet); + if (!status.ok()) { + return status; + } + const uint8_t* u8_data = static_cast(data); + serialized_packet.append(u8_data, size); + return QueuePacket(std::move(serialized_packet)); +} + +absl::Status MessagePump::SendMessage( + PacketType type, const google::protobuf::MessageLite& message) { + Buffer serialized_packet; + size_t size = message.ByteSizeLong(); + absl::Status status = CreateSerializedPacket(type, size, &serialized_packet); + if (!status.ok()) { + return status; + } + + // Serialize the message directly into the packet. + serialized_packet.resize(kHeaderSize + size); + if (size > 0 && + !message.SerializeToArray(serialized_packet.data() + kHeaderSize, + static_cast(size))) { + return MakeStatus("Failed to serialize message to array"); + } + + return QueuePacket(std::move(serialized_packet)); +} + +absl::Status MessagePump::QueuePacket(Buffer&& serialize_packet) { + // Wait a little if the max queue size is exceeded. 
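+ // This is the send-side backpressure: with kInOutBufferSize at 8 MiB, a
+ // producer that outruns the socket blocks here instead of growing the
+ // outgoing queue without bound.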
+ absl::MutexLock outgoing_lock(&outgoing_mutex_); + auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) { + return outgoing_packets_byte_size_ < kInOutBufferSize || send_error_ || + receive_error_; + }; + outgoing_mutex_.Await(absl::Condition(&cond)); + + // There could be a race where send_error_ is set to true after this, but + // that's OK. + if (send_error_ || receive_error_) { + absl::MutexLock status_lock(&status_mutex_); + return WrapStatus(status_, + "Failed to send packet. Message pump thread is down"); + } + + // Put packet into outgoing queue. + outgoing_packets_byte_size_ += serialize_packet.size(); + outgoing_packets_.push(std::move(serialize_packet)); + + return absl::OkStatus(); +} + +absl::Status MessagePump::ThreadDoSendPacket(Buffer&& serialized_packet) { + if (receive_error_) { + // Just eat the packet if there was a receive error as the other side is + // probably down and won't read packets anymore. + return absl::OkStatus(); + } + + if (output_handler_) { + // Redirect output, don't send to socket. + absl::Status status = + output_handler_(serialized_packet.data(), serialized_packet.size()); + return WrapStatus(status, "Output handler failed"); + } + + absl::Status status = + socket_->Send(serialized_packet.data(), serialized_packet.size()); + if (!status.ok()) { + return WrapStatus(status, "Failed to send packet of size %u", + serialized_packet.size()); + } + + LOG_VERBOSE("Sent packet of size %u (total buffer: %u)", + serialized_packet.size(), outgoing_packets_byte_size_.load()); + + return absl::OkStatus(); +} + +absl::Status MessagePump::ReceiveRawData(Buffer* data) { + Packet packet; + absl::Status status = DequeuePacket(&packet); + if (!status.ok()) { + return WrapStatus(status, "Failed to dequeue packet"); + } + + if (packet.type != PacketType::kRawData) { + return MakeStatus("Unexpected packet type %s. Expected kRawData.", + PacketTypeName(packet.type)); + } + + *data = std::move(packet.data); + return absl::OkStatus(); +} + +absl::Status MessagePump::ReceiveMessage( + PacketType type, google::protobuf::MessageLite* message) { + Packet packet; + absl::Status status = DequeuePacket(&packet); + if (!status.ok()) { + return WrapStatus(status, "Failed to dequeue packet"); + } + + if (packet.type != type) { + return MakeStatus("Unexpected packet type %s. Expected %s.", + PacketTypeName(packet.type), PacketTypeName(type)); + } + + if (!message->ParseFromArray(packet.data.data(), + static_cast(packet.data.size()))) { + return MakeStatus("Failed to parse packet of type %s and size %u", + PacketTypeName(packet.type), packet.data.size()); + } + return absl::OkStatus(); +} + +absl::Status MessagePump::DequeuePacket(Packet* packet) { + // Wait for a packet to be available. + absl::MutexLock incoming_lock(&incoming_mutex_); + auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(incoming_mutex_) { + return !incoming_packets_.empty() || send_error_ || receive_error_; + }; + incoming_mutex_.Await(absl::Condition(&cond)); + + // If receive_error_ is true, do not return an error until |incoming_packets_| + // is empty and all valid packets have been returned. This way, the error + // shows up for the packet that failed to be received. + if (send_error_ || (receive_error_ && incoming_packets_.empty())) { + absl::MutexLock status_lock(&status_mutex_); + return WrapStatus(status_, "Message pump thread is down"); + } + + // Grab packet from incoming queue. + *packet = std::move(incoming_packets_.front()); + incoming_packets_.pop(); + + // Update byte size. 
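+ // Header plus payload, mirroring the increment in ThreadReceiverMain().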
+ incoming_packets_byte_size_ -= kHeaderSize + packet->data.size(); + + return absl::OkStatus(); +} + +absl::Status MessagePump::ThreadDoReceivePacket(Packet* packet) { + // Read type and size in one go for performance reasons. + uint8_t header[kHeaderSize]; + absl::Status status = ThreadDoReceive(&header, kHeaderSize); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive packet of size %u", + kHeaderSize); + } + + static_assert(kHeaderSize == 4, ""); + + uint8_t packet_type = header[0]; + uint32_t packet_size = static_cast(header[1]) | + (static_cast(header[2]) << 8) | + (static_cast(header[3]) << 16); + + if (packet_type >= static_cast(PacketType::kCount)) { + return MakeStatus("Invalid packet type: %u", packet_type); + } + packet->type = static_cast(packet_type); + + if (packet_size > kMaxPacketSize) { + return MakeStatus("Max packet size exceeded: %u", packet_size); + } + + packet->data.resize(packet_size); + status = ThreadDoReceive(packet->data.data(), packet_size); + if (!status.ok()) { + return WrapStatus(status, "Failed to read packet data of size %u", + packet_size); + } + + LOG_VERBOSE("Received packet of size %u (total buffer: %u)", packet_size, + incoming_packets_byte_size_.load()); + + return absl::OkStatus(); +} + +absl::Status MessagePump::ThreadDoReceive(void* buffer, size_t size) { + if (size == 0) { + return absl::OkStatus(); + } + + if (input_reader_) { + size_t bytes_read = 0; + bool eof = false; + absl::Status status = input_reader_->Read(buffer, size, &bytes_read, &eof); + if (eof) { + input_reader_.reset(); + } + if (!status.ok()) { + return status; + } + + // |input_reader_| should read |size| bytes unless |eof| is hit. + assert(bytes_read == size || eof); + + // Since this method never reads across packet boundaries and since packets + // should not be partially received through |input_reader_|, it is an error + // if there's a partial read on EOF. + if (eof && (bytes_read > 0 && bytes_read < size)) { + return MakeStatus("EOF after partial read of %u / %u bytes", bytes_read, + size); + } + + // Special case, might happen if |input_reader_| was an unzip stream and the + // last read stopped right before zlib's EOF marker. Fall through to reading + // uncompressed data in that case. 
+ if (bytes_read == size) { + return absl::OkStatus(); + } + + assert(eof && bytes_read == 0); + } + + size_t unused; + return socket_->Receive(buffer, size, /*allow_partial_read=*/false, &unused); +} + +void MessagePump::FlushOutgoingQueue() { + absl::MutexLock outgoing_lock(&outgoing_mutex_); + auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) { + return outgoing_packets_byte_size_ == 0 || send_error_ || receive_error_; + }; + outgoing_mutex_.Await(absl::Condition(&cond)); +} + +void MessagePump::RedirectInput(std::unique_ptr input_reader) { + assert(std::this_thread::get_id() == message_receiver_thread_.get_id()); + assert(input_reader); + + if (input_reader_) { + LOG_WARNING("Input reader already set"); + return; + } + + input_reader_ = std::move(input_reader); +} + +void MessagePump::RedirectOutput(OutputHandler output_handler) { + FlushOutgoingQueue(); + output_handler_ = std::move(output_handler); +} + +size_t MessagePump::GetNumOutgoingPackagesForTesting() { + absl::MutexLock outgoing_lock(&outgoing_mutex_); + return outgoing_packets_.size(); +} + +size_t MessagePump::GetMaxInOutBufferSizeForTesting() { + return kInOutBufferSize; +} + +size_t MessagePump::GetMaxPacketSizeForTesting() { return kMaxPacketSize; } + +void MessagePump::ThreadSenderMain() { + while (!send_error_) { + Buffer serialized_packet; + size_t size; + { + // Wait for a packet to be available. + absl::MutexLock outgoing_lock(&outgoing_mutex_); + auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(outgoing_mutex_) { + return outgoing_packets_.size() > 0 || shutdown_; + }; + outgoing_mutex_.Await(absl::Condition(&cond)); + if (shutdown_) { + break; + } + + // Grab packet from outgoing queue. + serialized_packet = std::move(outgoing_packets_.front()); + size = serialized_packet.size(); + outgoing_packets_.pop(); + } + + // Send data. This blocks until all data is submitted. + absl::Status status = ThreadDoSendPacket(std::move(serialized_packet)); + if (!status.ok()) { + { + absl::MutexLock status_lock(&status_mutex_); + status_ = WrapStatus(status, "Failed to send packet"); + } + absl::MutexLock outgoing_lock(&outgoing_mutex_); + absl::MutexLock incoming_lock(&incoming_mutex_); + send_error_ = true; + break; + } + + // Decrease AFTER sending, this is important for FlushOutgoingQueue(). + absl::MutexLock outgoing_lock(&outgoing_mutex_); + outgoing_packets_byte_size_ -= size; + } +} + +void MessagePump::ThreadReceiverMain() { + while (!receive_error_) { + // Wait for a packet to be available. + { + absl::MutexLock incoming_lock(&incoming_mutex_); + auto cond = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(incoming_mutex_) { + return incoming_packets_byte_size_ < kInOutBufferSize || shutdown_; + }; + incoming_mutex_.Await(absl::Condition(&cond)); + if (shutdown_) { + break; + } + } + + // Receive packet. This blocks until data is available. + Packet packet; + absl::Status status = ThreadDoReceivePacket(&packet); + if (!status.ok()) { + { + absl::MutexLock status_lock(&status_mutex_); + status_ = WrapStatus(status, "Failed to receive packet"); + } + absl::MutexLock outgoing_lock(&outgoing_mutex_); + absl::MutexLock incoming_lock(&incoming_mutex_); + receive_error_ = true; + break; + } + + if (packet_received_) { + packet_received_(packet.type); + } + + // Queue the packet for receiving. 
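+ // The byte-size counter doubles as the receive throttle awaited at the top
+ // of this loop.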
+ absl::MutexLock incoming_lock(&incoming_mutex_); + incoming_packets_byte_size_ += kHeaderSize + packet.data.size(); + incoming_packets_.push(std::move(packet)); + } +} + +} // namespace cdc_ft diff --git a/cdc_rsync/base/message_pump.h b/cdc_rsync/base/message_pump.h new file mode 100644 index 0000000..b9faaa9 --- /dev/null +++ b/cdc_rsync/base/message_pump.h @@ -0,0 +1,275 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_BASE_MESSAGE_PUMP_H_ +#define CDC_RSYNC_BASE_MESSAGE_PUMP_H_ + +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "common/buffer.h" + +namespace google { +namespace protobuf { +class MessageLite; +} +} // namespace google + +namespace cdc_ft { + +class Socket; + +// See messages.proto. When sending a kXXXRequest from client to server or a +// kXXXResponse from server to client, use packet type kXXX. See messages.proto. +enum class PacketType { + // Not a proto, just raw bytes. + kRawData = 0, + + // Used for testing. + kTest, + + // Send options to server. + kSetOptions, + + // Toggle compression on/off. + kToggleCompression, + + // + // Send all files from client to server. + // + + // Send file paths including timestamps and sizes, and directories to server. + // An empty request indicates that all data has been sent. + kAddFiles, + // Send stats about missing, excessive, changed and matching files to client. + kSendFileStats, + + // + // Send all missing files from server to client. + // + + // Send indices of missing files to client. + // An empty request indicates that all data has been sent. + // Also used for sending indices of changed files. + kAddFileIndices, + + // Start sending missing file data to the server. After each + // SendMissingFileDataRequest, the client sends file data as raw packets and + // an empty packet to indicate eof. + kSendMissingFileData, + + // + // Rsync data exchange. + // + + // Send signatures to client. + // An empty response indicates that all data has been sent. + kAddSignatures, + + // Send patch commands to server. + // An empty request indicates that all data has been sent. + kAddPatchCommands, + + // + // Deletion of extraneous files. + // + kAddDeletedFiles, + + // + // Shutdown. + // + + // Ask the server to shut down. Also used for shutdown ack. + kShutdown, + + // Must be last. + kCount +}; + +class MessagePump { + public: + using PacketReceivedDelegate = std::function; + + // |socket| is the underlying socket that data is sent to and received from, + // unless redirected with one of the Redirect* methods. |packet_received| is + // a callback that is called from the receiver thread as soon as a packet is + // received. RedirectInput() should be called from this delegate. Useful for + // things like decompression. 
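+ //
+ // Typical usage (sketch based on the unit tests; |socket| is any connected
+ // Socket implementation):
+ //
+ //   MessagePump pump(&socket, MessagePump::PacketReceivedDelegate());
+ //   pump.StartMessagePump();
+ //   absl::Status status = pump.SendMessage(PacketType::kTest, request);
+ //   status = pump.ReceiveMessage(PacketType::kTest, &response);
+ //   pump.StopMessagePump();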
+ MessagePump(Socket* socket, PacketReceivedDelegate packet_received); + virtual ~MessagePump(); + + // Starts worker threads to send/receive messages. Should be called after the + // socket is connected. Must not be already started. + // NOT thread-safe. Should be called from the creation thread. + void StartMessagePump(); + + // Stops worker threads to send/receive messages. No-op if already stopped or + // not started. Cannot be restarted. + // NOT thread-safe. Should be called from the creation thread. + void StopMessagePump() ABSL_LOCKS_EXCLUDED(outgoing_mutex_, incoming_mutex_); + + // Queues data for sending. May block if too much data is queued. + // Thread-safe. + absl::Status SendRawData(const void* data, size_t size); + absl::Status SendMessage(PacketType type, + const google::protobuf::MessageLite& message); + + // Receives a packet. Blocks if currently no packets is available. + // Thread-safe. + absl::Status ReceiveRawData(Buffer* data); + absl::Status ReceiveMessage(PacketType type, + google::protobuf::MessageLite* message); + + // Returns true if the Receive* functions have data available. Note that + // receiving messages from multiple threads might be racy, i.e. if + // CanReceive() returns true and Receive* is called afterwards, the method + // might block if another thread has grabbed the packet in the meantime. + bool CanReceive() const { return incoming_packets_byte_size_ > 0; } + + // Blocks until all outgoing messages were sent. Does not prevent that other + // threads queue new packets while the method is blocking, so the caller + // should make sure that that's not the case for consistent behavior. + // Thread-safe. + void FlushOutgoingQueue() ABSL_LOCKS_EXCLUDED(outgoing_mutex_); + + class InputReader { + public: + virtual ~InputReader() {} + + // Reads as much as data possible to |out_buffer|, but no more than + // |out_size| bytes. Sets |bytes_read| to the number of bytes read. + // |eof| is set to true if no more input data is available. The flag + // indicates that the parent MessagePump should reset the input reader + // and read data from the socket again. + virtual absl::Status Read(void* out_buffer, size_t out_size, + size_t* bytes_read, bool* eof) = 0; + }; + + // Starts receiving input from |input_reader| instead of from the socket. + // |input_reader| is called on a background thread. It must be a valid + // pointer. The input reader stays in place until it returns |eof| == true. + // After that, the input reader is reset and data is received from the socket + // again. + // This method must be called from the receiver thread, usually during the + // execution of the PacketReceivedDelegate passed in the constructor. + // Otherwise, the receiver thread might be blocked on a recv() call and the + // first data received would still be read the socket. + void RedirectInput(std::unique_ptr input_reader); + + // If set to a non-empty function, starts sending output to |output_handler| + // instead of to the socket. If set to an empty function, starts sending to + // the socket again. |output_handler| is called on a background thread. + // The outgoing packet queue is flushed prior to changing the output handler. + // The caller must make sure that no background threads are sending new + // messages while this method is running. + using OutputHandler = + std::function; + void RedirectOutput(OutputHandler output_handler); + + // Returns the number of packets queued for sending. 
+ size_t GetNumOutgoingPackagesForTesting() + ABSL_LOCKS_EXCLUDED(outgoing_mutex_); + + // Returns the max total size of messages in the packet queues. + size_t GetMaxInOutBufferSizeForTesting(); + + // Returns hte max size of a single raw or proto message (including header). + size_t GetMaxPacketSizeForTesting(); + + protected: + struct Packet { + PacketType type = PacketType::kCount; + Buffer data; + + // Instances should be moved, not copied. + Packet() = default; + Packet(Packet&& other) { *this = std::move(other); } + Packet(const Packet&) = delete; + Packet& operator=(const Packet&) = delete; + + Packet& operator=(Packet&& other) { + type = other.type; + data = std::move(other.data); + return *this; + } + }; + + private: + // Outgoing packets are already serialized to save mem copies. + absl::Status QueuePacket(Buffer&& serialized_packet) + ABSL_LOCKS_EXCLUDED(outgoing_mutex_, status_mutex_); + absl::Status DequeuePacket(Packet* packet) + ABSL_LOCKS_EXCLUDED(incoming_mutex_, status_mutex_); + + // Underlying socket, not owned. + Socket* socket_; + + // Delegate called if a packet was received. + // Called immediately from the receiver thread. + PacketReceivedDelegate packet_received_; + + // Message pump threads main method for sending and receiving data. + void ThreadSenderMain() ABSL_LOCKS_EXCLUDED(outgoing_mutex_, status_mutex_); + void ThreadReceiverMain() ABSL_LOCKS_EXCLUDED(incoming_mutex_, status_mutex_); + + // Actually send/receive packets. + absl::Status ThreadDoSendPacket(Buffer&& serialized_packet); + absl::Status ThreadDoReceivePacket(Packet* packet); + absl::Status ThreadDoReceive(void* buffer, size_t size); + + std::thread message_sender_thread_; + std::thread message_receiver_thread_; + + // If set, input is not received from the socket, but from |input_reader_|. + std::unique_ptr input_reader_; + // If set, output is not sent to the socket, but to |output_handler_|. + OutputHandler output_handler_; + + // + // Synchronization of message pump threads and main thread. + // + + // Guards to protect access to queued packets. + absl::Mutex outgoing_mutex_; + absl::Mutex incoming_mutex_ ABSL_ACQUIRED_AFTER(outgoing_mutex_); + + // Queued packets. + std::queue outgoing_packets_ ABSL_GUARDED_BY(outgoing_mutex_); + std::queue incoming_packets_ ABSL_GUARDED_BY(incoming_mutex_); + + // Total size of queued packets. Used to limit max queue size. + std::atomic_uint64_t outgoing_packets_byte_size_{0}; + std::atomic_uint64_t incoming_packets_byte_size_{0}; + + // If true, the respective thread saw an error and shut down. + std::atomic_bool send_error_{false}; + std::atomic_bool receive_error_{false}; + + // Shutdown signal to sender and receiver threads. + std::atomic_bool shutdown_{false}; + + absl::Mutex status_mutex_; + absl::Status status_ ABSL_GUARDED_BY(status_mutex_); + + std::thread::id creation_thread_id_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_BASE_MESSAGE_PUMP_H_ diff --git a/cdc_rsync/base/message_pump_test.cc b/cdc_rsync/base/message_pump_test.cc new file mode 100644 index 0000000..ac47d60 --- /dev/null +++ b/cdc_rsync/base/message_pump_test.cc @@ -0,0 +1,272 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/base/message_pump.h" + +#include "cdc_rsync/base/fake_socket.h" +#include "cdc_rsync/protos/messages.pb.h" +#include "common/log.h" +#include "common/status.h" +#include "common/status_test_macros.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +class MessagePumpTest : public ::testing::Test { + public: + void SetUp() override { + Log::Initialize(std::make_unique(LogLevel::kInfo)); + message_pump_.StartMessagePump(); + } + + void TearDown() override { + fake_socket_.ShutdownSendingEnd(); + message_pump_.StopMessagePump(); + Log::Shutdown(); + } + + protected: + // Called on the receiver thread. + void ThreadPackageReceived(PacketType type) { + // Empty by default. Only takes effect if set by tests. + if (type == PacketType::kToggleCompression) { + message_pump_.RedirectInput(std::move(fake_compressed_input_reader_)); + } + } + + FakeSocket fake_socket_; + MessagePump message_pump_{ + &fake_socket_, [this](PacketType type) { ThreadPackageReceived(type); }}; + std::unique_ptr fake_compressed_input_reader_; +}; + +TEST_F(MessagePumpTest, SendReceiveRawData) { + // The FakeSocket just routes everything that's sent to the receiving end. + const Buffer raw_data = {'r', 'a', 'w'}; + EXPECT_OK(message_pump_.SendRawData(raw_data.data(), raw_data.size())); + + Buffer received_raw_data; + EXPECT_OK(message_pump_.ReceiveRawData(&received_raw_data)); + + EXPECT_EQ(raw_data, received_raw_data); +} + +TEST_F(MessagePumpTest, SendReceiveMessage) { + TestRequest request; + request.set_message("message"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request)); + + TestRequest received_request; + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &received_request)); + + EXPECT_EQ(request.message(), received_request.message()); +} + +TEST_F(MessagePumpTest, SendReceiveMultiple) { + const Buffer raw_data_1 = {'r', 'a', 'w', '1'}; + const Buffer raw_data_2 = {'r', 'a', 'w', '2'}; + TestRequest request; + request.set_message("message"); + + EXPECT_OK(message_pump_.SendRawData(raw_data_1.data(), raw_data_1.size())); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request)); + EXPECT_OK(message_pump_.SendRawData(raw_data_2.data(), raw_data_2.size())); + + Buffer received_raw_data_1; + Buffer received_raw_data_2; + TestRequest received_request; + + EXPECT_OK(message_pump_.ReceiveRawData(&received_raw_data_1)); + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &received_request)); + EXPECT_OK(message_pump_.ReceiveRawData(&received_raw_data_2)); + + EXPECT_EQ(raw_data_1, received_raw_data_1); + EXPECT_EQ(request.message(), received_request.message()); + EXPECT_EQ(raw_data_2, received_raw_data_2); +} + +TEST_F(MessagePumpTest, ReceiveMessageInstreadOfRaw) { + const Buffer raw_data = {'r', 'a', 'w'}; + EXPECT_OK(message_pump_.SendRawData(raw_data.data(), raw_data.size())); + + TestRequest received_request; + EXPECT_NOT_OK( + message_pump_.ReceiveMessage(PacketType::kTest, &received_request)); +} + +TEST_F(MessagePumpTest, ReceiveRawInsteadOfMessage) { + TestRequest request; + 
EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request)); + + Buffer received_raw_data; + EXPECT_NOT_OK(message_pump_.ReceiveRawData(&received_raw_data)); +} + +TEST_F(MessagePumpTest, ReceiveMessageWrongType) { + TestRequest request; + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request)); + + ShutdownRequest received_request; + EXPECT_NOT_OK( + message_pump_.ReceiveMessage(PacketType::kShutdown, &received_request)); +} + +TEST_F(MessagePumpTest, MessageMaxSizeExceeded) { + TestRequest request; + size_t max_size = message_pump_.GetMaxPacketSizeForTesting(); + request.set_message(std::string(max_size + 1, 'x')); + EXPECT_NOT_OK(message_pump_.SendMessage(PacketType::kTest, request)); +} + +TEST_F(MessagePumpTest, FlushOutgoingQueue) { + TestRequest request; + request.set_message(std::string(1024 * 4, 'x')); + constexpr size_t kNumMessages = 1000; + + // Note: Must stay below max queue size or else SendMessage starts blocking. + ASSERT_LT((request.message().size() + 4) * kNumMessages, + message_pump_.GetMaxInOutBufferSizeForTesting()); + + // Queue up a bunch of large messages. + fake_socket_.SuspendSending(true); + for (size_t n = 0; n < kNumMessages; ++n) { + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, request)); + } + EXPECT_GT(message_pump_.GetNumOutgoingPackagesForTesting(), 0); + + // Flush the queue. + fake_socket_.SuspendSending(false); + message_pump_.FlushOutgoingQueue(); + + // Check if the queue is empty. + EXPECT_EQ(message_pump_.GetNumOutgoingPackagesForTesting(), 0); +} + +class FakeCompressedInputReader : public MessagePump::InputReader { + public: + explicit FakeCompressedInputReader(Socket* socket) : socket_(socket) {} + + // Doesn't actually do compression, just replaces the word "compressed" by + // "COMPRESSED" as a sign that this handler was executed. In the real rsync + // algorithm, this is used to decompress data. + absl::Status Read(void* out_buffer, size_t out_size, size_t* bytes_read, + bool* eof) override { + absl::Status status = socket_->Receive( + out_buffer, out_size, /*allow_partial_read=*/false, bytes_read); + if (!status.ok()) { + return WrapStatus(status, "socket_->Receive() failed"); + } + assert(*bytes_read == out_size); + char* char_buffer = static_cast(out_buffer); + char* pos = strstr(char_buffer, "compressed"); + if (pos) { + memcpy(pos, "COMPRESSED", strlen("COMPRESSED")); + } + *eof = strstr(char_buffer, "set_eof") != nullptr; + return absl::OkStatus(); + }; + + private: + Socket* socket_; +}; + +TEST_F(MessagePumpTest, RedirectInput) { + fake_compressed_input_reader_ = + std::make_unique(&fake_socket_); + + TestRequest test_request; + ToggleCompressionRequest compression_request; + + test_request.set_message("uncompressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + // Once this message is received, |fake_compressed_input_reader_| is set by + // ThreadPackageReceived(). + EXPECT_OK(message_pump_.SendMessage(PacketType::kToggleCompression, + compression_request)); + + // Send a "compressed" message (should be converted to upper case). + test_request.set_message("compressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + // Trigger reset of the input reader. + test_request.set_message("set_eof"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + // The next message should be "uncompressed" (lower case) again. 
+ test_request.set_message("uncompressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "uncompressed"); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kToggleCompression, + &compression_request)); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "COMPRESSED"); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "set_eof"); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "uncompressed"); +} + +TEST_F(MessagePumpTest, RedirectOutput) { + // Doesn't actually do compression, just replaces the word "compressed" by + // "COMPRESSED" as a sign that this handler was executed. In the real rsync + // algorithm, this handler would pipe the data through zstd to compress it. + auto fake_compressed_output_handler = [this](const void* data, size_t size) { + std::string char_buffer(static_cast(data), size); + std::string::size_type pos = char_buffer.find("compressed"); + if (pos != std::string::npos) { + char_buffer.replace(pos, strlen("COMPRESSED"), "COMPRESSED"); + } + return fake_socket_.Send(char_buffer.data(), size); + }; + + TestRequest test_request; + ToggleCompressionRequest compression_request; + + test_request.set_message("uncompressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + // Set output handler. + message_pump_.RedirectOutput(fake_compressed_output_handler); + + // Send a "compressed" message (should be converted to upper case). + test_request.set_message("compressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + // Clear output handler again. + message_pump_.RedirectOutput(MessagePump::OutputHandler()); + + // The next message should be "uncompressed" (lower case) again. + test_request.set_message("uncompressed"); + EXPECT_OK(message_pump_.SendMessage(PacketType::kTest, test_request)); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "uncompressed"); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "COMPRESSED"); + + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kTest, &test_request)); + EXPECT_EQ(test_request.message(), "uncompressed"); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync/base/server_exit_code.h b/cdc_rsync/base/server_exit_code.h new file mode 100644 index 0000000..23806c8 --- /dev/null +++ b/cdc_rsync/base/server_exit_code.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_ +#define CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_ + +namespace cdc_ft { + +// Since the client cannot distinguish between stderr and stdout (ssh.exe sends +// both to stdout), the server marks the beginning and ending of error messages +// with this marker char. The client interprets everything in between as an +// error message. +constexpr char kServerErrorMarker = 0x1e; + +enum ServerExitCode { + // Pick a range of exit codes that does not overlap with unrelated exit codes + // like bash exit codes. + // - 126: error from bash when binary can't be started (permission denied). + // - 127: error from bash when binary isn't found + // - 255: ssh.exe error code. + // Note that codes must be <= 255. + + // KEEP UPDATED! + kServerExitCodeMin = 50, + + // Generic error on startup, before out-of-date check, e.g. bad args. + kServerExitCodeGenericStartup = 50, + + // A gamelet component is outdated and needs to be re-uploaded. + kServerExitCodeOutOfDate = 51, + + // + // All other exit codes must be strictly bigger than kServerErrorOutOfDate. + // They are guaranteed to be past the out-of-date check. + // + + // Unspecified error. + kServerExitCodeGeneric = 52, + + // Binding to the forward port failed, probably because there's another + // instance of cdc_rsync running. + kServerExitCodeAddressInUse = 53, + + // KEEP UPDATED! + kServerExitCodeMax = 53, +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_BASE_SERVER_EXIT_CODE_H_ diff --git a/cdc_rsync/base/socket.h b/cdc_rsync/base/socket.h new file mode 100644 index 0000000..c156dab --- /dev/null +++ b/cdc_rsync/base/socket.h @@ -0,0 +1,45 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_BASE_SOCKET_H_ +#define CDC_RSYNC_BASE_SOCKET_H_ + +#include "absl/status/status.h" + +namespace cdc_ft { + +class Socket { + public: + Socket() = default; + virtual ~Socket() = default; + + // Send data to the socket. + virtual absl::Status Send(const void* buffer, size_t size) = 0; + + // Receives data from the socket. Blocks until data is available or the + // sending end of the socket gets shut down by the sender. + // If |allow_partial_read| is false, blocks until |size| bytes are available. + // If |allow_partial_read| is true, may return with success if less than + // |size| (but more than 0) bytes were received. + // The number of bytes written to |buffer| is returned in |bytes_received|. + virtual absl::Status Receive(void* buffer, size_t size, + bool allow_partial_read, + size_t* bytes_received) = 0; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_BASE_SOCKET_H_ diff --git a/cdc_rsync/base/testdata/cdc_interface/new_file.txt b/cdc_rsync/base/testdata/cdc_interface/new_file.txt new file mode 100644 index 0000000..81ab4e3 --- /dev/null +++ b/cdc_rsync/base/testdata/cdc_interface/new_file.txt @@ -0,0 +1 @@ +Data for rsync testing. This is the new, modified file on the workstation. 
\ No newline at end of file diff --git a/cdc_rsync/base/testdata/cdc_interface/old_file.txt b/cdc_rsync/base/testdata/cdc_interface/old_file.txt new file mode 100644 index 0000000..8a33c21 --- /dev/null +++ b/cdc_rsync/base/testdata/cdc_interface/old_file.txt @@ -0,0 +1 @@ +Data for rsync testing. This is the old version on the gamelet. \ No newline at end of file diff --git a/cdc_rsync/base/testdata/root.txt b/cdc_rsync/base/testdata/root.txt new file mode 100644 index 0000000..e69de29 diff --git a/cdc_rsync/cdc_rsync.cc b/cdc_rsync/cdc_rsync.cc new file mode 100644 index 0000000..1e2d279 --- /dev/null +++ b/cdc_rsync/cdc_rsync.cc @@ -0,0 +1,125 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/cdc_rsync.h" + +#include + +#include "cdc_rsync/cdc_rsync_client.h" +#include "cdc_rsync/error_messages.h" +#include "common/log.h" +#include "common/path_filter.h" +#include "common/status.h" + +namespace cdc_ft { +namespace { + +ReturnCode TagToMessage(Tag tag, const Options* options, std::string* msg) { + msg->clear(); + switch (tag) { + case Tag::kSocketEof: + *msg = kMsgConnectionLost; + return ReturnCode::kConnectionLost; + + case Tag::kAddressInUse: + *msg = kMsgAddressInUse; + return ReturnCode::kAddressInUse; + + case Tag::kDeployServer: + *msg = kMsgDeployFailed; + return ReturnCode::kDeployFailed; + + case Tag::kInstancePickerNotAvailableInQuietMode: + *msg = kMsgInstancePickerNotAvailableInQuietMode; + return ReturnCode::kInstancePickerNotAvailableInQuietMode; + + case Tag::kConnectionTimeout: + *msg = + absl::StrFormat(kMsgFmtConnectionTimeout, options->ip, options->port); + return ReturnCode::kConnectionTimeout; + + case Tag::kCount: + return ReturnCode::kGenericError; + } + + // Should not happen (TM). Will fall back to status message in this case. + return ReturnCode::kGenericError; +} + +PathFilter::Rule::Type ToInternalType(FilterRule::Type type) { + switch (type) { + case FilterRule::Type::kInclude: + return PathFilter::Rule::Type::kInclude; + case FilterRule::Type::kExclude: + return PathFilter::Rule::Type::kExclude; + } + assert(false); + return PathFilter::Rule::Type::kInclude; +} + +} // namespace + +ReturnCode Sync(const Options* options, const FilterRule* filter_rules, + size_t num_filter_rules, const char* sources_dir, + const char* const* sources, size_t num_sources, + const char* destination, const char** error_message) { + LogLevel log_level = Log::VerbosityToLogLevel(options->verbosity); + Log::Initialize(std::make_unique(log_level)); + + PathFilter path_filter; + for (size_t n = 0; n < num_filter_rules; ++n) { + path_filter.AddRule(ToInternalType(filter_rules[n].type), + filter_rules[n].pattern); + } + + std::vector sources_vec; + for (size_t n = 0; n < num_sources; ++n) { + sources_vec.push_back(sources[n]); + } + + // Run rsync. 
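+ // GgpRsyncClient drives the whole protocol: it starts cdc_rsync_server on
+ // the instance via ssh, syncs the files, and shuts the server down again.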
+ GgpRsyncClient client(*options, std::move(path_filter), sources_dir, + std::move(sources_vec), destination); + absl::Status status = client.Run(); + + if (status.ok()) { + *error_message = nullptr; + return ReturnCode::kOk; + } + + std::string msg; + ReturnCode code = ReturnCode::kGenericError; + absl::optional tag = GetTag(status); + if (tag.has_value()) { + code = TagToMessage(tag.value(), options, &msg); + } + + // Fall back to status message. + if (msg.empty()) { + msg = std::string(status.message()); + } else if (options->verbosity >= 2) { + // In verbose mode, log the status as well, so nothing gets lost. + LOG_ERROR("%s", status.ToString().c_str()); + } + + // Store error message in static buffer (don't use std::string through DLL + // boundary!). + static char buf[1024] = {0}; + strncpy_s(buf, msg.c_str(), _TRUNCATE); + *error_message = buf; + + return code; +} + +} // namespace cdc_ft diff --git a/cdc_rsync/cdc_rsync.h b/cdc_rsync/cdc_rsync.h new file mode 100644 index 0000000..9a13328 --- /dev/null +++ b/cdc_rsync/cdc_rsync.h @@ -0,0 +1,107 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_CDC_RSYNC_H_ +#define CDC_RSYNC_CDC_RSYNC_H_ + +#ifdef COMPILING_DLL +#define CDC_RSYNC_API __declspec(dllexport) +#else +#define CDC_RSYNC_API __declspec(dllimport) +#endif + +namespace cdc_ft { + +#ifdef __cplusplus +extern "C" { +#endif + +struct Options { + const char* ip = nullptr; + int port = 0; + bool delete_ = false; + bool recursive = false; + int verbosity = 0; + bool quiet = false; + bool whole_file = false; + bool relative = false; + bool compress = false; + bool checksum = false; + bool dry_run = false; + bool existing = false; + bool json = false; + const char* copy_dest = nullptr; + int compress_level = 6; + int connection_timeout_sec = 10; + + // Compression level 0 is invalid. + static constexpr int kMinCompressLevel = -5; + static constexpr int kMaxCompressLevel = 22; +}; + +// Rule for including/excluding files. +struct FilterRule { + enum class Type { + kInclude, + kExclude, + }; + + Type type; + const char* pattern; + + FilterRule(Type type, const char* pattern) : type(type), pattern(pattern) {} +}; + +enum class ReturnCode { + // No error. Will match the tool's exit code, so OK must be 0. + kOk = 0, + + // Generic error. + kGenericError = 1, + + // Server connection timed out. + kConnectionTimeout = 2, + + // Connection to the server was shut down unexpectedly. + kConnectionLost = 3, + + // Binding to the forward port failed, probably because there's another + // instance of cdc_rsync running. + kAddressInUse = 4, + + // Server deployment failed. This should be rare, it means that the server + // components were successfully copied, but the up-to-date check still fails. + kDeployFailed = 5, + + // Gamelet selection asks for user input, but we are in quiet mode. + kInstancePickerNotAvailableInQuietMode = 6, +}; + +// Calling Sync() a second time overwrites the data in |error_message|. 
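+// Example call from client code (sketch; all values are placeholders):
+//
+//   Options options;
+//   options.ip = "203.0.113.10";  // placeholder instance address
+//   options.port = 22;            // placeholder port
+//   const char* sources[] = {"C:\\project\\build"};
+//   const char* error_message = nullptr;
+//   ReturnCode code = Sync(&options, /*filter_rules=*/nullptr, 0,
+//                          /*sources_dir=*/"", sources, 1,
+//                          "/mnt/developer/build", &error_message);
+//   // On failure, |code| != ReturnCode::kOk and |error_message| is set.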
+CDC_RSYNC_API ReturnCode Sync(const Options* options, + const FilterRule* filter_rules, + size_t filter_num_rules, const char* sources_dir, + const char* const* sources, size_t num_sources, + const char* destination, + const char** error_message); + +#ifdef __cplusplus +} // extern "C" +#endif + +} // namespace cdc_ft + +#endif // CDC_RSYNC_CDC_RSYNC_H_ diff --git a/cdc_rsync/cdc_rsync_client.cc b/cdc_rsync/cdc_rsync_client.cc new file mode 100644 index 0000000..c121af1 --- /dev/null +++ b/cdc_rsync/cdc_rsync_client.cc @@ -0,0 +1,789 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/cdc_rsync_client.h" + +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "cdc_rsync/base/cdc_interface.h" +#include "cdc_rsync/base/message_pump.h" +#include "cdc_rsync/base/server_exit_code.h" +#include "cdc_rsync/client_file_info.h" +#include "cdc_rsync/client_socket.h" +#include "cdc_rsync/file_finder_and_sender.h" +#include "cdc_rsync/parallel_file_opener.h" +#include "cdc_rsync/progress_tracker.h" +#include "cdc_rsync/protos/messages.pb.h" +#include "cdc_rsync/zstd_stream.h" +#include "common/gamelet_component.h" +#include "common/log.h" +#include "common/path.h" +#include "common/process.h" +#include "common/status.h" +#include "common/status_macros.h" +#include "common/stopwatch.h" +#include "common/util.h" + +namespace cdc_ft { +namespace { + +// Bash exit code if binary could not be run, e.g. permission denied. +constexpr int kExitCodeCouldNotExecute = 126; + +// Bash exit code if binary was not found. +constexpr int kExitCodeNotFound = 127; + +constexpr int kForwardPortFirst = 44450; +constexpr int kForwardPortLast = 44459; +constexpr char kGgpServerFilename[] = "cdc_rsync_server"; +constexpr char kRemoteToolsBinDir[] = "/opt/developer/tools/bin/"; + +SetOptionsRequest::FilterRule::Type ToProtoType(PathFilter::Rule::Type type) { + switch (type) { + case PathFilter::Rule::Type::kInclude: + return SetOptionsRequest::FilterRule::TYPE_INCLUDE; + case PathFilter::Rule::Type::kExclude: + return SetOptionsRequest::FilterRule::TYPE_EXCLUDE; + } + assert(false); + return SetOptionsRequest::FilterRule::TYPE_INCLUDE; +} + +// Translates a server process exit code and stderr into a status. 
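+// |error_msg| is the error text collected from the server's output (see
+// kServerErrorMarker in server_exit_code.h) and may be empty.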
+absl::Status GetServerExitStatus(int exit_code, const std::string& error_msg) { + auto se_code = static_cast(exit_code); + switch (se_code) { + case kServerExitCodeGenericStartup: + if (!error_msg.empty()) { + return MakeStatus("Server returned error during startup: %s", + error_msg); + } + return MakeStatus( + "Server exited with an unspecified error during startup"); + + case kServerExitCodeOutOfDate: + return MakeStatus( + "Server exited since instance components are out of date"); + + case kServerExitCodeGeneric: + if (!error_msg.empty()) { + return MakeStatus("Server returned error: %s", error_msg); + } + return MakeStatus("Server exited with an unspecified error"); + + case kServerExitCodeAddressInUse: + return SetTag(MakeStatus("Server failed to connect"), Tag::kAddressInUse); + } + + // Could potentially happen if the server exits due to another reason, + // e.g. some ssh.exe error (remember that the server process is actually + // an ssh process). + return MakeStatus("Server exited with code %i", exit_code); +} + +} // namespace + +GgpRsyncClient::GgpRsyncClient(const Options& options, PathFilter path_filter, + std::string sources_dir, + std::vector sources, + std::string destination) + : options_(options), + path_filter_(std::move(path_filter)), + sources_dir_(std::move(sources_dir)), + sources_(std::move(sources)), + destination_(std::move(destination)), + remote_util_(options.verbosity, options.quiet, &process_factory_, + /*forward_output_to_log=*/false), + port_manager_("cdc_rsync_ports_f77bcdfe-368c-4c45-9f01-230c5e7e2132", + kForwardPortFirst, kForwardPortLast, &process_factory_, + &remote_util_), + printer_(options.quiet, Util::IsTTY() && !options.json), + progress_(&printer_, options.verbosity, options.json) {} + +GgpRsyncClient::~GgpRsyncClient() { + message_pump_.StopMessagePump(); + socket_.Disconnect(); +} + +absl::Status GgpRsyncClient::Run() { + absl::Status status = remote_util_.GetInitStatus(); + if (!status.ok()) { + return WrapStatus(status, "Failed to initialize critical components"); + } + + // Initialize |remote_util_|. + remote_util_.SetIpAndPort(options_.ip, options_.port); + + // Start the server process. + status = StartServer(); + if (HasTag(status, Tag::kDeployServer)) { + // Gamelet components are not deployed or out-dated. Deploy and retry. + status = DeployServer(); + if (!status.ok()) { + return WrapStatus(status, "Failed to deploy server"); + } + + status = StartServer(); + } + if (!status.ok()) { + return WrapStatus(status, "Failed to start server"); + } + + // Tag::kSocketEof most likely means that the server had an error exited. In + // that case, try to shut it down properly to get more info from the error + // message. + status = Sync(); + if (!status.ok() && !HasTag(status, Tag::kSocketEof)) { + return WrapStatus(status, "Failed to sync files"); + } + + absl::Status stop_status = StopServer(); + if (!stop_status.ok()) { + return WrapStatus(stop_status, "Failed to stop server"); + } + + // If the server doesn't send any error information, return the sync status. + if (server_error_.empty() && HasTag(status, Tag::kSocketEof)) { + return status; + } + + // Check exit code and stderr. + if (server_exit_code_ != 0) { + status = GetServerExitStatus(server_exit_code_, server_error_); + } + + return status; +} + +absl::Status GgpRsyncClient::StartServer() { + assert(!server_process_); + + // Components are expected to reside in the same dir as the executable. 
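+ // GameletComponent::Get() below captures metadata about the local binary;
+ // it is passed on the remote command line so the server can detect whether
+ // the deployed copy of cdc_rsync_server is out of date (see the
+ // kServerExitCodeOutOfDate handling further down).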
+ std::string component_dir; + absl::Status status = path::GetExeDir(&component_dir); + if (!status.ok()) { + return WrapStatus(status, "Failed to get the executable directory"); + } + + std::vector components; + status = GameletComponent::Get( + {path::Join(component_dir, kGgpServerFilename)}, &components); + if (!status.ok()) { + return MakeStatus( + "Required instance component not found. Make sure the file " + "cdc_rsync_server resides in the same folder as cdc_rsync.exe."); + } + std::string component_args = GameletComponent::ToCommandLineArgs(components); + + // Find available local and remote ports for port forwarding. + absl::StatusOr port_res = + port_manager_.ReservePort(options_.connection_timeout_sec); + constexpr char kErrorMsg[] = "Failed to find available port"; + if (absl::IsDeadlineExceeded(port_res.status())) { + // Server didn't respond in time. + return SetTag(WrapStatus(port_res.status(), kErrorMsg), + Tag::kConnectionTimeout); + } + if (absl::IsResourceExhausted(port_res.status())) + return SetTag(WrapStatus(port_res.status(), kErrorMsg), Tag::kAddressInUse); + if (!port_res.ok()) + return WrapStatus(port_res.status(), "Failed to find available port"); + int port = *port_res; + + std::string remote_server_path = + std::string(kRemoteToolsBinDir) + kGgpServerFilename; + // Test existence manually to prevent misleading bash output message + // "bash: .../cdc_rsync_server: No such file or directory". + std::string remote_command = absl::StrFormat( + "if [ ! -f %s ]; then exit %i; fi; %s %i %s", remote_server_path, + kExitCodeNotFound, remote_server_path, port, component_args); + ProcessStartInfo start_info = + remote_util_.BuildProcessStartInfoForSshPortForwardAndCommand( + port, port, false, remote_command); + start_info.name = "cdc_rsync_server"; + + // Capture stdout, but forward to stdout for debugging purposes. + start_info.stdout_handler = [this](const char* data, size_t /*data_size*/) { + return HandleServerOutput(data); + }; + + std::unique_ptr process = process_factory_.Create(start_info); + status = process->Start(); + if (!status.ok()) { + return WrapStatus(status, "Failed to start cdc_rsync_server process"); + } + + // Wait until the server process is listening. + auto detect_listening = [is_listening = &is_server_listening_]() -> bool { + return *is_listening; + }; + status = process->RunUntil(detect_listening); + if (!status.ok()) { + // Some internal process error. Note that this does NOT mean that + // cdc_rsync_server does not exist. In that case, the ssh process exits with + // code 127. + return status; + } + + if (process->HasExited()) { + // Don't re-deploy for code > kServerExitCodeOutOfDate, which means that the + // out-of-date check already passed on the server. + server_exit_code_ = process->ExitCode(); + if (server_exit_code_ > kServerExitCodeOutOfDate && + server_exit_code_ <= kServerExitCodeMax) { + return GetServerExitStatus(server_exit_code_, server_error_); + } + + // Server exited before it started listening, most likely because of + // outdated components (code kServerExitCodeOutOfDate) or because the server + // wasn't deployed at all yet (code kExitCodeNotFound). Instruct caller + // to re-deploy. 
+ return SetTag(MakeStatus("Redeploy server"), Tag::kDeployServer); + } + + assert(is_server_listening_); + status = socket_.Connect(port); + if (!status.ok()) { + return WrapStatus(status, "Failed to initialize connection"); + } + + server_process_ = std::move(process); + message_pump_.StartMessagePump(); + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::StopServer() { + assert(server_process_); + + // Close socket. + absl::Status status = socket_.ShutdownSendingEnd(); + if (!status.ok()) { + return WrapStatus(status, "Failed to shut down socket sending end"); + } + + status = server_process_->RunUntilExit(); + if (!status.ok()) { + return WrapStatus(status, "Failed to stop cdc_rsync_server process"); + } + + server_exit_code_ = server_process_->ExitCode(); + server_process_.reset(); + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::HandleServerOutput(const char* data) { + // Note: This is called from a background thread! + + // Handle server error messages. Unfortunately, if the server prints to + // stderr, the ssh process does not write it to its stderr, but to stdout, so + // we have to jump through hoops to read the error. We use a marker char for + // the start of the error message: + // This is stdout \x1e This is stderr \x1e This is stdout again + std::string stdout_data_storage; + const char* stdout_data = data; + if (is_server_error_ || strchr(data, kServerErrorMarker)) { + // Only run this expensive code if necessary. + std::vector parts = + absl::StrSplit(data, absl::ByChar(kServerErrorMarker)); + for (size_t n = 0; n < parts.size(); ++n) { + if (is_server_error_) { + server_error_.append(parts[n]); + } else { + stdout_data_storage.append(parts[n]); + } + if (n + 1 < parts.size()) { + is_server_error_ = !is_server_error_; + } + } + stdout_data = stdout_data_storage.c_str(); + } + + printer_.Print(stdout_data, false, Util::GetConsoleWidth()); + if (!is_server_listening_) { + server_output_.append(stdout_data); + is_server_listening_ = + server_output_.find("Server is listening") != std::string::npos; + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::Sync() { + absl::Status status = SendOptions(); + if (!status.ok()) { + return WrapStatus(status, "Failed to send options to server"); + } + + status = FindAndSendAllSourceFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to find and send all source files"); + } + + status = ReceiveFileStats(); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive file stats"); + } + + if (options_.delete_) { + status = ReceiveDeletedFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive paths of deleted files"); + } + } + + status = ReceiveFileIndices("missing", &missing_file_indices_); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive missing file indices"); + } + status = SendMissingFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to send missing files"); + } + + status = ReceiveFileIndices("changed", &changed_file_indices_); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive changed file indices"); + } + + status = ReceiveSignaturesAndSendDelta(); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive signatures and send delta"); + } + + // Set sync point for shutdown (waits for the server to finish). 
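+ // The client sends a ShutdownRequest and then blocks on the matching
+ // ShutdownResponse; only after that does Run() call StopServer() to close
+ // the socket and read the server's exit code.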
+ ShutdownRequest shutdown_request; + status = message_pump_.SendMessage(PacketType::kShutdown, shutdown_request); + if (!status.ok()) { + return WrapStatus(status, "Failed to send shutdown request"); + } + + ShutdownResponse response; + status = message_pump_.ReceiveMessage(PacketType::kShutdown, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive shutdown response"); + } + + return status; +} + +absl::Status GgpRsyncClient::DeployServer() { + assert(!server_process_); + + std::string exe_dir; + absl::Status status = path::GetExeDir(&exe_dir); + if (!status.ok()) { + return WrapStatus(status, "Failed to get exe directory"); + } + + std::string deploy_msg; + if (server_exit_code_ == kExitCodeNotFound) { + deploy_msg = "Server not deployed. Deploying..."; + } else if (server_exit_code_ == kExitCodeCouldNotExecute) { + deploy_msg = "Server failed to start. Redeploying..."; + } else if (server_exit_code_ == kServerExitCodeOutOfDate) { + deploy_msg = "Server outdated. Redeploying..."; + } else { + deploy_msg = "Deploying server..."; + } + printer_.Print(deploy_msg, true, Util::GetConsoleWidth()); + + // scp cdc_rsync_server to a temp location on the gamelet. + std::string remoteServerTmpPath = + absl::StrFormat("%s%s.%s", kRemoteToolsBinDir, kGgpServerFilename, + Util::GenerateUniqueId()); + std::string localServerPath = path::Join(exe_dir, kGgpServerFilename); + status = remote_util_.Scp({localServerPath}, remoteServerTmpPath, + /*compress=*/true); + if (!status.ok()) { + return WrapStatus(status, "Failed to copy cdc_rsync_server to instance"); + } + + // Make cdc_rsync_server executable. + status = remote_util_.Chmod("a+x", remoteServerTmpPath); + if (!status.ok()) { + return WrapStatus(status, + "Failed to set executable flag on cdc_rsync_server"); + } + + // Make old file writable. Mv might fail to overwrite it, e.g. if someone made + // it read-only. + std::string remoteServerPath = + std::string(kRemoteToolsBinDir) + kGgpServerFilename; + status = remote_util_.Chmod("u+w", remoteServerPath, /*quiet=*/true); + if (!status.ok()) { + LOG_DEBUG("chmod u+w %s failed (expected if file does not exist): %s", + remoteServerPath, status.ToString()); + } + + // Replace old file by new file. 
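+ // The temp file was uploaded into the same directory, so the move should be
+ // an atomic rename: a server starting concurrently sees either the old or
+ // the new binary, never a partially written one.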
+ status = remote_util_.Mv(remoteServerTmpPath, remoteServerPath); + if (!status.ok()) { + return WrapStatus(status, "Failed to replace '%s' by '%s'", + remoteServerPath, remoteServerTmpPath); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::SendOptions() { + LOG_INFO("Sending options"); + + SetOptionsRequest request; + request.set_destination(destination_); + request.set_delete_(options_.delete_); + request.set_recursive(options_.recursive); + request.set_verbosity(options_.verbosity); + request.set_whole_file(options_.whole_file); + request.set_compress(options_.compress); + request.set_relative(options_.relative); + + for (const PathFilter::Rule& rule : path_filter_.GetRules()) { + SetOptionsRequest::FilterRule* filter_rule = request.add_filter_rules(); + filter_rule->set_type(ToProtoType(rule.type)); + filter_rule->set_pattern(rule.pattern); + } + + request.set_checksum(options_.checksum); + request.set_dry_run(options_.dry_run); + request.set_existing(options_.existing); + if (options_.copy_dest) { + request.set_copy_dest(options_.copy_dest); + } + + absl::Status status = + message_pump_.SendMessage(PacketType::kSetOptions, request); + if (!status.ok()) { + return WrapStatus(status, "SendDestination() failed"); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::FindAndSendAllSourceFiles() { + LOG_INFO("Finding and sending all sources files"); + + Stopwatch stopwatch; + + FileFinderAndSender file_finder(&path_filter_, &message_pump_, &progress_, + sources_dir_, options_.recursive, + options_.relative); + + progress_.StartFindFiles(); + for (const std::string& source : sources_) { + absl::Status status = file_finder.FindAndSendFiles(source); + if (!status.ok()) { + return status; + } + } + progress_.Finish(); + + RETURN_IF_ERROR(file_finder.Flush(), "Failed to flush file finder"); + file_finder.ReleaseFiles(&files_); + + LOG_INFO("Found and sent %u source files in %0.3f seconds", files_.size(), + stopwatch.ElapsedSeconds()); + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::ReceiveFileStats() { + LOG_INFO("Receiving file stats"); + + SendFileStatsResponse response; + absl::Status status = + message_pump_.ReceiveMessage(PacketType::kSendFileStats, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive SendFileStatsResponse"); + } + + progress_.ReportFileStats( + response.num_missing_files(), response.num_extraneous_files(), + response.num_matching_files(), response.num_changed_files(), + response.total_missing_bytes(), response.total_changed_client_bytes(), + response.total_changed_server_bytes(), response.num_missing_dirs(), + response.num_extraneous_dirs(), response.num_matching_dirs(), + options_.whole_file, options_.checksum, options_.delete_); + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::ReceiveDeletedFiles() { + LOG_INFO("Receiving path of deleted files"); + std::string current_directory; + + progress_.StartDeleteFiles(); + for (;;) { + AddDeletedFilesResponse response; + absl::Status status = + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive AddDeletedFilesResponse"); + } + + // An empty response indicates that all files have been sent. + if (response.files_size() == 0 && response.dirs_size() == 0) { + break; + } + + // Print info. Don't use path::Join(), it would mess up slashes. 
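+ // (The server reports Linux-style paths with forward slashes; path::Join()
+ // would insert Windows separators, so plain concatenation is used instead.)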
+ for (const std::string& file : response.files()) { + progress_.ReportFileDeleted(response.directory() + file); + } + for (const std::string& dir : response.dirs()) { + progress_.ReportDirDeleted(response.directory() + dir); + } + } + progress_.Finish(); + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::ReceiveFileIndices( + const char* file_type, std::vector* file_indices) { + LOG_INFO("Receiving indices of %s files", file_type); + + for (;;) { + AddFileIndicesResponse response; + absl::Status status = + message_pump_.ReceiveMessage(PacketType::kAddFileIndices, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive AddFileIndicesResponse"); + } + + // An empty response indicates that all files have been sent. + if (response.client_indices_size() == 0) { + break; + } + + // Record file indices. + file_indices->insert(file_indices->end(), response.client_indices().begin(), + response.client_indices().end()); + } + + // Validate indices. + for (uint32_t index : *file_indices) { + if (index >= files_.size()) { + return MakeStatus("Received invalid index %u", index); + } + } + + LOG_INFO("Received %u indices of %s files", file_indices->size(), file_type); + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::SendMissingFiles() { + if (missing_file_indices_.empty()) { + return absl::OkStatus(); + } + + LOG_INFO("Sending missing files"); + + if (options_.dry_run) { + for (uint32_t client_index : missing_file_indices_) { + const ClientFileInfo& file = files_[client_index]; + progress_.StartCopy(file.path.substr(file.base_dir_len), file.size); + progress_.Finish(); + } + return absl::OkStatus(); + } + + // This part is (optionally) compressed. + if (options_.compress) { + absl::Status status = StartCompressionStream(); + if (!status.ok()) { + return WrapStatus(status, "Failed to start compression process"); + } + } + + ParallelFileOpener file_opener(&files_, missing_file_indices_); + + constexpr size_t kBufferSize = 16000; + for (uint32_t server_index = 0; server_index < missing_file_indices_.size(); + ++server_index) { + uint32_t client_index = missing_file_indices_[server_index]; + const ClientFileInfo& file = files_[client_index]; + + LOG_INFO("%s", file.path); + progress_.StartCopy(file.path.substr(file.base_dir_len), file.size); + SendMissingFileDataRequest request; + request.set_server_index(server_index); + absl::Status status = + message_pump_.SendMessage(PacketType::kSendMissingFileData, request); + if (!status.ok()) { + return WrapStatus(status, "Failed to send SendMissingFileDataRequest"); + } + ProgressTracker* progress = &progress_; + auto handler = [message_pump = &message_pump_, progress](const void* data, + size_t size) { + progress->ReportCopyProgress(size); + return message_pump->SendRawData(data, size); + }; + + FILE* fp = file_opener.GetNextOpenFile(); + if (!fp) { + return MakeStatus("Failed to open file '%s'", file.path); + } + status = path::StreamReadFileContents(fp, kBufferSize, handler); + fclose(fp); + if (!status.ok()) { + return WrapStatus(status, "Failed to read file %s", file.path); + } + + progress_.Finish(); + } + + if (options_.compress) { + absl::Status status = StopCompressionStream(); + if (!status.ok()) { + return WrapStatus(status, "Failed to stop compression process"); + } + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::ReceiveSignaturesAndSendDelta() { + if (changed_file_indices_.empty()) { + return absl::OkStatus(); + } + + if (options_.dry_run) { + for (uint32_t client_index : 
changed_file_indices_) { + const ClientFileInfo& file = files_[client_index]; + progress_.StartSync(file.path.substr(file.base_dir_len), file.size, + file.size); + progress_.ReportSyncProgress(file.size, file.size); + progress_.Finish(); + } + return absl::OkStatus(); + } + + LOG_INFO("Receiving signatures and sending deltas of changed files"); + + // This part is (optionally) compressed. + if (options_.compress) { + absl::Status status = StartCompressionStream(); + if (!status.ok()) { + return WrapStatus(status, "Failed to start compression process"); + } + } + + CdcInterface cdc(&message_pump_); + + // Open files in parallel. Speeds up many small file case. + ParallelFileOpener file_opener(&files_, changed_file_indices_); + + std::string signature_data; + for (uint32_t server_index = 0; server_index < changed_file_indices_.size(); + ++server_index) { + uint32_t client_index = changed_file_indices_[server_index]; + const ClientFileInfo& file = files_[client_index]; + + SendSignatureResponse response; + absl::Status status = + message_pump_.ReceiveMessage(PacketType::kAddSignatures, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive SendSignatureResponse"); + } + + // Validate index. + if (response.client_index() != client_index) { + return MakeStatus("Received invalid index %u. Expected %u.", + response.client_index(), client_index); + } + + LOG_INFO("%s", file.path); + progress_.StartSync(file.path.substr(file.base_dir_len), file.size, + response.server_file_size()); + + FILE* fp = file_opener.GetNextOpenFile(); + if (!fp) { + return MakeStatus("Failed to open file '%s'", file.path); + } + + status = cdc.ReceiveSignatureAndCreateAndSendDiff(fp, &progress_); + fclose(fp); + if (!status.ok()) { + return WrapStatus(status, "Failed to sync file %s", file.path); + } + + progress_.Finish(); + } + + if (options_.compress) { + absl::Status status = StopCompressionStream(); + if (!status.ok()) { + return WrapStatus(status, "Failed to stop compression process"); + } + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::StartCompressionStream() { + assert(!compression_stream_); + + // Notify server that data is compressed from now on. + ToggleCompressionRequest request; + absl::Status status = + message_pump_.SendMessage(PacketType::kToggleCompression, request); + if (!status.ok()) { + return WrapStatus(status, "Failed to send ToggleCompressionRequest"); + } + + // Make sure the sender thread is idle. + message_pump_.FlushOutgoingQueue(); + + // Set up compression stream. + uint32_t num_threads = std::thread::hardware_concurrency(); + compression_stream_ = std::make_unique( + &socket_, options_.compress_level, num_threads); + + // Redirect the |message_pump_| output to the compression stream. + message_pump_.RedirectOutput([this](const void* data, size_t size) { + LOG_VERBOSE("Compressing packet of size %u", size); + return compression_stream_->Write(data, size); + }); + + // The pipes are now set up like this: + // |message_pump_| -> |compression_stream_| -> |socket_|. + + return absl::OkStatus(); +} + +absl::Status GgpRsyncClient::StopCompressionStream() { + assert(compression_stream_); + + // Finish writing to |compression_process_|'s stdin and change back to + // writing to the actual network socket. + message_pump_.FlushOutgoingQueue(); + message_pump_.RedirectOutput(nullptr); + + // Flush compression stream and reset. 
+ RETURN_IF_ERROR(compression_stream_->Flush(), + "Failed to flush compression stream"); + compression_stream_.reset(); + + // Wait for the server ack. This must be done before sending more data. + ToggleCompressionResponse response; + absl::Status status = + message_pump_.ReceiveMessage(PacketType::kToggleCompression, &response); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive ToggleCompressionResponse"); + } + + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/cdc_rsync_client.h b/cdc_rsync/cdc_rsync_client.h new file mode 100644 index 0000000..20203ae --- /dev/null +++ b/cdc_rsync/cdc_rsync_client.h @@ -0,0 +1,132 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_CDC_RSYNC_CLIENT_H_ +#define CDC_RSYNC_CDC_RSYNC_CLIENT_H_ + +#include +#include + +#include "absl/status/status.h" +#include "cdc_rsync/base/message_pump.h" +#include "cdc_rsync/cdc_rsync.h" +#include "cdc_rsync/client_socket.h" +#include "cdc_rsync/progress_tracker.h" +#include "common/path_filter.h" +#include "common/port_manager.h" +#include "common/remote_util.h" + +namespace cdc_ft { + +class Process; +class ZstdStream; + +class GgpRsyncClient { + public: + GgpRsyncClient(const Options& options, PathFilter filter, + std::string sources_dir, std::vector sources, + std::string destination); + + ~GgpRsyncClient(); + + // Deploys the server if necessary, starts it and runs the rsync procedure. + absl::Status Run(); + + private: + // Starts the server process. If the method returns a status with tag + // |kTagDeployServer|, Run() calls DeployServer() and tries again. + absl::Status StartServer(); + + // Stops the server process. + absl::Status StopServer(); + + // Handler for stdout and stderr data emitted by the server. + absl::Status HandleServerOutput(const char* data); + + // Runs the rsync procedure. + absl::Status Sync(); + + // Copies all gamelet components to the gamelet. + absl::Status DeployServer(); + + // Sends relevant options to the server. + absl::Status SendOptions(); + + // Finds all source files and sends the file infos to the server. + absl::Status FindAndSendAllSourceFiles(); + + // Receives the stats from the file diffs (e.g. number of missing, changed + // etc. files) from the server. + absl::Status ReceiveFileStats(); + + // Receives paths of deleted files and prints them out. + absl::Status ReceiveDeletedFiles(); + + // Receives file indices from the server. Used for missing and changed files. + absl::Status ReceiveFileIndices(const char* file_type, + std::vector* file_indices); + + // Copies missing files to the server. + absl::Status SendMissingFiles(); + + // Core rsync algorithm. Receives signatures of changed files from server, + // calculates the diffs and sends them to the server. + absl::Status ReceiveSignaturesAndSendDelta(); + + // Start the zstd compression stream. Used before file copy and diff. + absl::Status StartCompressionStream(); + + // Stops the zstd compression stream. 
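+ // Flushes pending packets, redirects output back to the plain socket and
+ // waits for the server's ToggleCompressionResponse before more data is
+ // sent.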
+ absl::Status StopCompressionStream(); + + Options options_; + PathFilter path_filter_; + const std::string sources_dir_; + std::vector sources_; + const std::string destination_; + + WinProcessFactory process_factory_; + RemoteUtil remote_util_; + PortManager port_manager_; + ClientSocket socket_; + MessagePump message_pump_{&socket_, MessagePump::PacketReceivedDelegate()}; + ConsoleProgressPrinter printer_; + ProgressTracker progress_; + std::unique_ptr compression_stream_; + + std::unique_ptr server_process_; + std::string server_output_; // Written in a background thread. Do not access + std::string server_error_; // while the server process is active. + int server_exit_code_ = 0; + std::atomic_bool is_server_listening_{false}; + bool is_server_error_ = false; + + // All source files found on the client. + std::vector files_; + + // All source dirs found on the client. + std::vector dirs_; + + // Indices (into files_) of files that are missing on the server. + std::vector missing_file_indices_; + + // Indices (into files_) of files that exist, but are different on the server. + std::vector changed_file_indices_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_CDC_RSYNC_CLIENT_H_ diff --git a/cdc_rsync/client_file_info.h b/cdc_rsync/client_file_info.h new file mode 100644 index 0000000..22cf9ee --- /dev/null +++ b/cdc_rsync/client_file_info.h @@ -0,0 +1,43 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_CLIENT_FILE_INFO_H_ +#define CDC_RSYNC_CLIENT_FILE_INFO_H_ + +#include + +namespace cdc_ft { + +struct ClientFileInfo { + std::string path; + uint64_t size; + uint32_t base_dir_len; + + ClientFileInfo(const std::string& path, uint64_t size, uint32_t base_dir_len) + : path(path), size(size), base_dir_len(base_dir_len) {} +}; + +struct ClientDirInfo { + std::string path; + uint32_t base_dir_len; + + ClientDirInfo(const std::string& path, uint32_t base_dir_len) + : path(path), base_dir_len(base_dir_len) {} +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_CLIENT_FILE_INFO_H_ diff --git a/cdc_rsync/client_socket.cc b/cdc_rsync/client_socket.cc new file mode 100644 index 0000000..c124ed7 --- /dev/null +++ b/cdc_rsync/client_socket.cc @@ -0,0 +1,174 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
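+
+// Illustrative call sequence for ClientSocket (hypothetical; the port value
+// is a placeholder and error handling is elided):
+//
+//   ClientSocket socket;
+//   absl::Status status = socket.Connect(44450);  // TCP to localhost:44450.
+//   status = socket.Send(buffer, size);
+//   size_t received = 0;
+//   status = socket.Receive(buffer, size, /*allow_partial_read=*/false,
+//                           &received);
+//   socket.ShutdownSendingEnd();  // Lets the server's recv() observe EOF.
+//   socket.Disconnect();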
+ +#include "cdc_rsync/client_socket.h" + +#include +#include + +#include + +#include "common/log.h" +#include "common/status.h" +#include "common/util.h" + +namespace cdc_ft { +namespace { + +// Creates a status with the given |message| and the last WSA error. +// Assigns Tag::kSocketEof for WSAECONNRESET errors. +absl::Status MakeSocketStatus(const char* message) { + const int err = WSAGetLastError(); + absl::Status status = MakeStatus("%s: %s", message, Util::GetWin32Error(err)); + if (err == WSAECONNRESET) { + status = SetTag(status, Tag::kSocketEof); + } + return status; +} + +} // namespace + +struct SocketInfo { + SOCKET socket; + + SocketInfo() : socket(INVALID_SOCKET) {} +}; + +ClientSocket::ClientSocket() = default; + +ClientSocket::~ClientSocket() { Disconnect(); } + +absl::Status ClientSocket::Connect(int port) { + WSADATA wsaData; + int result = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (result != 0) { + return MakeStatus("WSAStartup() failed: %i", result); + } + + addrinfo hints; + ZeroMemory(&hints, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + // Resolve the server address and port. + addrinfo* addr_infos = nullptr; + result = getaddrinfo("localhost", std::to_string(port).c_str(), &hints, + &addr_infos); + if (result != 0) { + WSACleanup(); + return MakeStatus("getaddrinfo() failed: %i", result); + } + + socket_info_ = std::make_unique(); + int count = 0; + for (addrinfo* curr = addr_infos; curr; curr = curr->ai_next, count++) { + socket_info_->socket = + socket(addr_infos->ai_family, addr_infos->ai_socktype, + addr_infos->ai_protocol); + if (socket_info_->socket == INVALID_SOCKET) { + LOG_DEBUG("socket() failed for addr_info %i: %s", count, + Util::GetWin32Error(WSAGetLastError()).c_str()); + continue; + } + + // Connect to server. + result = connect(socket_info_->socket, curr->ai_addr, + static_cast(curr->ai_addrlen)); + if (result == SOCKET_ERROR) { + LOG_DEBUG("connect() failed for addr_info %i: %i", count, result); + closesocket(socket_info_->socket); + socket_info_->socket = INVALID_SOCKET; + continue; + } + + // Success! + break; + } + + freeaddrinfo(addr_infos); + + if (socket_info_->socket == INVALID_SOCKET) { + socket_info_.reset(); + WSACleanup(); + return MakeStatus("Unable to connect to port %i", port); + } + + LOG_INFO("Client socket connected to port %i", port); + return absl::OkStatus(); +} + +void ClientSocket::Disconnect() { + if (!socket_info_) { + return; + } + + if (socket_info_->socket != INVALID_SOCKET) { + closesocket(socket_info_->socket); + socket_info_->socket = INVALID_SOCKET; + } + + socket_info_.reset(); + WSACleanup(); +} + +absl::Status ClientSocket::Send(const void* buffer, size_t size) { + int result = send(socket_info_->socket, static_cast(buffer), + static_cast(size), /*flags */ 0); + if (result == SOCKET_ERROR) { + return MakeSocketStatus("send() failed"); + } + + return absl::OkStatus(); +} + +absl::Status ClientSocket::Receive(void* buffer, size_t size, + bool allow_partial_read, + size_t* bytes_received) { + *bytes_received = 0; + if (size == 0) { + return absl::OkStatus(); + } + + int flags = allow_partial_read ? 
0 : MSG_WAITALL; + int bytes_read = recv(socket_info_->socket, static_cast(buffer), + static_cast(size), flags); + if (bytes_read == SOCKET_ERROR) { + return MakeSocketStatus("recv() failed"); + } + + if (bytes_read == 0) { + // EOF + return SetTag(MakeStatus("EOF detected"), Tag::kSocketEof); + } + + if (bytes_read != size && !allow_partial_read) { + // Can this happen? + return MakeStatus("Partial read"); + } + + *bytes_received = bytes_read; + return absl::OkStatus(); +} + +absl::Status ClientSocket::ShutdownSendingEnd() { + int result = shutdown(socket_info_->socket, SD_SEND); + if (result == SOCKET_ERROR) { + return MakeSocketStatus("shutdown() failed"); + } + + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/client_socket.h b/cdc_rsync/client_socket.h new file mode 100644 index 0000000..ec6eb91 --- /dev/null +++ b/cdc_rsync/client_socket.h @@ -0,0 +1,53 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_CLIENT_SOCKET_H_ +#define CDC_RSYNC_CLIENT_SOCKET_H_ + +#include + +#include "absl/status/status.h" +#include "cdc_rsync/base/socket.h" + +namespace cdc_ft { + +class ClientSocket : public Socket { + public: + ClientSocket(); + ~ClientSocket(); + + // Connects to localhost on |port|. + absl::Status Connect(int port); + + // Disconnects again. No-op if not connected. + void Disconnect(); + + // Shuts down the sending end of the socket. This will interrupt any receive + // calls on the server and shut it down. + absl::Status ShutdownSendingEnd(); + + // Socket: + absl::Status Send(const void* buffer, size_t size) override; + absl::Status Receive(void* buffer, size_t size, bool allow_partial_read, + size_t* bytes_received) override; + + private: + std::unique_ptr socket_info_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_CLIENT_SOCKET_H_ diff --git a/cdc_rsync/cpp.hint b/cdc_rsync/cpp.hint new file mode 100644 index 0000000..5453cdf --- /dev/null +++ b/cdc_rsync/cpp.hint @@ -0,0 +1,2 @@ +#define CDC_RSYNC_API __declspec(dllexport) +#define CDC_RSYNC_API __declspec(dllimport) diff --git a/cdc_rsync/dllmain.cc b/cdc_rsync/dllmain.cc new file mode 100644 index 0000000..337cc93 --- /dev/null +++ b/cdc_rsync/dllmain.cc @@ -0,0 +1,29 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#define WIN32_LEAN_AND_MEAN +#include + +BOOL APIENTRY DllMain(HMODULE /* hModule */, DWORD ul_reason_for_call, + LPVOID /* lpReserved */ +) { + switch (ul_reason_for_call) { + case DLL_PROCESS_ATTACH: + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + break; + } + return TRUE; +} diff --git a/cdc_rsync/error_messages.h b/cdc_rsync/error_messages.h new file mode 100644 index 0000000..6268a23 --- /dev/null +++ b/cdc_rsync/error_messages.h @@ -0,0 +1,54 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_ERROR_MESSAGES_H_ +#define CDC_RSYNC_ERROR_MESSAGES_H_ + +namespace cdc_ft { + +// Server connection timed out. SSH probably stale. +constexpr char kMsgFmtConnectionTimeout[] = + "Server connection timed out. Please re-run 'ggp ssh init' and verify that " + "the IP '%s' and the port '%i' are correct."; + +// Server connection timed out and IP was not passed in. Probably network error. +constexpr char kMsgConnectionTimeoutWithIp[] = + "Server connection timed out. Please check your network connection."; + +// Receiving pipe end was shut down unexpectedly. +constexpr char kMsgConnectionLost[] = + "The connection to the instance was shut down unexpectedly."; + +// Binding to the port failed. +constexpr char kMsgAddressInUse[] = + "Failed to establish a connection to the instance. All ports are already " + "in use. This can happen if another instance of this command is running. " + "Currently, only 10 simultaneous connections are supported."; + +// Deployment failed even though gamelet components were copied successfully. +constexpr char kMsgDeployFailed[] = + "Failed to deploy the instance components for unknown reasons. " + "Please report this issue."; + +// Picking an instance is not allowed in quiet mode. +constexpr char kMsgInstancePickerNotAvailableInQuietMode[] = + "Multiple gamelet instances are reserved, but the instance picker is not " + "available in quiet mode. Please specify --instance or remove -q resp. " + "--quiet."; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_ERROR_MESSAGES_H_ diff --git a/cdc_rsync/file_finder_and_sender.cc b/cdc_rsync/file_finder_and_sender.cc new file mode 100644 index 0000000..2fbc8d5 --- /dev/null +++ b/cdc_rsync/file_finder_and_sender.cc @@ -0,0 +1,248 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
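+
+// Illustrative message flow (hypothetical trace; the file names are
+// placeholders): finding "dir\a.txt" and "dir\sub\b.txt" below the base dir
+// results in roughly
+//
+//   AddFilesRequest{ directory: "dir\", files: ["a.txt"] }
+//   AddFilesRequest{ directory: "dir\sub\", files: ["b.txt"] }
+//   AddFilesRequest{}  // Empty request sent by Flush() as EOF indicator.
+//
+// A batch is also flushed early once its estimated size reaches
+// request_size_threshold_.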
+ +#include "cdc_rsync/file_finder_and_sender.h" + +#include "absl/strings/match.h" +#include "absl/strings/str_format.h" +#include "cdc_rsync/base/message_pump.h" +#include "common/log.h" +#include "common/path.h" +#include "common/path_filter.h" +#include "common/status.h" + +namespace cdc_ft { +namespace { + +bool EndsWithSpecialDir(const std::string& source) { + return source == "." || source == ".." || absl::EndsWith(source, "\\.") || + absl::EndsWith(source, "\\.."); +} + +// Returns C:\ from C:\path\to\file or an empty string if there is no drive. +std::string GetDrivePrefixWithBackslash(const std::string& source) { + std::string prefix = path::GetDrivePrefix(source); + if (source[prefix.size()] == '\\') { + prefix += "\\"; + } + return prefix; +} + +// Basically returns |sources_dir| + |source|, but removes drive letters from +// |source| if present and |sources_dir| is not empty. +std::string GetFullSource(const std::string& source, + const std::string& sources_dir) { + if (sources_dir.empty()) { + return source; + } + + // Combine |sources_dir_| and |source|, but remove the drive prefix, so + // that we don't get stuff like "source_dir\C:\path\to\file". + return path::Join(sources_dir, + source.substr(GetDrivePrefixWithBackslash(source).size())); +} + +std::string GetBaseDir(const std::string& source, + const std::string& sources_dir, bool relative) { + if (!relative) { + // For non-relative mode, the base dir is the directory part, so that + // path\to\file is copied to remote_dir/file and files in path\to\ are + // copied to remote_dir. + if (path::EndsWithPathSeparator(source)) return source; + std::string dir = path::DirName(source); + if (!dir.empty()) path::EnsureEndsWithPathSeparator(&dir); + return dir; + } + + // A "\.\" is a marker for where the relative path should start. + // The base dir is the part up to that marker, so that + // path\.\to\file is copied to remote_dir/to/file. + size_t pos = source.find("\\.\\"); + if (pos != std::string::npos) { + return source.substr(0, pos + 3); + } + + // If there is a sources dir, the base dir is the sources dir, so that + // sources_dir\path\to\file is copied to remote_dir/path/to/file. + if (!sources_dir.empty()) { + assert(source.find(sources_dir) == 0); + return sources_dir; + } + + // If there is a drive prefix, the base dir is that part, so that + // C:\path\to\file is copied to remote_dir/path/to/file. + return GetDrivePrefixWithBackslash(source); +} + +} // namespace + +FileFinderAndSender::FileFinderAndSender(PathFilter* path_filter, + MessagePump* message_pump, + ReportFindFilesProgress* progress, + std::string sources_dir, + bool recursive, bool relative, + size_t request_byte_threshold) + : path_filter_(path_filter), + message_pump_(message_pump), + progress_(progress), + sources_dir_(std::move(sources_dir)), + recursive_(recursive), + relative_(relative), + request_size_threshold_(request_byte_threshold) { + // (internal): Support / instead of \ in the source folder. + path::FixPathSeparators(&sources_dir_); +} + +FileFinderAndSender::~FileFinderAndSender() = default; + +absl::Status FileFinderAndSender::FindAndSendFiles(std::string source) { + // (internal): Support / instead of \ in sources. + path::FixPathSeparators(&source); + // Special case, "." and ".." should not specify the directory, but the files + // inside this directory! + if (EndsWithSpecialDir(source)) { + path::EnsureEndsWithPathSeparator(&source); + } + + // Combine |source| and |sources_dir_| if present. 
+ std::string full_source = GetFullSource(source, sources_dir_); + + // Get the part of the path to remove before sending it to the server. + base_dir_ = GetBaseDir(full_source, sources_dir_, relative_); + + size_t prev_size = files_.size() + dirs_.size(); + + auto handler = [this](std::string dir, std::string filename, + int64_t modified_time, uint64_t size, + bool is_directory) { + return HandleFoundFileOrDir(std::move(dir), std::move(filename), + modified_time, size, is_directory); + }; + + absl::Status status = path::SearchFiles(full_source, recursive_, handler); + if (!status.ok()) { + return WrapStatus(status, + "Failed to gather source files and directories for '%s'", + full_source); + } + + if (files_.size() + dirs_.size() == prev_size) { + LOG_WARNING("Neither files nor directories found that match source '%s'", + full_source.c_str()); + // This isn't fatal. + } + + return absl::OkStatus(); +} + +absl::Status FileFinderAndSender::Flush() { + // Flush remaining files. + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus(status, "SendFilesAndDirs() failed"); + } + + // Send an empty batch as EOF indicator. + assert(request_.files_size() == 0); + status = message_pump_->SendMessage(PacketType::kAddFiles, request_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send EOF indicator"); + } + + return absl::OkStatus(); +} + +void FileFinderAndSender::ReleaseFiles(std::vector* files) { + *files = std::move(files_); +} + +void FileFinderAndSender::ReleaseDirs(std::vector* dirs) { + *dirs = std::move(dirs_); +} + +absl::Status FileFinderAndSender::HandleFoundFileOrDir(std::string dir, + std::string filename, + int64_t modified_time, + uint64_t size, + bool is_directory) { + std::string relative_dir = dir.substr(base_dir_.size()); + + // Is the path excluded? Check IsEmpty() first to save the path::Join() + // if no filter is used (pretty common case). + if (!path_filter_->IsEmpty() && + !path_filter_->IsMatch(path::Join(relative_dir, filename))) { + return absl::OkStatus(); + } + if (is_directory) { + progress_->ReportDirFound(); + } else { + progress_->ReportFileFound(); + } + + if (request_.directory() != relative_dir) { + // Flush files in previous directory. + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus(status, "SendFilesAndDirs() failed"); + } + + // Set new directory. + request_.set_directory(relative_dir); + request_size_ = request_.directory().length(); + } + + if (is_directory) { + dirs_.emplace_back(path::Join(dir, filename), + static_cast(base_dir_.size())); + request_.add_dirs(filename); + request_size_ += filename.size(); + } else { + files_.emplace_back(path::Join(dir, filename), size, + static_cast(base_dir_.size())); + + AddFilesRequest::File* file = request_.add_files(); + file->set_filename(filename); + file->set_modified_time(modified_time); + file->set_size(size); + // The serialized proto might have a slightly different length due to + // packing, but this doesn't need to be exact. 
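+ // The running estimate only determines when SendFilesAndDirs() flushes the
+ // current batch; it does not affect how much data is actually sent.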
+ request_size_ += filename.size() + sizeof(modified_time) + sizeof(size); + } + if (request_size_ >= request_size_threshold_) { + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus(status, "SendFilesAndDirs() failed"); + } + } + + return absl::OkStatus(); +} + +absl::Status FileFinderAndSender::SendFilesAndDirs() { + if (request_.files_size() == 0 && request_.dirs_size() == 0) { + return absl::OkStatus(); + } + absl::Status status = + message_pump_->SendMessage(PacketType::kAddFiles, request_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send AddFilesRequest"); + } + + request_.clear_files(); + request_.clear_dirs(); + request_size_ = request_.directory().length(); + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/file_finder_and_sender.h b/cdc_rsync/file_finder_and_sender.h new file mode 100644 index 0000000..b1c8641 --- /dev/null +++ b/cdc_rsync/file_finder_and_sender.h @@ -0,0 +1,90 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_FILE_FINDER_AND_SENDER_H_ +#define CDC_RSYNC_FILE_FINDER_AND_SENDER_H_ + +#include +#include + +#include "absl/status/status.h" +#include "cdc_rsync/client_file_info.h" +#include "cdc_rsync/protos/messages.pb.h" + +namespace cdc_ft { + +class MessagePump; +class PathFilter; + +class ReportFindFilesProgress { + public: + virtual ~ReportFindFilesProgress() = default; + virtual void ReportFileFound() = 0; + virtual void ReportDirFound() = 0; +}; + +class FileFinderAndSender { + public: + // Send AddFileRequests in packets of roughly 10k max by default. + static constexpr size_t kDefaultRequestSizeThreshold = 10000; + + FileFinderAndSender( + PathFilter* path_filter, MessagePump* message_pump, + ReportFindFilesProgress* progress_, std::string sources_dir, + bool recursive, bool relative, + size_t request_byte_threshold = kDefaultRequestSizeThreshold); + ~FileFinderAndSender(); + + absl::Status FindAndSendFiles(std::string source); + + // Sends the remaining file batch to the client, followed by an EOF indicator. + // Should be called once all files have been deleted. + absl::Status Flush(); + + void ReleaseFiles(std::vector* files); + void ReleaseDirs(std::vector* dirs); + + private: + absl::Status HandleFoundFileOrDir(std::string dir, std::string filename, + int64_t modified_time, uint64_t size, + bool is_directory); + + // Sends the current file and directory batch to the server. + absl::Status SendFilesAndDirs(); + + PathFilter* const path_filter_; + MessagePump* const message_pump_; + ReportFindFilesProgress* const progress_; + std::string sources_dir_; + const bool recursive_; + const bool relative_; + const size_t request_size_threshold_; + + // Prefix removed from found files before they are sent to the server. + std::string base_dir_; + AddFilesRequest request_; + size_t request_size_ = 0; + + // Found files. + std::vector files_; + + // Found directories. 
+ std::vector dirs_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_FILE_FINDER_AND_SENDER_H_ diff --git a/cdc_rsync/file_finder_and_sender_test.cc b/cdc_rsync/file_finder_and_sender_test.cc new file mode 100644 index 0000000..a6d5438 --- /dev/null +++ b/cdc_rsync/file_finder_and_sender_test.cc @@ -0,0 +1,408 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/file_finder_and_sender.h" + +#include + +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "cdc_rsync/base/fake_socket.h" +#include "cdc_rsync/base/message_pump.h" +#include "common/log.h" +#include "common/path.h" +#include "common/path_filter.h" +#include "common/status_test_macros.h" +#include "common/test_main.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +// Definitions to improve readability. +constexpr bool kNotRecursive = false; +constexpr bool kRecursive = true; + +constexpr bool kNotRelative = false; +constexpr bool kRelative = true; + +class FakeFindFilesProgress : public ReportFindFilesProgress { + public: + FakeFindFilesProgress() {} + void ReportFileFound() override { num_files_++; } + void ReportDirFound() override { num_dirs_++; } + + uint64_t num_files_ = 0; + uint64_t num_dirs_ = 0; +}; + +class FileFinderAndSenderTest : public ::testing::Test { + void SetUp() override { + Log::Initialize(std::make_unique(LogLevel::kInfo)); + message_pump_.StartMessagePump(); + } + + void TearDown() override { + socket_.ShutdownSendingEnd(); + message_pump_.StopMessagePump(); + Log::Shutdown(); + } + + protected: + struct ReceivedFile { + std::string dir; + std::string file; + ReceivedFile(std::string dir, std::string file) + : dir(std::move(dir)), file(std::move(file)) {} + }; + + struct ReceivedFileFormatter { + void operator()(std::string* out, const ReceivedFile& val) const { + absl::StrAppend(out, absl::StrFormat("{ %s, %s }", val.dir, val.file)); + } + }; + + void ExpectReceiveFiles(std::vector expected_data, + std::vector expected_batch_count = {}) { + std::vector data; + std::vector batch_count; + AddFilesRequest request; + for (;;) { + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kAddFiles, &request)); + if (request.files_size() == 0 && request.dirs_size() == 0) { + // EOF. + break; + } + + batch_count.push_back(request.files_size() + request.dirs_size()); + for (const auto& file : request.files()) { + data.emplace_back(request.directory(), file.filename()); + } + for (const auto& dir : request.dirs()) { + data.emplace_back(request.directory(), dir); + } + } + + // expected_batch_count can be empty for convenience. 
+ if (!expected_batch_count.empty()) { + EXPECT_EQ(absl::StrJoin(batch_count, ", "), + absl::StrJoin(expected_batch_count, ", ")); + } + + EXPECT_EQ(absl::StrJoin(data, ", ", ReceivedFileFormatter()), + absl::StrJoin(expected_data, ", ", ReceivedFileFormatter())); + } + + FakeSocket socket_; + FakeFindFilesProgress progress_; + PathFilter path_filter_; + MessagePump message_pump_{&socket_, MessagePump::PacketReceivedDelegate()}; + + std::string base_dir_ = GetTestDataDir("file_finder_and_sender"); +}; + +TEST_F(FileFinderAndSenderTest, FindNonRecursive) { + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kNotRecursive, kNotRelative); + + EXPECT_OK(finder.FindAndSendFiles(base_dir_)); + EXPECT_EQ(progress_.num_files_, 0); + std::vector files; + EXPECT_OK(finder.Flush()); + finder.ReleaseFiles(&files); + + ASSERT_TRUE(files.empty()); + ExpectReceiveFiles({{}}); +} + +TEST_F(FileFinderAndSenderTest, FindRecursive) { + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kRecursive, kNotRelative); + + EXPECT_OK(finder.FindAndSendFiles(base_dir_)); + EXPECT_EQ(progress_.num_files_, 5); + EXPECT_EQ(progress_.num_dirs_, 2); + + EXPECT_OK(finder.Flush()); + std::vector files; + finder.ReleaseFiles(&files); + std::vector dirs; + finder.ReleaseDirs(&dirs); + + ASSERT_EQ(files.size(), 5); + EXPECT_EQ(files[0].path, path::Join(base_dir_, "a.txt")); + EXPECT_EQ(files[1].path, path::Join(base_dir_, "b.txt")); + EXPECT_EQ(files[2].path, path::Join(base_dir_, "c.txt")); + EXPECT_EQ(files[3].path, path::Join(base_dir_, "subdir", "d.txt")); + EXPECT_EQ(files[4].path, path::Join(base_dir_, "subdir", "e.txt")); + + ASSERT_EQ(dirs.size(), 2); + EXPECT_EQ(dirs[0].path, base_dir_); + EXPECT_EQ(dirs[1].path, path::Join(base_dir_, "subdir")); + + // Verify that the data sent to the socket matches. + ExpectReceiveFiles({{"", "file_finder_and_sender"}, + {"file_finder_and_sender\\", "a.txt"}, + {"file_finder_and_sender\\", "b.txt"}, + {"file_finder_and_sender\\", "c.txt"}, + {"file_finder_and_sender\\", "subdir"}, + {"file_finder_and_sender\\subdir\\", "d.txt"}, + {"file_finder_and_sender\\subdir\\", "e.txt"}}, + {1, 4, 2}); +} + +TEST_F(FileFinderAndSenderTest, FindWithSmallerBatchSize) { + // Tweak size threshold so that we get 2 files per batch for the base dir. + // This tests that the batch gets flushed when the directory changes since + // the base dir has 3 files. + int request_byte_size_threshold = static_cast( + strlen("file_finder_and_sender\\") + // directory + sizeof(int64_t) + sizeof(uint64_t) + // modified_time + size + strlen("a.txt") + 3); // filename + some slack + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kRecursive, kNotRelative, + request_byte_size_threshold); + + EXPECT_OK(finder.FindAndSendFiles(base_dir_)); + EXPECT_OK(finder.Flush()); + EXPECT_EQ(progress_.num_files_, 5); + ASSERT_EQ(progress_.num_dirs_, 2); + + // Note that the expected batch size is {1, 2, 2, 1, 1} here due to the + // smaller request sizes. 
+ ExpectReceiveFiles({{"", "file_finder_and_sender"}, + {"file_finder_and_sender\\", "a.txt"}, + {"file_finder_and_sender\\", "b.txt"}, + {"file_finder_and_sender\\", "c.txt"}, + {"file_finder_and_sender\\", "subdir"}, + {"file_finder_and_sender\\subdir\\", "d.txt"}, + {"file_finder_and_sender\\subdir\\", "e.txt"}}, + {1, 2, 2, 1, 1}); +} + +TEST_F(FileFinderAndSenderTest, FindWithFilter) { + path_filter_.AddRule(PathFilter::Rule::Type::kExclude, "*b.txt"); + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kNotRecursive, kNotRelative); + + EXPECT_OK(finder.FindAndSendFiles(path::Join(base_dir_, "*"))); + EXPECT_EQ(progress_.num_files_, 2); + EXPECT_EQ(progress_.num_dirs_, 1); + + EXPECT_OK(finder.Flush()); + std::vector files; + finder.ReleaseFiles(&files); + std::vector dirs; + finder.ReleaseDirs(&dirs); + + ASSERT_EQ(files.size(), 2); + EXPECT_EQ(files[0].path, path::Join(base_dir_, "a.txt")); + EXPECT_EQ(files[1].path, path::Join(base_dir_, "c.txt")); + + ASSERT_EQ(dirs.size(), 1); + EXPECT_EQ(dirs[0].path, path::Join(base_dir_, "subdir")); + + ExpectReceiveFiles({{"", "a.txt"}, {"", "c.txt"}, {"", "subdir"}}); +} + +TEST_F(FileFinderAndSenderTest, FindWithDot) { + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kRecursive, kNotRelative); + + EXPECT_OK(finder.FindAndSendFiles(base_dir_ + "\\.")); + EXPECT_EQ(progress_.num_files_, 5); + EXPECT_EQ(progress_.num_dirs_, 1); + + EXPECT_OK(finder.Flush()); + + ExpectReceiveFiles({{"", "a.txt"}, + {"", "b.txt"}, + {"", "c.txt"}, + {"", "subdir"}, + {"subdir\\", "d.txt"}, + {"subdir\\", "e.txt"}}, + {}); +} + +TEST_F(FileFinderAndSenderTest, FindWithForwardSlash) { + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kRecursive, kNotRelative); + + std::string base_dir_forward = GetTestDataDir("file_finder_and_sender"); + std::replace(base_dir_forward.begin(), base_dir_forward.end(), '\\', '/'); + + EXPECT_OK(finder.FindAndSendFiles(base_dir_forward + "/.")); + EXPECT_EQ(progress_.num_files_, 5); + EXPECT_EQ(progress_.num_dirs_, 1); + + EXPECT_OK(finder.Flush()); + + ExpectReceiveFiles({{"", "a.txt"}, + {"", "b.txt"}, + {"", "c.txt"}, + {"", "subdir"}, + {"subdir\\", "d.txt"}, + {"subdir\\", "e.txt"}}, + {}); +} + +TEST_F(FileFinderAndSenderTest, FindWithRelative) { + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, "", + kNotRecursive, kRelative); + + std::vector sources = { + path::Join(base_dir_, "a.txt"), path::Join(base_dir_, ".", "b.txt"), + path::Join(base_dir_, ".", "subdir", "d.txt"), + path::Join(base_dir_, "subdir", ".", "e.txt")}; + + for (const std::string& source : sources) { + EXPECT_OK(finder.FindAndSendFiles(source)); + } + + EXPECT_EQ(progress_.num_files_, 4); + std::vector files; + EXPECT_OK(finder.Flush()); + finder.ReleaseFiles(&files); + + ASSERT_EQ(files.size(), 4); + EXPECT_EQ(files[0].path, sources[0]); + EXPECT_EQ(files[1].path, sources[1]); + EXPECT_EQ(files[2].path, sources[2]); + EXPECT_EQ(files[3].path, sources[3]); + + // The paths sent to the socket should have the correct relative paths. + std::string rel = + base_dir_.substr(path::GetDrivePrefix(base_dir_).size() + 1) + "\\"; + + ExpectReceiveFiles( + {{rel, "a.txt"}, {"", "b.txt"}, {"subdir\\", "d.txt"}, {"", "e.txt"}}); +} + +TEST_F(FileFinderAndSenderTest, FindWithRelativeAndSourcesDir) { + // Just go to the parent directory, we just need some existing dir. 
+ std::string sources_dir = path::DirName(base_dir_); + path::EnsureEndsWithPathSeparator(&sources_dir); + std::string rel_dir = base_dir_.substr(sources_dir.size()); + + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, + sources_dir, kNotRecursive, kRelative); + + std::vector sources = { + path::Join(rel_dir, "a.txt"), path::Join(rel_dir, ".", "b.txt"), + path::Join(rel_dir, "subdir", "d.txt"), + path::Join(rel_dir, "subdir", ".", "e.txt")}; + + for (const std::string& source : sources) { + EXPECT_OK(finder.FindAndSendFiles(source)); + } + + EXPECT_EQ(progress_.num_files_, 4); + std::vector files; + EXPECT_OK(finder.Flush()); + finder.ReleaseFiles(&files); + + ASSERT_EQ(files.size(), 4); + EXPECT_EQ(files[0].path, path::Join(sources_dir, sources[0])); + EXPECT_EQ(files[1].path, path::Join(sources_dir, sources[1])); + EXPECT_EQ(files[2].path, path::Join(sources_dir, sources[2])); + EXPECT_EQ(files[3].path, path::Join(sources_dir, sources[3])); + + path::EnsureEndsWithPathSeparator(&rel_dir); + ExpectReceiveFiles({{rel_dir, "a.txt"}, + {"", "b.txt"}, + {rel_dir + "subdir\\", "d.txt"}, + {"", "e.txt"}}); +} + +TEST_F(FileFinderAndSenderTest, + FindWithRelativeAndSourcesDirForwardSlashInSouceDir) { + // Just go to the parent directory, we just need some existing dir. + std::string sources_dir = path::DirName(base_dir_); + path::EnsureEndsWithPathSeparator(&sources_dir); + std::string rel_dir = base_dir_.substr(sources_dir.size()); + + std::string sources_dir_forward(sources_dir); + std::replace(sources_dir_forward.begin(), sources_dir_forward.end(), '\\', + '/'); + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, + sources_dir_forward, kNotRecursive, kRelative); + + std::vector sources = { + path::Join(rel_dir, "a.txt"), path::Join(rel_dir, ".", "b.txt"), + path::Join(rel_dir, "subdir", "d.txt"), + path::Join(rel_dir, "subdir", ".", "e.txt")}; + + for (const std::string& source : sources) { + EXPECT_OK(finder.FindAndSendFiles(source)); + } + + EXPECT_EQ(progress_.num_files_, 4); + std::vector files; + EXPECT_OK(finder.Flush()); + finder.ReleaseFiles(&files); + + ASSERT_EQ(files.size(), 4); + EXPECT_EQ(files[0].path, path::Join(sources_dir, sources[0])); + EXPECT_EQ(files[1].path, path::Join(sources_dir, sources[1])); + EXPECT_EQ(files[2].path, path::Join(sources_dir, sources[2])); + EXPECT_EQ(files[3].path, path::Join(sources_dir, sources[3])); + + path::EnsureEndsWithPathSeparator(&rel_dir); + ExpectReceiveFiles({{rel_dir, "a.txt"}, + {"", "b.txt"}, + {rel_dir + "subdir\\", "d.txt"}, + {"", "e.txt"}}); +} +TEST_F(FileFinderAndSenderTest, + FindWithRelativeAndSourcesDirForwardSlashInSourceDirFiles) { + // Just go to the parent directory, we just need some existing dir. 
+ std::string sources_dir = path::DirName(base_dir_); + path::EnsureEndsWithPathSeparator(&sources_dir); + std::string rel_dir = base_dir_.substr(sources_dir.size()); + + std::string sources_dir_forward(sources_dir); + std::replace(sources_dir_forward.begin(), sources_dir_forward.end(), '\\', + '/'); + FileFinderAndSender finder(&path_filter_, &message_pump_, &progress_, + sources_dir_forward, kNotRecursive, kRelative); + + std::vector sources = {rel_dir + "/a.txt", rel_dir + "/./b.txt", + rel_dir + "/subdir/d.txt", + rel_dir + "/subdir/./e.txt"}; + + for (const std::string& source : sources) { + EXPECT_OK(finder.FindAndSendFiles(source)); + } + + EXPECT_EQ(progress_.num_files_, 4); + std::vector files; + EXPECT_OK(finder.Flush()); + finder.ReleaseFiles(&files); + + ASSERT_EQ(files.size(), 4); + for (std::string& source : sources) path::FixPathSeparators(&source); + EXPECT_EQ(files[0].path, path::Join(sources_dir, sources[0])); + EXPECT_EQ(files[1].path, path::Join(sources_dir, sources[1])); + EXPECT_EQ(files[2].path, path::Join(sources_dir, sources[2])); + EXPECT_EQ(files[3].path, path::Join(sources_dir, sources[3])); + + path::EnsureEndsWithPathSeparator(&rel_dir); + ExpectReceiveFiles({{rel_dir, "a.txt"}, + {"", "b.txt"}, + {rel_dir + "subdir\\", "d.txt"}, + {"", "e.txt"}}); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync/parallel_file_opener.cc b/cdc_rsync/parallel_file_opener.cc new file mode 100644 index 0000000..4dd0f85 --- /dev/null +++ b/cdc_rsync/parallel_file_opener.cc @@ -0,0 +1,122 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/parallel_file_opener.h" + +#include "absl/status/statusor.h" +#include "common/path.h" + +namespace cdc_ft { +namespace { + +// Number of threads in the pool. +size_t GetPoolSize() { + uint32_t num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) return 4; + return num_threads; +} + +// Number of file open operations to queue in advance. 
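+// 256 is a heuristic that keeps the worker pool busy well ahead of the
+// consumer while bounding how many file handles are held open at once.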
+const size_t kNumQueuedTasks = 256; + +} // namespace + +namespace internal { + +class FileOpenTask : public Task { + public: + FileOpenTask(size_t index, ClientFileInfo file) + : index_(index), file_(file) {} + + ~FileOpenTask() { + if (*fp_) { + fclose(*fp_); + *fp_ = nullptr; + } + } + + FileOpenTask(const FileOpenTask& other) = delete; + FileOpenTask(const FileOpenTask&& other) = delete; + + FileOpenTask& operator=(FileOpenTask&) = delete; + FileOpenTask& operator=(FileOpenTask&&) = delete; + + void ThreadRun(IsCancelledPredicate is_cancelled) override { + fp_ = path::OpenFile(file_.path, "rb"); + } + + size_t Index() const { return index_; } + + FILE* ReleaseFile() { + FILE* fp = *fp_; + *fp_ = nullptr; + return fp; + } + + private: + size_t index_; + ClientFileInfo file_; + absl::StatusOr fp_ = nullptr; +}; + +} // namespace internal + +ParallelFileOpener::ParallelFileOpener( + const std::vector* files, + const std::vector& file_indices) + : files_(files), file_indices_(file_indices), pool_(GetPoolSize()) { + // Queue the first |kNumQueuedTasks| files (if available). + size_t num_to_queue = std::min(kNumQueuedTasks, file_indices_.size()); + for (size_t n = 0; n < num_to_queue; ++n) { + QueueNextFile(); + } +} + +ParallelFileOpener::~ParallelFileOpener() = default; + +FILE* ParallelFileOpener::GetNextOpenFile() { + if (curr_index_ >= file_indices_.size()) { + return nullptr; + } + + QueueNextFile(); + + // Wait until the file at |curr_index_| is available. + // Note that |index_to_completed_tasks_| is sorted by index. + while (index_to_completed_tasks_.empty() || + index_to_completed_tasks_.begin()->first != curr_index_) { + std::unique_ptr task = pool_.GetCompletedTask(); + auto* fopen_task = static_cast(task.release()); + index_to_completed_tasks_[fopen_task->Index()].reset(fopen_task); + } + + // The first completed task should be the one for |curr_index_|. + const auto& first_iter = index_to_completed_tasks_.begin(); + FILE* file = first_iter->second->ReleaseFile(); + index_to_completed_tasks_.erase(first_iter); + curr_index_++; + return file; +} + +void ParallelFileOpener::QueueNextFile() { + if (look_ahead_index_ >= file_indices_.size()) { + return; + } + + pool_.QueueTask(std::make_unique( + look_ahead_index_, files_->at(file_indices_[look_ahead_index_]))); + ++look_ahead_index_; +} + +} // namespace cdc_ft diff --git a/cdc_rsync/parallel_file_opener.h b/cdc_rsync/parallel_file_opener.h new file mode 100644 index 0000000..b8d5c45 --- /dev/null +++ b/cdc_rsync/parallel_file_opener.h @@ -0,0 +1,74 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_PARALLEL_FILE_OPENER_H_ +#define CDC_RSYNC_PARALLEL_FILE_OPENER_H_ + +#include +#include +#include + +#include "cdc_rsync/client_file_info.h" +#include "common/threadpool.h" + +namespace cdc_ft { + +namespace internal { +class FileOpenTask; +} + +// Opens files on a background worker thread pool. 
This improves performance in +// cases where many files have to be opened quickly. +class ParallelFileOpener { + public: + // Starts opening the |files| indexed by |file_indices| in the background. + ParallelFileOpener(const std::vector* files, + const std::vector& file_indices); + + ~ParallelFileOpener(); + + // Returns FILE* pointer from opening a file from |files| in rb-mode. + // The first call returns the FILE* pointer for files[file_indices[0]], + // the second call returns the FILE* pointer for files[file_indices[1]] etc. + // The caller must close the file. + FILE* GetNextOpenFile(); + + private: + // Queues another open file task. + void QueueNextFile(); + + // Pointer to list of files, not owned. + const std::vector* files_; + + // Indices into the |files_| to open. + std::vector file_indices_; + + // Index into |file_indices_| of the next file returned by GetNextOpenFile(). + size_t curr_index_ = 0; + + // Index into |file_indices_| of the file queued by QueueNextFile(). + size_t look_ahead_index_ = 0; + + // Maps index into |file_indices_| to completed task. + std::map> + index_to_completed_tasks_; + + Threadpool pool_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_PARALLEL_FILE_OPENER_H_ diff --git a/cdc_rsync/parallel_file_opener_test.cc b/cdc_rsync/parallel_file_opener_test.cc new file mode 100644 index 0000000..4eed827 --- /dev/null +++ b/cdc_rsync/parallel_file_opener_test.cc @@ -0,0 +1,78 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/parallel_file_opener.h" + +#include "common/path.h" +#include "common/test_main.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +class ParallelFileOpenerTest : public ::testing::Test { + protected: + std::string base_dir_ = GetTestDataDir("parallel_file_opener"); + + // Args 2 (file size) and 3 (base dir len) are not used. 
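+  // They are passed as zeros below; only the path matters for these tests.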
+ std::vector files_ = { + ClientFileInfo(path::Join(base_dir_, "file1.txt"), 0, 0), + ClientFileInfo(path::Join(base_dir_, "file2.txt"), 0, 0), + ClientFileInfo(path::Join(base_dir_, "file3.txt"), 0, 0), + }; + + std::string ReadAndClose(FILE* file) { + char line[256] = {0}; + EXPECT_TRUE(fgets(line, sizeof(line) - 1, file)); + fclose(file); + return line; + } +}; + +TEST_F(ParallelFileOpenerTest, OpenNoFiles) { + ParallelFileOpener file_opener(&files_, {}); + EXPECT_EQ(file_opener.GetNextOpenFile(), nullptr); +} + +TEST_F(ParallelFileOpenerTest, OpenSingleFile) { + ASSERT_GE(files_.size(), 3); + ParallelFileOpener file_opener(&files_, {1}); + + FILE* file2 = file_opener.GetNextOpenFile(); + ASSERT_NE(file2, nullptr); + EXPECT_EQ(ReadAndClose(file2), "data2"); + + EXPECT_EQ(file_opener.GetNextOpenFile(), nullptr); +} + +TEST_F(ParallelFileOpenerTest, OpenManyFiles) { + const int num_indices = 500; + std::vector indices; + for (int n = 0; n < num_indices; ++n) { + indices.push_back(n % files_.size()); + } + + ParallelFileOpener file_opener(&files_, indices); + + for (int n = 0; n < num_indices; ++n) { + FILE* file = file_opener.GetNextOpenFile(); + ASSERT_NE(file, nullptr); + EXPECT_EQ(ReadAndClose(file), "data" + std::to_string(indices[n] + 1)); + } + + EXPECT_EQ(file_opener.GetNextOpenFile(), nullptr); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync/progress_tracker.cc b/cdc_rsync/progress_tracker.cc new file mode 100644 index 0000000..964b7bd --- /dev/null +++ b/cdc_rsync/progress_tracker.cc @@ -0,0 +1,549 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/progress_tracker.h" + +#include +#include + +#include "absl/strings/str_format.h" +#include "common/util.h" +#include "json/json.h" + +namespace cdc_ft { + +namespace { + +// Count signature progress 1/40 because it's probably faster than the rest. +// This assumes a sig speed of 800 MB/sec and a diffing speed of 20 MB/sec, +// but since we don't know the exact numbers, we're estimating them. +constexpr int kSigFactor = 40; + +// Fills up |str| with spaces up to a string length of |size|. +void PaddRight(std::string* str, size_t size) { + if (str->size() < size) { + str->insert(str->size(), size - str->size(), ' '); + } +} + +// Shortens |filepath| if it is longer than |max_len|. +// TODO: Improve this, e.g. by replacing directories in the middle by "..". +// TODO: Also make sure it plays nicely with UTF-8 code points. +std::string ShortenFilePath(const std::string filepath, size_t max_len) { + if (filepath.size() > max_len && max_len >= 3) { + return "..." + filepath.substr(filepath.size() - max_len + 3); + } + return filepath; +} + +// Formatting might be messed up for >9999 ZB. Please don't sync large files! +const char* kSizeUnits[] = {"B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"}; + +// Divides |size| by 1024 as long as it has more than 4 digits and returns the +// corresponding unit, e.g. 10240 -> 10KB. 
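+// For example, 2 TiB (2199023255552 bytes) is reduced to 2048 with unit "GB",
+// since 2048 already fits into four digits.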
+const char* FormatIntBytes(uint64_t* size) { + int unitIdx = 0; + + while (*size > 9999 && unitIdx < std::size(kSizeUnits) - 1) { + ++unitIdx; + *size /= 1024; + } + + return kSizeUnits[unitIdx]; +} + +// Divides |size| by 1024 as long as it has more than 3 digits and returns the +// corresponding unit, e.g. 1500 -> 1.5KB. +const char* FormatDoubleBytes(double* size) { + int unitIdx = 0; + + while (*size > 999.9 && unitIdx < std::size(kSizeUnits) - 1) { + ++unitIdx; + *size /= 1024.0; + } + + return kSizeUnits[unitIdx]; +} + +// Formats |sec| seconds into hh::mm:ss if more than one hour or else mm:ss. +std::string FormatTime(double sec) { + int isec = static_cast(sec); + int ihour = isec / 3600; + isec -= ihour * 3600; + int imin = isec / 60; + isec -= imin * 60; + + if (ihour > 0) { + return absl::StrFormat("%02i:%02i:%02i", ihour, imin, isec); + } + + return absl::StrFormat("%02i:%02i", imin, isec); +} + +// Returns curr/total as double number, clamped to [0,1]. +double GetProgress(uint64_t curr, uint64_t total) { + double progress = static_cast(curr) / std::max(1, total); + return std::min(std::max(progress, 0.0), 1.0); +} + +} // namespace + +ConsoleProgressPrinter::ConsoleProgressPrinter(bool quiet, bool is_tty) + : ProgressPrinter(quiet, is_tty) {} + +void ConsoleProgressPrinter::Print(std::string text, bool newline, + int output_width) { + if (quiet()) { + return; + } + + char linechar = newline || !is_tty() ? '\n' : '\r'; + if (is_tty()) PaddRight(&text, output_width); + printf("%s%c", text.c_str(), linechar); + if (!is_tty()) fflush(stdout); +} + +ProgressTracker::ProgressTracker(ProgressPrinter* printer, int verbosity, + bool json, int fixed_output_width, + SteadyClock* clock) + : printer_(printer), + only_total_progress_(verbosity == 0), + json_(json), + fixed_output_width_(fixed_output_width), + display_delay_sec_(printer_->is_tty() ? 
0.1 : 1.0), + total_timer_(clock), + file_timer_(clock), + sig_timer_(clock), + print_timer_(clock) {} + +ProgressTracker::~ProgressTracker() = default; + +void ProgressTracker::StartFindFiles() { + assert(state_ == State::kIdle); + state_ = State::kSearch; + + files_found_ = 0; +} + +void ProgressTracker::ReportFileFound() { + assert(state_ == State::kSearch); + ++files_found_; + + UpdateOutput(false); +} + +void ProgressTracker::ReportDirFound() { + assert(state_ == State::kSearch); + ++dirs_found_; + + UpdateOutput(false); +} + +void ProgressTracker::ReportFileStats( + uint32_t num_missing_files, uint32_t num_extraneous_files, + uint32_t num_matching_files, uint32_t num_changed_files, + uint64_t total_missing_bytes, uint64_t total_changed_client_bytes, + uint64_t total_changed_server_bytes, uint32_t num_missing_dirs, + uint32_t num_extraneous_dirs, uint32_t num_matching_dirs, + bool whole_file_arg, bool checksum_arg, bool delete_arg) { + const char* fmt[] = { + "%6u file(s) and %u folder(s) are not present on the instance and will " + "be copied.", + "%6u file(s) changed and will be updated.", + "%6u file(s) and %u folder(s) match and do not have to be updated.", + "%6u file(s) and %u folder(s) on the instance do not exist on this " + "machine."}; + + if (whole_file_arg) { + fmt[1] = "%6u file(s) changed and will be copied due to -W/--whole-file."; + } + + if (checksum_arg) { + fmt[2] = + "%6u file(s) and %u folder(s) have matching modified time and size, " + "but will be synced due to -c/--checksum."; + } + + if (checksum_arg & whole_file_arg) { + fmt[2] = + "%6u file(s) and %u folder(s) have matching modified time and size, " + "but will be copied due to -c/--checksum and -W/--whole-file."; + } + + if (delete_arg) { + fmt[3] = + "%6u file(s) and %u folder(s) on the instance do not exist on this " + "machine and will be deleted due to --delete."; + } + + Print(absl::StrFormat(fmt[0], num_missing_files, num_missing_dirs), true); + Print(absl::StrFormat(fmt[1], num_changed_files), true); + Print(absl::StrFormat(fmt[2], num_matching_files, num_matching_dirs), true); + Print(absl::StrFormat(fmt[3], num_extraneous_files, num_extraneous_dirs), + true); + + total_bytes_to_copy_ = total_missing_bytes; + total_bytes_to_diff_ = total_changed_client_bytes; + total_sig_bytes_ = total_changed_server_bytes; + total_files_to_delete_ = num_extraneous_files; + total_dirs_to_delete_ = num_extraneous_dirs; +} + +void ProgressTracker::StartCopy(const std::string& filepath, + uint64_t filesize) { + assert(state_ == State::kIdle); + state_ = State::kCopy; + file_timer_.Reset(); + sig_time_sec_ = 0; + + curr_filepath_ = filepath; + curr_filesize_ = filesize; + curr_bytes_copied_ = 0; +} + +void ProgressTracker::ReportCopyProgress(uint64_t num_bytes_copied) { + assert(state_ == State::kCopy); + curr_bytes_copied_ += num_bytes_copied; + total_bytes_copied_ += num_bytes_copied; + + UpdateOutput(false); +} + +void ProgressTracker::StartSync(const std::string& filepath, + uint64_t client_size, uint64_t server_size) { + assert(state_ == State::kIdle); + state_ = State::kSyncDiff; + file_timer_.Reset(); + sig_timer_.Reset(); + sig_time_sec_ = 0; + + curr_filepath_ = filepath; + curr_filesize_ = client_size; + server_filesize_ = server_size; + curr_sig_bytes_read_ = 0; + curr_bytes_diffed_ = 0; +} + +void ProgressTracker::ReportSyncProgress(size_t num_client_bytes_processed, + size_t num_server_bytes_processed) { + assert(state_ == State::kSyncSig || state_ == State::kSyncDiff); + + // If diffing is blocked on 
getting more server chunks, switch to kSyncSig, + // which effectively changes the output from "Dxxx%" to "Sxxx%" and removes + // the ETA. + State new_state = state_; + if (num_client_bytes_processed > 0) { + new_state = State::kSyncDiff; + } else if (num_server_bytes_processed > 0) { + new_state = State::kSyncSig; + } + + // Measure time exclusively spent in server-side signature computation. This + // is later taken into account to get a better speed estimation for diffing. + if (state_ == State::kSyncDiff && new_state == State::kSyncSig) { + sig_timer_.Reset(); + } else if (state_ == State::kSyncSig && new_state == State::kSyncDiff) { + sig_time_sec_ += sig_timer_.ElapsedSeconds(); + } + state_ = new_state; + + curr_bytes_diffed_ += num_client_bytes_processed; + total_bytes_diffed_ += num_client_bytes_processed; + curr_sig_bytes_read_ += num_server_bytes_processed; + total_sig_bytes_read_ += num_server_bytes_processed; + + UpdateOutput(false); +} + +void ProgressTracker::StartDeleteFiles() { + assert(state_ == State::kIdle); + state_ = State::kDelete; + + files_deleted_ = 0; +} + +void ProgressTracker::ReportFileDeleted(const std::string& filepath) { + curr_filepath_ = filepath; + ++files_deleted_; + + UpdateOutput(false); +} + +void ProgressTracker::ReportDirDeleted(const std::string& filepath) { + curr_filepath_ = filepath; + ++dirs_deleted_; + + UpdateOutput(false); +} + +void ProgressTracker::Finish() { + assert(state_ != State::kIdle); + + UpdateOutput(true); + if (state_ == State::kSearch) { + // Total time does not count file search time. + total_timer_.Reset(); + } + + state_ = State::kIdle; +} + +void ProgressTracker::UpdateOutput(bool finished) { + if (printer_->quiet()) { + return; + } + + if (state_ == State::kDelete) { + if (total_files_to_delete_ + total_dirs_to_delete_ == 0 || + (!only_total_progress_ && finished)) { + // No need to print here, it would result in "0/0" or duplicate lines. + return; + } + + if (only_total_progress_) { + if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) { + return; + } + print_timer_.Reset(); + + Print(absl::StrFormat("%u/%u file(s) and %u/%u folder(s) deleted.", + files_deleted_, total_files_to_delete_, + dirs_deleted_, total_dirs_to_delete_), + finished); + return; + } + + std::string txt = + absl::StrFormat("deleted %u / %u", files_deleted_ + dirs_deleted_, + total_files_to_delete_ + total_dirs_to_delete_); + + int width = GetOutputWidth(); + int file_width = std::max(12, width - static_cast(txt.size())); + std::string short_path = ShortenFilePath(curr_filepath_, file_width); + PaddRight(&short_path, file_width); + printer_->Print(short_path + txt, true, width); + return; + } + + if (only_total_progress_ && state_ != State::kSearch) { + finished &= GetTotalProgress() == 1; + if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) { + return; + } + print_timer_.Reset(); + + if (json_) { + double total_progress, total_sec, total_eta_sec; + GetTotalProgressStats(&total_progress, &total_sec, &total_eta_sec); + + Json::Value val; + val["total_progress"] = total_progress; + val["total_duration"] = total_sec; + val["total_eta"] = total_eta_sec; + PrintJson(val, finished); + } else { + Print(GetTotalProgressText(), finished); + } + return; + } + + // Always print if finished (to make sure to get the line feed). 
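+  // Otherwise, rate-limit updates to one per |display_delay_sec_|.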
+ if (!finished && print_timer_.ElapsedSeconds() < display_delay_sec_) { + return; + } + print_timer_.Reset(); + + switch (state_) { + case State::kSearch: { + Print(absl::StrFormat("%u file(s) and %u folder(s) found", files_found_, + dirs_found_), + finished); + break; + } + + case State::kCopy: { + // file C 50% 12345MB 123.4MB/s 00:10 ETA 50% TOT 05:00 ETA + double progress = GetProgress(curr_bytes_copied_, curr_filesize_); + OutputFileProgress(OutputType::kCopy, progress, finished); + break; + } + + case State::kSyncSig: { + // file S 50% 12345MB ---.-MB/s 00:10 ETA 50% TOT 05:00 ETA + double progress = + GetProgress(curr_bytes_diffed_ + curr_sig_bytes_read_ / kSigFactor, + curr_filesize_ + server_filesize_ / kSigFactor); + OutputFileProgress(OutputType::kSig, progress, finished); + break; + } + + case State::kSyncDiff: { + // file D 50% 12345MB 123.4MB/s 00:10 ETA 50% TOT 05:00 ETA + double progress = + GetProgress(curr_bytes_diffed_ + curr_sig_bytes_read_ / kSigFactor, + curr_filesize_ + server_filesize_ / kSigFactor); + OutputFileProgress(OutputType::kDiff, progress, finished); + break; + } + + case State::kDelete: + // Should have been handled above. + assert(false); + case State::kIdle: + break; + } +} + +void ProgressTracker::OutputFileProgress(OutputType type, double progress, + bool finished) const { + // Just in case some calculation wasn't 100% right. + if (finished) { + progress = 1.0; + } + + double file_sec = file_timer_.ElapsedSeconds(); + double file_eta_sec = file_sec / std::max(1e-3, progress) - file_sec; + double filespeed = 0; + + // Don't bother trying to estimate transfer speed for signature. + if (type == OutputType::kCopy) { + filespeed = curr_filesize_ * progress / std::max(1e-3, file_sec); + } else if (type == OutputType::kDiff) { + // Take the sig time into account to estimate the effective transfer speed, + // using the actual diffing progress, not the combined diffing + signing + // progress. + double diff_progress = GetProgress(curr_bytes_diffed_, curr_filesize_); + double diff_sec = file_sec - sig_time_sec_; + double final_file_sec = + diff_sec / std::max(1e-3, diff_progress) + sig_time_sec_; + filespeed = curr_filesize_ / std::max(1e-3, final_file_sec); + } + + if (json_) { + const char* op = type == OutputType::kCopy ? "Copy" + : type == OutputType::kSig ? "Sign" + : "Diff"; + + double total_progress, total_sec, total_eta_sec; + GetTotalProgressStats(&total_progress, &total_sec, &total_eta_sec); + + Json::Value val; + val["file"] = curr_filepath_; + val["operation"] = op; + val["size"] = curr_filesize_; + val["bytes_per_second"] = filespeed; + val["duration"] = file_sec; + val["eta"] = file_eta_sec; + val["total_progress"] = total_progress; + val["total_duration"] = total_sec; + val["total_eta"] = total_eta_sec; + PrintJson(val, finished); + } else { + char ch = type == OutputType::kCopy ? 'C' + : type == OutputType::kSig ? 'S' + : 'D'; + + int progress_percent = static_cast(progress * 100); + + uint64_t filesize = curr_filesize_; + const char* filesize_unit = FormatIntBytes(&filesize); + std::string filetime_str = FormatTime(file_sec); + + const char* filespeed_unit = "B"; + std::string filespeed_str = "---.-"; + + // Don't bother trying to estimate transfer speed for signature. + if (type != OutputType::kSig) { + filespeed_unit = FormatDoubleBytes(&filespeed); + filespeed_str = absl::StrFormat("%5.1f", filespeed); + } + + const char* fileeta_str = " "; + if (progress < 1.0) { + // While in progress, time is an ETA. 
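+      // Once the file is done, the elapsed time is shown instead and the
+      // "ETA" marker is dropped.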
+ filetime_str = FormatTime(file_eta_sec); + fileeta_str = "ETA"; + } + + // file S 50% 12345MB ---.-MB/s --:-- ETA 50% TOT 005:00 ETA + std::string txt = absl::StrFormat( + " %c%3i%% %5i%-2s " + "%s%-2s/s %s %3s " + "%s", + ch, progress_percent, filesize, filesize_unit, filespeed_str.c_str(), + filespeed_unit, filetime_str.c_str(), fileeta_str, + GetTotalProgressText().c_str()); + + int width = GetOutputWidth(); + int file_width = std::max(12, width - static_cast(txt.size())); + std::string short_path = ShortenFilePath(curr_filepath_, file_width); + PaddRight(&short_path, file_width); + + printer_->Print(short_path + txt, finished, width); + } +} + +void ProgressTracker::Print(std::string text, bool finished) const { + if (printer_->quiet()) return; + + printer_->Print(std::move(text), finished, GetOutputWidth()); +} + +void ProgressTracker::PrintJson(const Json::Value& val, bool finished) const { + if (printer_->quiet()) return; + + Json::FastWriter writer; + std::string json = writer.write(val); + if (!json.empty() && json.back() == '\n') json.pop_back(); + printer_->Print(json, finished, 0); +} + +double ProgressTracker::GetTotalProgress() const { + return GetProgress(total_bytes_copied_ + total_bytes_diffed_ + + total_sig_bytes_read_ / kSigFactor, + total_bytes_to_copy_ + total_bytes_to_diff_ + + total_sig_bytes_ / kSigFactor); +} + +void ProgressTracker::GetTotalProgressStats(double* total_progress, + double* total_sec, + double* total_eta_sec) const { + *total_progress = GetTotalProgress(); + *total_sec = total_timer_.ElapsedSeconds(); + *total_eta_sec = *total_sec / std::max(1e-3, *total_progress) - *total_sec; +} + +std::string ProgressTracker::GetTotalProgressText() const { + double total_progress, total_sec, total_eta_sec; + GetTotalProgressStats(&total_progress, &total_sec, &total_eta_sec); + int total_progress_percent = static_cast(total_progress * 100); + + const char* total_eta_str = " "; + if (total_progress < 1.0) { + // total_sec is an ETA. + total_sec = total_eta_sec; + total_eta_str = "ETA"; + } + std::string total_sec_str = FormatTime(total_sec); + + return absl::StrFormat("%3i%% TOT %s %s", total_progress_percent, + total_sec_str.c_str(), total_eta_str); +} + +int ProgressTracker::GetOutputWidth() const { + return fixed_output_width_ > 0 ? fixed_output_width_ + : Util::GetConsoleWidth(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync/progress_tracker.h b/cdc_rsync/progress_tracker.h new file mode 100644 index 0000000..893d649 --- /dev/null +++ b/cdc_rsync/progress_tracker.h @@ -0,0 +1,244 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CDC_RSYNC_PROGRESS_TRACKER_H_ +#define CDC_RSYNC_PROGRESS_TRACKER_H_ + +#include + +#include "cdc_rsync/base/cdc_interface.h" +#include "cdc_rsync/file_finder_and_sender.h" +#include "common/stopwatch.h" + +namespace Json { +class Value; +} + +namespace cdc_ft { + +class ProgressPrinter { + public: + ProgressPrinter(bool quiet, bool is_tty) : quiet_(quiet), is_tty_(is_tty) {} + virtual ~ProgressPrinter() = default; + + virtual void Print(std::string text, bool newline, int output_width) = 0; + + bool quiet() const { return quiet_; } + bool is_tty() const { return is_tty_; } + + private: + const bool quiet_; + const bool is_tty_; +}; + +class ConsoleProgressPrinter : public ProgressPrinter { + public: + ConsoleProgressPrinter(bool quiet, bool is_tty); + + // Prints |text| to stdout. Adds a line feed (\n) if |newline| is true or + // is_tty() is false (e.g. logging to a file). Otherwise, just adds a carriage + // return (\r), so that the next call to Output overwrites the current line. + // Fills the rest of the line up to |output_width| characters with spaces to + // properly overwrite the last line. + // No-op if quiet(). + void Print(std::string text, bool newline, int output_width) override; +}; + +// Tracks progress of the various stages of rsync and displays them in a human- +// readable manner. +class ProgressTracker : public ReportCdcProgress, + public ReportFindFilesProgress { + public: + // |verbosity| (number of -v arguments) impacts the display verbosity. + // 0 only prints total progress and ETA/time. + // 1 prints per-file process and ETA/time. + // |json| prints JSON progress. + // If |fixed_output_width| > 0, formats output to that width, otherwise, uses + // the console width. + ProgressTracker(ProgressPrinter* printer, int verbosity, bool json, + int fixed_output_width = 0, + SteadyClock* clock = DefaultSteadyClock::GetInstance()); + ~ProgressTracker(); + + // Starts reporting finding all source files to copy. + // Must be in idle state. Must be called before ReportFileFound(). + void StartFindFiles(); + + // Reports that a file has been found. + void ReportFileFound(); + + // Reports that a directory has been found. + void ReportDirFound(); + + // Prints out the 4 files numbers and stores the total bytes for progress + // calculations. See SendFileStatsResponse in messages.proto for more info. + void ReportFileStats(uint32_t num_missing_files, + uint32_t num_extraneous_files, + uint32_t num_matching_files, uint32_t num_changed_files, + uint64_t total_missing_bytes, + uint64_t total_changed_client_bytes, + uint64_t total_changed_server_bytes, + uint32_t num_missing_dirs, uint32_t num_extraneous_dirs, + uint32_t num_matching_dirs, bool whole_file_arg = false, + bool checksum_arg = false, bool delete_arg = false); + + // Starts reporting the copy of the file at |filepath| of size |filesize|. + // Must be in idle state. Must be called before ReportCopyProgress(). + void StartCopy(const std::string& filepath, uint64_t filesize); + + // Reports that |num_bytes_copied| have been copied for the current file. + void ReportCopyProgress(uint64_t num_bytes_copied); + + // Starts reporting the delta sync of the file at |filepath| of size + // |client_size|. |server_size| is the size of the corresponding file on the + // server. + // Must be in idle state. Must be called before ReportSyncProgress(). 
+ void StartSync(const std::string& filepath, uint64_t client_size, + uint64_t server_size); + + // ReportCdcProgress: + + // Reports that |num_client_bytes_processed| of the current client-side file + // have been read and processed by the delta-transfer algorithm, and that + // |num_server_bytes_processed| of the current server-side file have been + // read and processed. + void ReportSyncProgress(uint64_t num_client_bytes_processed, + uint64_t num_server_bytes_processed) override; + + // Starts reporting deletion of extraneous files. + // Must be in idle state. Must be called before ReportFileDeleted(). + void StartDeleteFiles(); + + // Reports that a file has been deleted. + void ReportFileDeleted(const std::string& filepath); + + // Reports that a directory has been deleted. + void ReportDirDeleted(const std::string& filepath); + + // Prints final stats (e.g. 100% progress for copy/diff), feeds line and + // resets state to idle. Must be called + // - when all files have been found, + // - after each file copy and + // - after each diff. + void Finish(); + + // Gets the time delay (in seconds) between two display updates. Returns a + // lower number (more updates) in TTY mode, e.g. when running from a terminal, + // and a higher number otherwise, e.g. when piping stdout to a file. + double GetDisplayDelaySecForTesting() const { return display_delay_sec_; } + + private: + // Prints progress to the console. Rate-limited, so that it can be called + // often without performance overhead. |finished| should be set if the current + // item is finished. It adds a line feed. + // No-op if printer_->quiet(). + void UpdateOutput(bool finished); + + enum class OutputType { kCopy, kSig, kDiff }; + + // Prints out the progress of the current file (for copy/diff state). + // Used if |verbosity_| > 0 and not printer_->quiet(). + void OutputFileProgress(OutputType type, double progress, + bool finished) const; + + // Wrapper for printer_->Print(), but checks printer_->quiet(). + void Print(std::string text, bool finished) const; + + // Prints the JSON value |val|. + void PrintJson(const Json::Value& val, bool finished) const; + + // Returns the total progress (between 0 and 1). + double GetTotalProgress() const; + + // Gets the |total_progress| (between 0 and 1), the total duration so far in + // |total_sec| and the estimated ETA (time left) in |total_eta_sec|. + void GetTotalProgressStats(double* total_progress, double* total_sec, + double* total_eta_sec) const; + + // Returns a string with the total progress, e.g. " 19% TOT 03:59 ETA". + std::string GetTotalProgressText() const; + + // Returns |fixed_output_width_| if > 0 or Util::GetConsoleWidth(). + int GetOutputWidth() const; + + ProgressPrinter* const printer_; + const bool only_total_progress_ = false; + const bool json_ = false; + const int fixed_output_width_; + const double display_delay_sec_; + + // Timer for total copy and diff time (not file search, that's not timed). + Stopwatch total_timer_; + // Timer for single file copy and diff time. + Stopwatch file_timer_; + // Timer for processing signatures of server files. + Stopwatch sig_timer_; + // Timer to limit the rate of console outputs. + Stopwatch print_timer_; + + enum class State { kIdle, kSearch, kCopy, kSyncSig, kSyncDiff, kDelete }; + + State state_ = State::kIdle; + + // Number of files found so far. + uint32_t files_found_ = 0; + + // Number of directories found so far. + uint32_t dirs_found_ = 0; + + // Path of the file currently copied or synced. 
+ std::string curr_filepath_; + // Size of the file at |current_filepath_|. + uint64_t curr_filesize_ = 0; + + // Number of bytes of the file at |current_filepath_| already copied. + uint64_t curr_bytes_copied_ = 0; + // Total number of bytes already copied. + uint64_t total_bytes_copied_ = 0; + // Total number of bytes to copy. + uint64_t total_bytes_to_copy_ = 0; + + // Number of signature bytes read so far on the server. + uint64_t curr_sig_bytes_read_ = 0; + // Total number of signature bytes already processed on the server. + uint64_t total_sig_bytes_read_ = 0; + // Total number of signature bytes. + uint64_t total_sig_bytes_ = 0; + // Total size of the server-side file. + uint64_t server_filesize_ = 0; + // Duration for signature computation. + double sig_time_sec_ = 0.0; + + // Number of bytes of the file at |current_filepath_| already diffed. + uint64_t curr_bytes_diffed_ = 0; + // Total number of bytes already diffed. + uint64_t total_bytes_diffed_ = 0; + // Total number of bytes to diff. + uint64_t total_bytes_to_diff_ = 0; + + // Number of files deleted so far. + uint32_t files_deleted_ = 0; + // Total number of files to be deleted. + uint32_t total_files_to_delete_ = 0; + // Number of directories deleted so far. + uint32_t dirs_deleted_ = 0; + // Total number of directories to be deleted. + uint32_t total_dirs_to_delete_ = 0; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_PROGRESS_TRACKER_H_ diff --git a/cdc_rsync/progress_tracker_test.cc b/cdc_rsync/progress_tracker_test.cc new file mode 100644 index 0000000..4c2000d --- /dev/null +++ b/cdc_rsync/progress_tracker_test.cc @@ -0,0 +1,491 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/progress_tracker.h" + +#include + +#include "common/testing_clock.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +// Create custom sizes, so that progress can be easily split up into steps, e.g. +// kFileSize / 3 + kFileSize / 3, + kFileSize / 3, and the end result is still +// kFileSize. +constexpr uint64_t kFileSize = 1 * 2 * 3 * 4 * 5 * 6; + +// Verbosity. +const int kV0 = 0; +const int kV1 = 1; + +const bool kJson = true; +const bool kNoJson = false; + +const bool kQuiet = true; +const bool kNoQuiet = false; + +const bool kTTY = true; +const bool kNoTTY = false; + +// Class that just creates a list of things it was supposed to print. +class FakeProgressPrinter : public ProgressPrinter { + public: + FakeProgressPrinter(bool quiet, bool is_tty) + : ProgressPrinter(quiet, is_tty) {} + + void Print(std::string text, bool newline, int /*output_width*/) override { + lines_.push_back(text + (newline || !is_tty() ? "\n" : "\r")); + } + + void ExpectLinesMatch(std::vector expected_lines) { + EXPECT_EQ(lines_, expected_lines); + } + + private: + std::vector lines_; +}; + +class ProgressTrackerTest : public ::testing::Test { + protected: + // Returns the time (in ms) that needs to pass to trigger an output update. 
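+  // The factor 1.5 adds slack so that advancing the test clock by this amount
+  // reliably crosses the display-delay threshold.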
+ double GetTriggerPrintTimeDeltaMs(const ProgressTracker& progress) { + return progress.GetDisplayDelaySecForTesting() * 1000 * 1.5; + } + + TestingSteadyClock clock_; + + uint32_t two_seconds_time_delta_ms_ = 2 * 1000; + uint32_t two_minutes_time_delta_ms_ = 2 * 60 * 1000; + uint32_t four_hours_time_delta_ms_ = 4 * 60 * 60 * 1000; + uint32_t eight_days_time_delta_ms_ = 8 * 24 * 60 * 60 * 1000; + + const int output_width_ = 66; +}; + +TEST_F(ProgressTrackerTest, FindFiles) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + progress.StartFindFiles(); + progress.ReportFileFound(); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportFileFound(); + progress.ReportFileFound(); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportFileFound(); + progress.ReportFileFound(); + progress.Finish(); + + printer.ExpectLinesMatch({"2 file(s) and 0 folder(s) found\r", + "4 file(s) and 0 folder(s) found\r", + "5 file(s) and 0 folder(s) found\n"}); +} + +TEST_F(ProgressTrackerTest, FindFilesVerbose) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + progress.StartFindFiles(); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportFileFound(); + progress.Finish(); + + // Find files should be the same as non-verbose. + printer.ExpectLinesMatch({"1 file(s) and 0 folder(s) found\r", + "1 file(s) and 0 folder(s) found\n"}); +} + +TEST_F(ProgressTrackerTest, CopyFiles) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + progress.StartCopy("file.txt", kFileSize); + progress.ReportCopyProgress(kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + progress.ReportCopyProgress(kFileSize / 3); + progress.Finish(); + + printer.ExpectLinesMatch({"100% TOT 00:00 \r", "100% TOT 00:00 \n"}); +} + +TEST_F(ProgressTrackerTest, CopyFilesVerbose) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + progress.StartCopy("file.txt", kFileSize); + progress.ReportCopyProgress(kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + progress.ReportCopyProgress(kFileSize / 3); + progress.Finish(); + + printer.ExpectLinesMatch( + {"file.txt C 66% 720B 3.1KB/s 00:00 ETA 100% TOT 00:00 \r", + "file.txt C100% 720B 4.7KB/s 00:00 100% TOT 00:00 \n"}); +} + +TEST_F(ProgressTrackerTest, SyncFiles) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 1 changed file. 
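+  // Argument order: missing/extraneous/matching/changed files, missing bytes,
+  // changed client/server bytes, missing/extraneous/matching dirs.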
+ progress.ReportFileStats(0, 0, 0, 1, 0, kFileSize, kFileSize, 0, 0, 0); + + progress.StartSync("file.txt", kFileSize, kFileSize); + progress.ReportSyncProgress(0, kFileSize / 2); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(0, kFileSize / 2); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(kFileSize / 3, 0); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(kFileSize / 3, 0); + progress.ReportSyncProgress(kFileSize / 3, 0); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 1 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + " 2% TOT 00:05 ETA\r", " 34% TOT 00:00 ETA\r", " 67% TOT 00:00 ETA\r", + "100% TOT 00:00 \n"}); +} + +TEST_F(ProgressTrackerTest, SyncFilesVerbose) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + // 1 changed file. + progress.ReportFileStats(0, 0, 0, 1, 0, kFileSize, kFileSize, 0, 0, 0); + + progress.StartSync("file.txt", kFileSize, kFileSize); + clock_.Advance(two_seconds_time_delta_ms_); + progress.ReportSyncProgress(0, kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(kFileSize / 3, 0); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(0, kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(0, kFileSize / 3); + progress.ReportSyncProgress(kFileSize / 3, 0); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportSyncProgress(kFileSize / 3, 0); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 1 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + "file.txt S 0% 720B ---.-B /s 04:03 ETA 0% TOT 04:03 ETA\r", + "file.txt D 33% 720B 117.1B /s 00:04 ETA 33% TOT 00:04 ETA\r", + "file.txt S 34% 720B ---.-B /s 00:04 ETA 34% TOT 00:04 ETA\r", + "file.txt S 34% 720B ---.-B /s 00:04 ETA 34% TOT 00:04 ETA\r", + "file.txt D100% 720B 276.9B /s 00:02 100% TOT 00:02 \r", + "file.txt D100% 720B 276.9B /s 00:02 100% TOT 00:02 \n"}); +} + +TEST_F(ProgressTrackerTest, DeleteFiles) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 2 extraneous files. 
+ progress.ReportFileStats(0, 4, 0, 0, 0, 0, 0, 0, 0, 0); + progress.StartDeleteFiles(); + progress.ReportFileDeleted("file1.txt"); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportFileDeleted("file2.txt"); + progress.ReportFileDeleted("file3.txt"); + progress.ReportFileDeleted("file4.txt"); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 4 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + "2/4 file(s) and 0/0 folder(s) deleted.\r", + "4/4 file(s) and 0/0 folder(s) deleted.\n"}); +} + +TEST_F(ProgressTrackerTest, DeleteFilesVerbose) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + // 2 extraneous files. + progress.ReportFileStats(0, 2, 0, 0, 0, 0, 0, 0, 0, 0); + progress.StartDeleteFiles(); + progress.ReportFileDeleted("file1.txt"); + progress.ReportFileDeleted("file2.txt"); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 2 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + "file1.txt deleted 1 / 2\n", + "file2.txt deleted 2 / 2\n"}); +} + +TEST_F(ProgressTrackerTest, DeleteFilesNoFiles) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + progress.ReportFileStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + progress.StartDeleteFiles(); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n"}); +} + +TEST_F(ProgressTrackerTest, SetFileStatsAndUnits) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + // Have a very large file and trigger different ETAs and transfer speeds. 
+ constexpr uint64_t large_file_size = 2ull * 1024 * 1024 * 1024 * 1024; + progress.ReportFileStats(1, 0, 0, 0, large_file_size, 0, 0, 0, 0, 0); + + progress.StartCopy("file.txt", large_file_size); + progress.ReportCopyProgress(large_file_size / 4); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(large_file_size / 4); + clock_.Advance(two_seconds_time_delta_ms_); + progress.ReportCopyProgress(large_file_size / 4); + clock_.Advance(two_minutes_time_delta_ms_); + progress.ReportCopyProgress(large_file_size / 8); + clock_.Advance(four_hours_time_delta_ms_); + progress.ReportCopyProgress(large_file_size / 8); + clock_.Advance(eight_days_time_delta_ms_); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 1 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + "file.txt C 50% 2048GB 6.7TB/s 00:00 ETA 50% TOT 00:00 ETA\r", + "file.txt C 75% 2048GB 714.4GB/s 00:00 ETA 75% TOT 00:00 ETA\r", + "file.txt C 87% 2048GB 14.7GB/s 00:17 ETA 87% TOT 00:17 ETA\r", + "file.txt C100% 2048GB 144.4MB/s 04:02:02 100% TOT 04:02:02 " + "\r", + "file.txt C100% 2048GB 3.0MB/s 196:02:02 100% TOT 196:02:02 " + " \n"}); +} + +TEST_F(ProgressTrackerTest, QuietMode) { + FakeProgressPrinter printer(kQuiet, kTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + + progress.StartFindFiles(); + progress.ReportFileFound(); + progress.Finish(); + + // 1 missing, 1 extraneous, and 1 changed files + // 1 extraneous folder + progress.ReportFileStats(1, 1, 1, 0, kFileSize, kFileSize, kFileSize, 0, 0, + 1); + + progress.StartCopy("file.txt", kFileSize); + progress.ReportCopyProgress(kFileSize); + progress.Finish(); + + progress.StartSync("file.txt", kFileSize, kFileSize); + progress.ReportSyncProgress(kFileSize, kFileSize); + progress.Finish(); + + progress.StartDeleteFiles(); + progress.ReportFileDeleted("file.txt"); + progress.ReportDirDeleted("folder"); + progress.Finish(); + + printer.ExpectLinesMatch({}); +} + +TEST_F(ProgressTrackerTest, NoTTY) { + FakeProgressPrinter tty_printer(kNoQuiet, kTTY); + ProgressTracker tty_progress(&tty_printer, kV1, kNoJson, output_width_, + &clock_); + double tty_delta_ms = GetTriggerPrintTimeDeltaMs(tty_progress); + + // In no-TTY-mode (e.g. cdc_rsync .. > out.txt), the display rate should be + // lower (currently every 1 second instead of 0.1 seconds). 
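+  // 2 * 1024^4 bytes = 2 TiB; FormatIntBytes() stops dividing once the value
+  // fits into four digits, so this shows up as "2048GB" below.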
+ FakeProgressPrinter printer(kNoQuiet, kNoTTY); + ProgressTracker progress(&printer, kV1, kNoJson, output_width_, &clock_); + EXPECT_GT(GetTriggerPrintTimeDeltaMs(progress), tty_delta_ms); + + progress.StartCopy("file.txt", kFileSize); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + progress.ReportCopyProgress(kFileSize / 3); + progress.Finish(); + + printer.ExpectLinesMatch( + {"file.txt C 33% 720B 160.0B /s 00:03 ETA 100% TOT 00:01 \n", + "file.txt C 66% 720B 160.0B /s 00:01 ETA 100% TOT 00:03 \n", + "file.txt C100% 720B 240.0B /s 00:03 100% TOT 00:03 \n"}); +} + +TEST_F(ProgressTrackerTest, JsonPerFile) { + FakeProgressPrinter printer(kNoQuiet, kNoTTY); + ProgressTracker progress(&printer, kV1, kJson, output_width_, &clock_); + + progress.StartCopy("file.txt", kFileSize); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + progress.ReportCopyProgress(kFileSize / 3); + progress.Finish(); + + printer.ExpectLinesMatch( + {"{\"bytes_per_second\":160.0,\"duration\":1.5,\"eta\":3.0,\"file\":" + "\"file.txt\",\"operation\":\"Copy\",\"size\":720,\"total_duration\":1." + "5,\"total_eta\":0.0,\"total_progress\":1.0}\n", + "{\"bytes_per_second\":160.0,\"duration\":3.0,\"eta\":1.5,\"file\":" + "\"file.txt\",\"operation\":\"Copy\",\"size\":720,\"total_duration\":3." + "0,\"total_eta\":0.0,\"total_progress\":1.0}\n", + "{\"bytes_per_second\":240.0,\"duration\":3.0,\"eta\":0.0,\"file\":" + "\"file.txt\",\"operation\":\"Copy\",\"size\":720,\"total_duration\":3." + "0,\"total_eta\":0.0,\"total_progress\":1.0}\n"}); +} + +TEST_F(ProgressTrackerTest, JsonTotal) { + FakeProgressPrinter printer(kNoQuiet, kNoTTY); + ProgressTracker progress(&printer, kV0, kJson, output_width_, &clock_); + + progress.ReportFileStats(0, 0, 0, 1, 0, kFileSize, kFileSize, 0, 0, 0); + + progress.StartCopy("file.txt", kFileSize); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + clock_.Advance(GetTriggerPrintTimeDeltaMs(progress)); + progress.ReportCopyProgress(kFileSize / 3); + progress.ReportCopyProgress(kFileSize / 3); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 1 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n", + "{\"total_duration\":1.5,\"total_eta\":3.1124999999999998,\"total_" + "progress\":0.32520325203252032}\n", + "{\"total_duration\":3.0,\"total_eta\":1.6124999999999998,\"total_" + "progress\":0.65040650406504064}\n"}); +} + +TEST_F(ProgressTrackerTest, SyncFilesWithWholeFile) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 1 changed file with -W arg. 
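+  // The trailing |true| sets |whole_file_arg| and switches the "changed" line
+  // to the -W/--whole-file wording.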
+ progress.ReportFileStats(0, 0, 0, 1, 0, kFileSize, kFileSize, 0, 0, 0, true); + progress.StartCopy("file.txt", kFileSize); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 1 file(s) changed and will be copied due to -W/--whole-file.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n"}); +} + +TEST_F(ProgressTrackerTest, SyncFilesWithChecksum) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 1 matching file with -c arg. + progress.ReportFileStats(0, 0, 1, 0, 0, kFileSize, kFileSize, 0, 0, 0, false, + true); + progress.StartCopy("file.txt", kFileSize); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 1 file(s) and 0 folder(s) have matching modified time and size, " + "but will be synced due to -c/--checksum.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n"}); +} + +TEST_F(ProgressTrackerTest, SyncFilesWithChecksumAndWholeFile) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 1 changed file, 1 matching file. with -c and -W args. + progress.ReportFileStats(0, 0, 1, 1, 0, kFileSize, kFileSize, 0, 0, 0, true, + true); + progress.StartCopy("file.txt", kFileSize); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 1 file(s) changed and will be copied due to -W/--whole-file.\n", + " 1 file(s) and 0 folder(s) have matching modified time and size, " + "but will be copied due to -c/--checksum and -W/--whole-file.\n", + " 0 file(s) and 0 folder(s) on the instance do not exist on this " + "machine.\n"}); +} + +TEST_F(ProgressTrackerTest, SyncFilesWithDelete) { + FakeProgressPrinter printer(kNoQuiet, kTTY); + ProgressTracker progress(&printer, kV0, kNoJson, output_width_, &clock_); + + // 1 extraneous file with --delete arg. 
+ progress.ReportFileStats(0, 1, 0, 0, 0, kFileSize, kFileSize, 0, 0, 0, false, + false, true); + progress.StartCopy("file.txt", kFileSize); + progress.Finish(); + + printer.ExpectLinesMatch( + {" 0 file(s) and 0 folder(s) are not present on the instance and " + "will be copied.\n", + " 0 file(s) changed and will be updated.\n", + " 0 file(s) and 0 folder(s) match and do not have to be updated.\n", + " 1 file(s) and 0 folder(s) on the instance do not exist on this " + "machine and will be deleted due to --delete.\n"}); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync/protos/BUILD b/cdc_rsync/protos/BUILD new file mode 100644 index 0000000..443cd45 --- /dev/null +++ b/cdc_rsync/protos/BUILD @@ -0,0 +1,14 @@ +package(default_visibility = [ + "//:__subpackages__", +]) + +proto_library( + name = "messages_proto", + srcs = ["messages.proto"], + visibility = ["//visibility:private"], +) + +cc_proto_library( + name = "messages_cc_proto", + deps = [":messages_proto"], +) diff --git a/cdc_rsync/protos/messages.proto b/cdc_rsync/protos/messages.proto new file mode 100644 index 0000000..65a8d18 --- /dev/null +++ b/cdc_rsync/protos/messages.proto @@ -0,0 +1,193 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package cdc_ft; + +// Used for testing. +message TestRequest { + string message = 1; +} + +// Notify server that subsequent messages are going to be compressed, e.g. when +// the client is about to send missing files. Once all compressed data is sent, +// the client waits for the ToggleCompressionResponse. +message ToggleCompressionRequest {} + +// Notify client that all compressed messages have been received (e.g. all +// missing files have been copied to the server) and that the client may switch +// to uncompressed transfer again. This "write fence" or "sync point" is +// necessary to prevent that the server reads past the compressed data because +// it doesn't know where compressed data ends. +message ToggleCompressionResponse {} + +// Send command line options to server. +// The options largely match the command line args. +message SetOptionsRequest { + message FilterRule { + enum Type { + TYPE_INCLUDE = 0; + TYPE_EXCLUDE = 1; + } + + Type type = 1; + string pattern = 2; + } + + string destination = 1; + bool delete = 2; + bool recursive = 3; + int32 verbosity = 4; + bool whole_file = 5; + bool compress = 6; + repeated FilterRule filter_rules = 7; + bool checksum = 8; + bool relative = 9; + bool dry_run = 10; + bool existing = 11; + string copy_dest = 12; +} + +// Send file list to server. +message AddFilesRequest { + message File { + string filename = 1; + + // Linux epoch time. time_t, basically. + int64 modified_time = 2; + + uint64 size = 3; + } + + // Files are relative to this directory. + string directory = 1; + + // Files in |directory|. + repeated File files = 2; + + // Directories in |directory|. 
+ repeated string dirs = 3; +} + +// Send stats to client for logging purposes. +message SendFileStatsResponse { + // Number of files present on the client, but not on the server. + uint32 num_missing_files = 1; + + // Number of files present on the server, but not on the client. + uint32 num_extraneous_files = 2; + + // Number of files present on both and matching. + uint32 num_matching_files = 3; + + // Number of files present on both, but not matching. + uint32 num_changed_files = 4; + + // Sum of the size of all missing files. + uint64 total_missing_bytes = 5; + + // Sum of the client size of all changed files. + uint64 total_changed_client_bytes = 6; + + // Sum of the server size of all changed files. + uint64 total_changed_server_bytes = 7; + + // Number of directories present on the client, but not on the server. + uint32 num_missing_dirs = 8; + + // Number of directories present on the server, but not on the client. + uint32 num_extraneous_dirs = 9; + + // Number of directories present on both and matching. + uint32 num_matching_dirs = 10; +} + +// Send indices of missing and changed files to client. +message AddFileIndicesResponse { + // Client-side index of the file. + repeated uint32 client_indices = 1; +} + +// Tell server that client will send data of a missing file. +message SendMissingFileDataRequest { + // Server-side of the missing file. + uint32 server_index = 1; + + // The actual file data is sent as raw data. +} + +// Tell client that server is about to send signature data for diffing files. +message SendSignatureResponse { + // Client-side index of the file. + uint32 client_index = 1; + + // The total size of the server-side file. + uint64 server_file_size = 2; +} + +// Send signatures for diffing file data to client. Uses SOA layout to save +// bandwidth. The arrays are expected to be of the same length. +message AddSignaturesResponse { + // Chunk sizes. + repeated uint32 sizes = 1; + + // Chunk hashes, size should match (size of sizes) * (hash length). + bytes hashes = 2; +} + +// Send patching information to server. Uses SOA layout to save bandwidth. +// The arrays are expected to be of the same length. +message AddPatchCommandsRequest { + enum Source { + // Use bytes [offset, offset + size) from |data| contained in this message. + // This means that no existing chunk can be reused. + SOURCE_DATA = 0; + + // Use bytes [offset, offset + size) from the basis file. + // This means that an existing chunk can be reused. + SOURCE_BASIS_FILE = 1; + } + + // Whether this is a reused chunk or a new chunk. + repeated Source sources = 1; + + // Offsets into |data| or the basis file, depending on the source. + repeated uint64 offsets = 2; + + // Sizes in |data| or the basis file, depending on the source. + repeated uint32 sizes = 3; + + // Data bytes, for SOURCE_DATA. + bytes data = 4; +} + +// Send list of to-be-deleted files to the client. +message AddDeletedFilesResponse { + // Files are relative to this directory. + string directory = 1; + + // Files in |directory|. + repeated string files = 2; + + // Directories in |directory|. + repeated string dirs = 3; +} + +// Tell server to shut the frick down. +message ShutdownRequest {} + +// Ack for ShutdownRequest. 
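+// As the final message of a session, this is a convenient place to sketch
+// the overall exchange (inferred from the message comments above, not a
+// normative spec): SetOptionsRequest, AddFilesRequest per directory,
+// SendFileStatsResponse and AddFileIndicesResponse back to the client,
+// SendMissingFileDataRequest for missing files, SendSignatureResponse /
+// AddSignaturesResponse / AddPatchCommandsRequest for changed files,
+// AddDeletedFilesResponse when --delete is active, and finally
+// ShutdownRequest followed by this ack.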
+message ShutdownResponse {} diff --git a/cdc_rsync/testdata/file_finder_and_sender/a.txt b/cdc_rsync/testdata/file_finder_and_sender/a.txt new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/cdc_rsync/testdata/file_finder_and_sender/a.txt @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/cdc_rsync/testdata/file_finder_and_sender/b.txt b/cdc_rsync/testdata/file_finder_and_sender/b.txt new file mode 100644 index 0000000..8fdd954 --- /dev/null +++ b/cdc_rsync/testdata/file_finder_and_sender/b.txt @@ -0,0 +1 @@ +22 \ No newline at end of file diff --git a/cdc_rsync/testdata/file_finder_and_sender/c.txt b/cdc_rsync/testdata/file_finder_and_sender/c.txt new file mode 100644 index 0000000..4f37670 --- /dev/null +++ b/cdc_rsync/testdata/file_finder_and_sender/c.txt @@ -0,0 +1 @@ +333 \ No newline at end of file diff --git a/cdc_rsync/testdata/file_finder_and_sender/subdir/d.txt b/cdc_rsync/testdata/file_finder_and_sender/subdir/d.txt new file mode 100644 index 0000000..1b82438 --- /dev/null +++ b/cdc_rsync/testdata/file_finder_and_sender/subdir/d.txt @@ -0,0 +1 @@ +4444 \ No newline at end of file diff --git a/cdc_rsync/testdata/file_finder_and_sender/subdir/e.txt b/cdc_rsync/testdata/file_finder_and_sender/subdir/e.txt new file mode 100644 index 0000000..baf2663 --- /dev/null +++ b/cdc_rsync/testdata/file_finder_and_sender/subdir/e.txt @@ -0,0 +1 @@ +55555 \ No newline at end of file diff --git a/cdc_rsync/testdata/parallel_file_opener/file1.txt b/cdc_rsync/testdata/parallel_file_opener/file1.txt new file mode 100644 index 0000000..0abc8f1 --- /dev/null +++ b/cdc_rsync/testdata/parallel_file_opener/file1.txt @@ -0,0 +1 @@ +data1 \ No newline at end of file diff --git a/cdc_rsync/testdata/parallel_file_opener/file2.txt b/cdc_rsync/testdata/parallel_file_opener/file2.txt new file mode 100644 index 0000000..fee93d1 --- /dev/null +++ b/cdc_rsync/testdata/parallel_file_opener/file2.txt @@ -0,0 +1 @@ +data2 \ No newline at end of file diff --git a/cdc_rsync/testdata/parallel_file_opener/file3.txt b/cdc_rsync/testdata/parallel_file_opener/file3.txt new file mode 100644 index 0000000..cdca2c1 --- /dev/null +++ b/cdc_rsync/testdata/parallel_file_opener/file3.txt @@ -0,0 +1 @@ +data3 \ No newline at end of file diff --git a/cdc_rsync/testdata/root.txt b/cdc_rsync/testdata/root.txt new file mode 100644 index 0000000..e69de29 diff --git a/cdc_rsync/zstd_stream.cc b/cdc_rsync/zstd_stream.cc new file mode 100644 index 0000000..7e0e61c --- /dev/null +++ b/cdc_rsync/zstd_stream.cc @@ -0,0 +1,182 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/zstd_stream.h" + +#include + +#include "common/log.h" +#include "common/status.h" +#include "common/status_macros.h" + +namespace cdc_ft { +namespace { + +// If the compressor gets less data than 1 buffer (128k) every 500 ms, then +// trigger a flush. This happens when files with no changes are diff'ed (this +// produces very low volume data). 
Flushing prevents that the server gets stale +// and becomes overwhelmed later. +constexpr absl::Duration kMinCompressPeriod = absl::Milliseconds(500); + +} // namespace + +ZstdStream::ZstdStream(Socket* socket, int level, uint32_t num_threads) + : socket_(socket), cctx_(nullptr) { + status_ = WrapStatus(Initialize(level, num_threads), + "Failed to initialize stream compressor"); +} + +ZstdStream::~ZstdStream() { + if (cctx_) { + ZSTD_freeCCtx(cctx_); + cctx_ = nullptr; + } + + { + absl::MutexLock lock(&mutex_); + shutdown_ = true; + } + if (compressor_thread_.joinable()) { + compressor_thread_.join(); + } +} + +absl::Status ZstdStream::Write(const void* data, size_t size) { + absl::MutexLock lock(&mutex_); + if (!status_.ok()) return status_; + + size_t data_bytes_left = size; + const char* data_ptr = static_cast(data); + while (data_bytes_left > 0) { + // Wait until the compressor thread has consumed data from |in_buffer_|. + auto cond = [&]() { + return shutdown_ || in_buffer_.size() < in_buffer_.capacity() || + !status_.ok(); + }; + mutex_.Await(absl::Condition(&cond)); + if (shutdown_) return MakeStatus("Compression stream was shut down"); + if (!status_.ok()) return status_; + + // Copy data to input buffer. + size_t free_in_buffer_bytes = in_buffer_.capacity() - in_buffer_.size(); + const size_t to_copy = std::min(data_bytes_left, free_in_buffer_bytes); + in_buffer_.append(data_ptr, to_copy); + data_bytes_left -= to_copy; + free_in_buffer_bytes -= to_copy; + data_ptr += to_copy; + } + return absl::OkStatus(); +} + +absl::Status ZstdStream::Flush() { + absl::MutexLock lock(&mutex_); + if (!status_.ok()) return status_; + + last_chunk_ = true; + last_chunk_sent_ = false; + + // Wait until data is flushed. + auto cond = [&]() { return shutdown_ || last_chunk_sent_ || !status_.ok(); }; + mutex_.Await(absl::Condition(&cond)); + if (shutdown_) return MakeStatus("Compression stream was shut down"); + return status_; +} + +absl::Status ZstdStream::Initialize(int level, uint32_t num_threads) { + cctx_ = ZSTD_createCCtx(); + if (!cctx_) { + return MakeStatus("Failed to create compression context"); + } + + size_t res = ZSTD_CCtx_setParameter(cctx_, ZSTD_c_compressionLevel, level); + if (ZSTD_isError(res)) { + return MakeStatus("Failed to set compression level: %s", + ZSTD_getErrorName(res)); + } + + // This fails if ZStd was not compiled with -DZSTD_MULTITHREAD. + res = ZSTD_CCtx_setParameter(cctx_, ZSTD_c_nbWorkers, num_threads); + if (ZSTD_isError(res)) { + return MakeStatus("Failed to set number of worker threads: %s", + ZSTD_getErrorName(res)); + } + + { + absl::MutexLock lock(&mutex_); + in_buffer_.reserve(ZSTD_CStreamInSize()); + } + + compressor_thread_ = std::thread([this]() { ThreadCompressorMain(); }); + + return absl::OkStatus(); +} + +void ZstdStream::ThreadCompressorMain() { + std::vector out_buffer; + out_buffer.resize(ZSTD_CStreamOutSize()); + + absl::MutexLock lock(&mutex_); + while (!shutdown_) { + // Wait for input data. + auto cond = [&]() { + return shutdown_ || last_chunk_ || + in_buffer_.size() == in_buffer_.capacity(); + }; + bool flush = + !mutex_.AwaitWithTimeout(absl::Condition(&cond), kMinCompressPeriod); + if (shutdown_) { + return; + } + + // If data arrives at a very slow rate (<1 buffer per kMinCompressPeriod), + // then flush the compression pipes. + const ZSTD_EndDirective mode = last_chunk_ ? ZSTD_e_end + : flush ? ZSTD_e_flush + : ZSTD_e_continue; + LOG_DEBUG("Compressing %u bytes (mode=%s)", in_buffer_.size(), + mode == ZSTD_e_end ? 
"end" + : mode == ZSTD_e_flush ? "flush" + : "continue"); + ZSTD_inBuffer input = {in_buffer_.data(), in_buffer_.size(), 0}; + bool finished = false; + do { + ZSTD_outBuffer output = {out_buffer.data(), out_buffer.size(), 0}; + size_t remaining = ZSTD_compressStream2(cctx_, &output, &input, mode); + if (ZSTD_isError(remaining)) { + status_ = MakeStatus("Failed to compress data: %s", + ZSTD_getErrorName(remaining)); + return; + } + + if (output.pos > 0) { + status_ = socket_->Send(output.dst, output.pos); + if (!status_.ok()) return; + } + + finished = mode != ZSTD_e_continue ? (remaining == 0) + : (input.pos == input.size); + } while (!finished); + + if (last_chunk_) { + last_chunk_ = false; + last_chunk_sent_ = true; + } + + // zstd should only return 0 when the input is consumed. + assert(input.pos == input.size); + in_buffer_.clear(); + } +} + +} // namespace cdc_ft diff --git a/cdc_rsync/zstd_stream.h b/cdc_rsync/zstd_stream.h new file mode 100644 index 0000000..a757847 --- /dev/null +++ b/cdc_rsync/zstd_stream.h @@ -0,0 +1,65 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_ZSTD_STREAM_H_ +#define CDC_RSYNC_ZSTD_STREAM_H_ + +#include + +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "cdc_rsync/base/socket.h" +#include "common/buffer.h" +#include "lib/zstd.h" + +namespace cdc_ft { + +// Streaming compression using zstd. +class ZstdStream { + public: + ZstdStream(Socket* socket, int level, uint32_t num_threads); + ~ZstdStream(); + + // Sends the given |data| to the compressor. + absl::Status Write(const void* data, size_t size) ABSL_LOCKS_EXCLUDED(mutex_); + + // Flushes all remaining data and sends the compressed data to the socket. + absl::Status Flush() ABSL_LOCKS_EXCLUDED(mutex_); + + private: + // Initializes the compressor and related data. + absl::Status Initialize(int level, uint32_t num_threads) + ABSL_LOCKS_EXCLUDED(mutex_); + + // Compressor thread, pushes |in_buffer_| to the zstd compressor and sends + // compressed data to the socket. + void ThreadCompressorMain() ABSL_LOCKS_EXCLUDED(mutex_); + + Socket* const socket_; + ZSTD_CCtx* cctx_; + + absl::Mutex mutex_; + Buffer in_buffer_ ABSL_GUARDED_BY(mutex_); + bool shutdown_ ABSL_GUARDED_BY(mutex_) = false; + bool last_chunk_ ABSL_GUARDED_BY(mutex_) = false; + bool last_chunk_sent_ ABSL_GUARDED_BY(mutex_) = false; + absl::Status status_ ABSL_GUARDED_BY(mutex_); + std::thread compressor_thread_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_ZSTD_STREAM_H_ diff --git a/cdc_rsync/zstd_stream_test.cc b/cdc_rsync/zstd_stream_test.cc new file mode 100644 index 0000000..2a10f97 --- /dev/null +++ b/cdc_rsync/zstd_stream_test.cc @@ -0,0 +1,72 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync/zstd_stream.h" + +#include "cdc_rsync/base/fake_socket.h" +#include "cdc_rsync_server/unzstd_stream.h" +#include "common/status_test_macros.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +class ZstdStreamTest : public ::testing::Test { + protected: + FakeSocket socket_; + ZstdStream cstream_{&socket_, /*level=*/6, /*num_threads=*/8}; + UnzstdStream dstream_{&socket_}; +}; + +TEST_F(ZstdStreamTest, Small) { + const std::string want = "Lorem ipsum gibberisulum foobarberis"; + EXPECT_OK(cstream_.Write(want.data(), want.size())); + EXPECT_OK(cstream_.Flush()); + + Buffer buff(1024); + size_t bytes_read; + bool eof = false; + EXPECT_OK(dstream_.Read(buff.data(), buff.size(), &bytes_read, &eof)); + EXPECT_TRUE(eof); + std::string got(buff.data(), bytes_read); + EXPECT_EQ(got, want); +} + +TEST_F(ZstdStreamTest, Large) { + Buffer want(1024 * 1024 * 10 + 12345); + constexpr uint64_t prime = 919393; + for (size_t n = 0; n < want.size(); ++n) { + want.data()[n] = ((n * prime) % 26) + 'a'; + } + + constexpr int kChunkSize = 19 * 1024; + for (size_t pos = 0; pos < want.size(); pos += kChunkSize) { + size_t size = std::min(kChunkSize, want.size() - pos); + EXPECT_OK(cstream_.Write(want.data() + pos, size)); + } + EXPECT_OK(cstream_.Flush()); + + bool eof = false; + Buffer buff(128 * 1024); + Buffer got; + while (!eof) { + size_t bytes_read; + EXPECT_OK(dstream_.Read(buff.data(), buff.size(), &bytes_read, &eof)); + got.append(buff.data(), bytes_read); + } + EXPECT_EQ(want, got); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync_cli/.gitignore b/cdc_rsync_cli/.gitignore new file mode 100644 index 0000000..7dc8dde --- /dev/null +++ b/cdc_rsync_cli/.gitignore @@ -0,0 +1,3 @@ +x64/* +*.log +*.user \ No newline at end of file diff --git a/cdc_rsync_cli/BUILD b/cdc_rsync_cli/BUILD new file mode 100644 index 0000000..79ec2af --- /dev/null +++ b/cdc_rsync_cli/BUILD @@ -0,0 +1,44 @@ +package(default_visibility = [ + "//:__subpackages__", +]) + +cc_binary( + name = "cdc_rsync", + srcs = ["main.cc"], + deps = [ + ":params", + "//cdc_rsync", + ], +) + +cc_library( + name = "params", + srcs = ["params.cc"], + hdrs = ["params.h"], + deps = [ + "//cdc_rsync", + "@com_github_zstd//:zstd", + "@com_google_absl//absl/status", + ], +) + +cc_test( + name = "params_test", + srcs = ["params_test.cc"], + data = ["testdata/root.txt"] + glob(["testdata/params/**"]), + deps = [ + ":params", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) + +filegroup( + name = "all_test_data", + srcs = glob(["testdata/**"]), +) diff --git a/cdc_rsync_cli/cdc_rsync_cli.vcxproj b/cdc_rsync_cli/cdc_rsync_cli.vcxproj new file mode 100644 index 0000000..b0603e0 --- /dev/null +++ b/cdc_rsync_cli/cdc_rsync_cli.vcxproj @@ -0,0 +1,87 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {3FAC852A-00A8-4CFB-9160-07EFF2B73562} + Win32Proj + cdc_rsync + $([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0')) + 10.0 + + + + Makefile + 
true + v141 + v142 + + + Makefile + false + v141 + v142 + + + + + + + + + + + + + $(SolutionDir)bazel-out\x64_windows-dbg\bin\cdc_rsync_cli\ + /std:c++17 + UNICODE + + + $(SolutionDir)bazel-out\x64_windows-opt\bin\cdc_rsync_cli\ + UNICODE + /std:c++17 + + + + {4ece65e0-d950-4b96-8ad5-0313261b8c8d} + false + false + + + + + + Console + + + + + //cdc_rsync_cli:cdc_rsync + cdc_rsync.exe + ..\;..\third_party\absl;..\third_party\blake3\c;..\bazel-stadia-file-transfer\external\com_github_zstd\lib;..\third_party\googletest\googletest\include;..\third_party\protobuf\src;$(VC_IncludePath);$(WindowsSDK_IncludePath) + ..\/ + + + + + + + + \ No newline at end of file diff --git a/cdc_rsync_cli/cdc_rsync_cli.vcxproj.filters b/cdc_rsync_cli/cdc_rsync_cli.vcxproj.filters new file mode 100644 index 0000000..9cd8510 --- /dev/null +++ b/cdc_rsync_cli/cdc_rsync_cli.vcxproj.filters @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/cdc_rsync_cli/main.cc b/cdc_rsync_cli/main.cc new file mode 100644 index 0000000..a308428 --- /dev/null +++ b/cdc_rsync_cli/main.cc @@ -0,0 +1,72 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define WIN32_LEAN_AND_MEAN +#include + +#include +#include + +#include "cdc_rsync/cdc_rsync.h" +#include "cdc_rsync_cli/params.h" +#include "common/util.h" + +int wmain(int argc, wchar_t* argv[]) { + cdc_ft::params::Parameters parameters; + + // Convert args from wide to UTF8 strings. + std::vector utf8_str_args; + utf8_str_args.reserve(argc); + for (int i = 0; i < argc; i++) { + utf8_str_args.push_back(cdc_ft::Util::WideToUtf8Str(argv[i])); + } + + // Convert args from UTF8 strings to UTF8 c-strings. + std::vector utf8_args; + utf8_args.reserve(argc); + for (const auto& utf8_str_arg : utf8_str_args) { + utf8_args.push_back(utf8_str_arg.c_str()); + } + + if (!cdc_ft::params::Parse(argc, utf8_args.data(), ¶meters)) { + return 1; + } + + // Convert sources from string-vec to c-str-vec. + std::vector sources_ptr; + sources_ptr.reserve(parameters.sources.size()); + for (const std::string& source : parameters.sources) { + sources_ptr.push_back(source.c_str()); + } + + // Convert filter rules from string-structs to c-str-structs. 
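+  // As with the sources above, cdc_ft::Sync() takes plain pointers and
+  // structs rather than std::string, so the parsed parameters are converted
+  // here. The std::string storage in |parameters| outlives the Sync() call
+  // below, which keeps the borrowed const char* pointers valid. (Rationale
+  // inferred from this file; cdc_rsync.h is not shown here.)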
+ std::vector filter_rules; + filter_rules.reserve(parameters.filter_rules.size()); + for (const cdc_ft::params::Parameters::FilterRule& rule : + parameters.filter_rules) { + filter_rules.emplace_back(rule.type, rule.pattern.c_str()); + } + + const char* error_message = nullptr; + cdc_ft::ReturnCode code = cdc_ft::Sync( + ¶meters.options, filter_rules.data(), parameters.filter_rules.size(), + parameters.sources_dir.c_str(), sources_ptr.data(), + parameters.sources.size(), parameters.destination.c_str(), + &error_message); + + if (error_message) { + fprintf(stderr, "Error: %s\n", error_message); + } + return static_cast(code); +} diff --git a/cdc_rsync_cli/params.cc b/cdc_rsync_cli/params.cc new file mode 100644 index 0000000..665b14a --- /dev/null +++ b/cdc_rsync_cli/params.cc @@ -0,0 +1,442 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_cli/params.h" + +#include + +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "common/path.h" +#include "lib/zstd.h" + +namespace cdc_ft { +namespace params { +namespace { + +template +void PrintError(const absl::FormatSpec& format, Args... args) { + std::cerr << "Error: " << absl::StrFormat(format, args...) << std::endl; +} + +enum class OptionResult { kConsumedKey, kConsumedKeyValue, kError }; + +const char kHelpText[] = + R"(Copy local files to a gamelet + +Synchronizes local files and files on a gamelet. Matching files are skipped. +For partially matching files only the deltas are transferred. + +Usage: + cdc_rsync [options] source [source]... destination + +Parameters: + source Local file or folder to be copied + destination Destination folder on the gamelet + +Options: + --ip string Gamelet IP. Required. + --port number SSH port to use. Required. + --contimeout sec Gamelet connection timeout in seconds (default: 10) +-q, --quiet Quiet mode, only print errors +-v, --verbose Increase output verbosity + --json Print JSON progress +-n, --dry-run Perform a trial run with no changes made +-r, --recursive Recurse into directories + --delete Delete extraneous files from destination folder +-z, --compress Compress file data during the transfer + --compress-level num Explicitly set compression level (default: 6) +-c, --checksum Skip files based on checksum, not mod-time & size +-W, --whole-file Always copy files whole, + do not apply delta-transfer algorithm + --exclude pattern Exclude files matching pattern + --exclude-from file Read exclude patterns from file + --include pattern Don't exclude files matching pattern + --include-from file Read include patterns from file + --files-from file Read list of source files from file +-R, --relative Use relative path names + --existing Skip creating new files on instance + --copy-dest dir Use files from dir as sync base if files are missing + from destination folder +-h --help Help for cdc_rsync +)"; + +// Handles the --exclude-from and --include-from options. 
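+// The file is read one pattern per line; empty lines are dropped and
+// surrounding whitespace is trimmed (see the ReadAllLines() flags below).
+// An exclude file could therefore look like:
+//   *.pdb
+//   *.log
+// (example patterns only, not taken from the repository).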
+OptionResult HandleFilterRuleFile(const std::string& option_name, + const char* path, FilterRule::Type type, + Parameters* params) { + if (!path) { + PrintError("Option '%s' needs a value", option_name); + return OptionResult::kError; + } + + std::vector patterns; + absl::Status status = path::ReadAllLines( + path, &patterns, + path::ReadFlags::kRemoveEmpty | path::ReadFlags::kTrimWhitespace); + if (!status.ok()) { + PrintError("Failed to read file '%s' for %s option: %s", path, option_name, + status.message()); + return OptionResult::kError; + } + + for (std::string& pattern : patterns) { + params->filter_rules.emplace_back(type, std::move(pattern)); + } + return OptionResult::kConsumedKeyValue; +} + +// Loads sources for --files-from option. |sources| must contain at most one +// path and that path must be an existing directory. This directory is returned +// in |sources_dir|. The method then loads all sources line-by-line from +// |sources_file| and stores them into |sources|. +bool LoadFilesFrom(const std::string& files_from, + std::vector* sources, + std::string* sources_dir) { + if (sources->size() > 1) { + PrintError( + "Expected at most 1 source for the --files-from option, but %u " + "provided", + sources->size()); + return false; + } + if (sources->size() == 1 && !path::DirExists(sources->at(0))) { + PrintError( + "The source '%s' must be an existing directory for the --files-from " + "option", + sources->at(0)); + return false; + } + *sources_dir = sources->empty() ? std::string() : sources->at(0); + if (!sources_dir->empty()) { + path::EnsureEndsWithPathSeparator(sources_dir); + } + + sources->clear(); + absl::Status status = path::ReadAllLines( + files_from, sources, + path::ReadFlags::kRemoveEmpty | path::ReadFlags::kTrimWhitespace); + if (!status.ok()) { + PrintError("Failed to read sources file '%s' for files-from option: %s", + files_from, status.message()); + return false; + } + + if (sources->empty()) { + PrintError("The file '%s' specified in the --files-from option is empty", + files_from); + } + + return true; +} + +OptionResult HandleParameter(const std::string& key, const char* value, + Parameters* params, bool* help) { + if (key == "ip") { + params->options.ip = value; + return OptionResult::kConsumedKeyValue; + } + + if (key == "port") { + if (value) { + params->options.port = atoi(value); + } + return OptionResult::kConsumedKeyValue; + } + + if (key == "delete") { + params->options.delete_ = true; + return OptionResult::kConsumedKey; + } + + if (key == "r" || key == "recursive") { + params->options.recursive = true; + return OptionResult::kConsumedKey; + } + + if (key == "v" || key == "verbosity") { + params->options.verbosity++; + return OptionResult::kConsumedKey; + } + + if (key == "q" || key == "quiet") { + params->options.quiet = true; + return OptionResult::kConsumedKey; + } + + if (key == "W" || key == "whole-file") { + params->options.whole_file = true; + return OptionResult::kConsumedKey; + } + + if (key == "include") { + params->filter_rules.emplace_back(FilterRule::Type::kInclude, value); + return OptionResult::kConsumedKeyValue; + } + + if (key == "include-from") { + return HandleFilterRuleFile(key, value, FilterRule::Type::kInclude, params); + } + + if (key == "exclude") { + params->filter_rules.emplace_back(FilterRule::Type::kExclude, value); + return OptionResult::kConsumedKeyValue; + } + + if (key == "exclude-from") { + return HandleFilterRuleFile(key, value, FilterRule::Type::kExclude, params); + } + + if (key == "files-from") { + // Implies -R. 
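+    // This follows vanilla rsync, where --files-from also switches on
+    // --relative so that the listed paths are recreated under the
+    // destination.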
+ params->options.relative = true; + params->files_from = value; + return OptionResult::kConsumedKeyValue; + } + + if (key == "R" || key == "relative") { + params->options.relative = true; + return OptionResult::kConsumedKey; + } + + if (key == "z" || key == "compress") { + params->options.compress = true; + return OptionResult::kConsumedKey; + } + + if (key == "compress-level") { + if (value) { + params->options.compress_level = atoi(value); + } + return OptionResult::kConsumedKeyValue; + } + + if (key == "contimeout") { + if (value) { + params->options.connection_timeout_sec = atoi(value); + } + return OptionResult::kConsumedKeyValue; + } + + if (key == "h" || key == "help") { + *help = true; + return OptionResult::kConsumedKey; + } + + if (key == "c" || key == "checksum") { + params->options.checksum = true; + return OptionResult::kConsumedKey; + } + + if (key == "n" || key == "dry-run") { + params->options.dry_run = true; + return OptionResult::kConsumedKey; + } + + if (key == "existing") { + params->options.existing = true; + return OptionResult::kConsumedKey; + } + + if (key == "copy-dest") { + params->options.copy_dest = value; + return OptionResult::kConsumedKeyValue; + } + + if (key == "json") { + params->options.json = true; + return OptionResult::kConsumedKey; + } + + PrintError("Unknown option: '%s'", key); + return OptionResult::kError; +} + +bool CheckParameters(const Parameters& params, bool help) { + if (help) { + printf("%s", kHelpText); + return false; + } + + if (params.options.delete_ && !params.options.recursive) { + PrintError("--delete does not work without --recursive (-r)."); + return false; + } + + if (!params.options.ip || params.options.ip[0] == '\0') { + PrintError("--ip must specify a valid IP address"); + return false; + } + + if (!params.options.port || params.options.port <= 0 || + params.options.port > UINT16_MAX) { + PrintError("--port must specify a valid port"); + return false; + } + + // Note: ZSTD_minCLevel() is ridiculously small (-131072), so use a + // reasonable value. + assert(ZSTD_minCLevel() <= Options::kMinCompressLevel); + assert(ZSTD_maxCLevel() == Options::kMaxCompressLevel); + static_assert(Options::kMinCompressLevel < 0); + static_assert(Options::kMaxCompressLevel > 0); + if (params.options.compress_level < Options::kMinCompressLevel || + params.options.compress_level > Options::kMaxCompressLevel || + params.options.compress_level == 0) { + PrintError("--compress_level must be between %i..-1 or 1..%i", + Options::kMinCompressLevel, Options::kMaxCompressLevel); + return false; + } + + // Warn that any include rules not followed by an exclude rule are pointless + // as the files would be included, anyway. + for (int n = static_cast(params.filter_rules.size()) - 1; n >= 0; --n) { + const Parameters::FilterRule& rule = params.filter_rules[n]; + if (rule.type == FilterRule::Type::kExclude) { + break; + } + std::cout << "Warning: Include pattern '" << rule.pattern + << "' has no effect, not followed by exclude pattern" + << std::endl; + } + + return true; +} + +bool CheckOptionResult(OptionResult result, const std::string& name, + const char* value) { + switch (result) { + case OptionResult::kConsumedKey: + return true; + + case OptionResult::kConsumedKeyValue: + if (!value) { + PrintError("Option '%s' needs a value", name); + return false; + } + return true; + + case OptionResult::kError: + // Error message was already printed. 
+ return false; + } + + return true; +} + +} // namespace + +const char* HelpText() { return kHelpText; } + +// Note that abseil has a flags library, but the C++ version doesn't support +// short names ("-q"), see https://abseil.io/docs/cpp/guides/flags. However, we +// aim to be roughly compatible with vanilla rsync, which does have short flag +// names like "-q". +bool Parse(int argc, const char* const* argv, Parameters* parameters) { + if (argc <= 1) { + std::cout << kHelpText; + return false; + } + + bool help = false; + for (int index = 1; index < argc; ++index) { + // Handle '--key [value]' and '--key=value' options. + bool equality_used = false; + if (strncmp(argv[index], "--", 2) == 0) { + std::string key(argv[index] + 2); + const char* value = nullptr; + size_t equality_pos = key.find("="); + if (equality_pos != std::string::npos) { + if (equality_pos + 1 < key.size()) { + value = argv[index] + 2 + equality_pos + 1; + } + key = key.substr(0, equality_pos); + equality_used = true; + } else { + value = index + 1 < argc && argv[index + 1][0] != '-' ? argv[index + 1] + : nullptr; + } + OptionResult result = HandleParameter(key, value, parameters, &help); + if (!CheckOptionResult(result, key, value)) { + return false; + } + if (!equality_used && result == OptionResult::kConsumedKeyValue) { + ++index; + } + continue; + } + + // Handle '-abc' options. + if (strncmp(argv[index], "-", 1) == 0) { + char key[] = "x"; + char name[] = "-x"; + for (const char* c = argv[index] + 1; *c != 0; ++c) { + key[0] = *c; + name[1] = *c; + OptionResult result = HandleParameter(key, nullptr, parameters, &help); + // These args shouldn't try to consume values. + assert(result != OptionResult::kConsumedKeyValue); + if (!CheckOptionResult(result, name, nullptr)) { + return false; + } + } + continue; + } + + // The last added option is the destination. Move previously added options + // to the sources. + if (!parameters->destination.empty()) { + parameters->sources.push_back(std::move(parameters->destination)); + } + parameters->destination = argv[index]; + } + + // Load files-from file (can't do it when --files-from is handled since not + // all sources might have been read at that point. + if (parameters->files_from && + !LoadFilesFrom(parameters->files_from, ¶meters->sources, + ¶meters->sources_dir)) { + return false; + } + + if (!CheckParameters(*parameters, help)) { + return false; + } + + if (parameters->sources.empty() && parameters->destination.empty()) { + PrintError("Missing source and destination"); + return false; + } + + if (parameters->destination.empty()) { + PrintError("Missing destination"); + return false; + } + + if (parameters->sources.empty()) { + // If one arg was passed on the command line, it is not clear whether it + // was supposed to be a source or destination. Try to infer that, e.g. + // cdc_rsync *.txt -> Missing destination + // cdc_rsync /mnt/developer -> Missing source + bool missing_src = parameters->destination[0] == '/'; + + PrintError("Missing %s", missing_src ? "source" : "destination"); + return false; + } + + return true; +} + +} // namespace params +} // namespace cdc_ft diff --git a/cdc_rsync_cli/params.h b/cdc_rsync_cli/params.h new file mode 100644 index 0000000..97f0ec0 --- /dev/null +++ b/cdc_rsync_cli/params.h @@ -0,0 +1,54 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_CLI_PARAMS_H_ +#define CDC_RSYNC_CLI_PARAMS_H_ + +#include +#include + +#include "cdc_rsync/cdc_rsync.h" + +namespace cdc_ft { +namespace params { + +// All cdc_rsync command line parameters. +struct Parameters { + // Copy of cdc_ft::FilterRule with std::string instead of const char*. + struct FilterRule { + using Type = ::cdc_ft::FilterRule::Type; + FilterRule(Type type, std::string pattern) + : type(type), pattern(std::move(pattern)) {} + Type type; + std::string pattern; + }; + + Options options; + std::vector filter_rules; + std::vector sources; + std::string destination; + const char* files_from = nullptr; + std::string sources_dir; // Base directory for files loaded for --files-from. +}; + +// Parses sources, destination and options from the command line args. +// Prints a help text if not enough arguments were given or -h/--help was given. +bool Parse(int argc, const char* const* argv, Parameters* parameters); + +} // namespace params +} // namespace cdc_ft + +#endif // CDC_RSYNC_CLI_PARAMS_H_ diff --git a/cdc_rsync_cli/params_test.cc b/cdc_rsync_cli/params_test.cc new file mode 100644 index 0000000..1315427 --- /dev/null +++ b/cdc_rsync_cli/params_test.cc @@ -0,0 +1,512 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cdc_rsync_cli/params.h" + +#include "absl/strings/match.h" +#include "common/log.h" +#include "common/path.h" +#include "common/test_main.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace params { +namespace { + +class TestLog : public Log { + public: + explicit TestLog() : Log(LogLevel::kInfo) {} + + protected: + void WriteLogMessage(LogLevel level, const char* file, int line, + const char* func, const char* message) override { + errors_ += message; + } + + private: + std::string errors_; +}; + +std::string NeedsValueError(const char* option_name) { + return absl::StrFormat("Option '%s' needs a value", option_name); +} + +class ParamsTest : public ::testing::Test { + public: + void SetUp() override { prev_stderr_ = std::cerr.rdbuf(errors_.rdbuf()); } + + void TearDown() override { std::cerr.rdbuf(prev_stderr_); } + + protected: + void ExpectNoError() const { + EXPECT_TRUE(errors_.str().empty()) + << "Expected empty stderr but got\n'" << errors_.str() << "'"; + } + + void ExpectError(const std::string& expected) const { + EXPECT_TRUE(absl::StrContains(errors_.str(), expected)) + << "Expected stderr to contain '" << expected << "' but got\n'" + << errors_.str() << "'"; + } + + void ClearErrors() { errors_.str(std::string()); } + + std::string base_dir_ = GetTestDataDir("params"); + std::string sources_file_ = path::Join(base_dir_, "source_files.txt"); + std::string empty_sources_file_ = + path::Join(base_dir_, "empty_source_files.txt"); + + Parameters parameters_; + std::stringstream errors_; + std::streambuf* prev_stderr_; +}; + +TEST_F(ParamsTest, ParseSucceedsDefaults) { + const char* argv[] = {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_STREQ("1.2.3.4", parameters_.options.ip); + EXPECT_EQ(1234, parameters_.options.port); + EXPECT_FALSE(parameters_.options.delete_); + EXPECT_FALSE(parameters_.options.recursive); + EXPECT_EQ(0, parameters_.options.verbosity); + EXPECT_FALSE(parameters_.options.quiet); + EXPECT_FALSE(parameters_.options.whole_file); + EXPECT_FALSE(parameters_.options.compress); + EXPECT_FALSE(parameters_.options.checksum); + EXPECT_FALSE(parameters_.options.dry_run); + EXPECT_EQ(parameters_.options.copy_dest, nullptr); + EXPECT_EQ(6, parameters_.options.compress_level); + EXPECT_EQ(10, parameters_.options.connection_timeout_sec); + EXPECT_EQ(1, parameters_.sources.size()); + EXPECT_EQ(parameters_.sources[0], "source"); + EXPECT_EQ(parameters_.destination, "destination"); + ExpectNoError(); +} + +TEST_F(ParamsTest, ParseSucceedsWithOptionFromTwoArguments) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--compress-level", "2", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_EQ(parameters_.options.compress_level, 2); + ExpectNoError(); +} + +TEST_F(ParamsTest, + ParseSucceedsWithOptionFromOneArgumentWithEqualityWithValue) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--compress-level=2", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ASSERT_EQ(parameters_.sources.size(), 1); + EXPECT_EQ(parameters_.options.compress_level, 2); + EXPECT_EQ(parameters_.sources[0], "source"); + EXPECT_EQ(parameters_.destination, "destination"); + ExpectNoError(); +} + +TEST_F(ParamsTest, ParseFailsOnCompressLevelEqualsNoValue) { + const char* argv[] = 
{"cdc_rsync.exe", "--compress-level=", "source", + "destination", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("compress-level")); +} + +TEST_F(ParamsTest, ParseFailsOnPortEqualsNoValue) { + const char* argv[] = {"cdc_rsync.exe", "--port=", "source", "destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("port")); +} + +TEST_F(ParamsTest, ParseFailsOnContimeoutEqualsNoValue) { + const char* argv[] = {"cdc_rsync.exe", "--contimeout=", "source", + "destination", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("contimeout")); +} + +TEST_F(ParamsTest, ParseFailsOnIpEqualsNoValue) { + const char* argv[] = {"cdc_rsync.exe", "--ip=", "source", "destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("ip")); +} + +TEST_F(ParamsTest, ParseWithoutParametersFailsOnMissingSourceAndDestination) { + const char* argv[] = {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("Missing source"); +} + +TEST_F(ParamsTest, ParseWithSingleParameterFailsOnMissingDestination) { + const char* argv[] = {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", + "source", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("Missing destination"); +} + +TEST_F(ParamsTest, ParseSuccessedsWithMultipleLetterKeyConsumed) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "-rvqWRzcn", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_TRUE(parameters_.options.recursive); + EXPECT_EQ(parameters_.options.verbosity, 1); + EXPECT_TRUE(parameters_.options.quiet); + EXPECT_TRUE(parameters_.options.whole_file); + EXPECT_TRUE(parameters_.options.relative); + EXPECT_TRUE(parameters_.options.compress); + EXPECT_TRUE(parameters_.options.checksum); + EXPECT_TRUE(parameters_.options.dry_run); + ExpectNoError(); +} + +TEST_F(ParamsTest, + ParseFailsOnMultipleLetterKeyConsumedOptionsWithUnsupportedOne) { + const char* argv[] = {"cdc_rsync.exe", "-rvqaWRzcn", "source", "destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("Unknown option: 'a'"); +} + +TEST_F(ParamsTest, ParseSuccessedsWithMultipleLongKeyConsumedOptions) { + const char* argv[] = {"cdc_rsync.exe", + "--ip=1.2.3.4", + "--port=1234", + "--recursive", + "--verbosity", + "--quiet", + "--whole-file", + "--compress", + "--relative", + "--delete", + "--checksum", + "--dry-run", + "--existing", + "--json", + "source", + "destination", + NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_TRUE(parameters_.options.recursive); + EXPECT_EQ(parameters_.options.verbosity, 1); + EXPECT_TRUE(parameters_.options.quiet); + EXPECT_TRUE(parameters_.options.whole_file); + EXPECT_TRUE(parameters_.options.relative); + EXPECT_TRUE(parameters_.options.compress); + EXPECT_TRUE(parameters_.options.delete_); + EXPECT_TRUE(parameters_.options.checksum); + EXPECT_TRUE(parameters_.options.dry_run); + EXPECT_TRUE(parameters_.options.existing); + EXPECT_TRUE(parameters_.options.json); + ExpectNoError(); +} + +TEST_F(ParamsTest, ParseFailsOnUnknownKey) { + const char* argv[] = {"cdc_rsync.exe", "-unknownKey", "source", 
"destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("Unknown option: 'u'"); +} + +TEST_F(ParamsTest, ParseSuccessedsWithSupportedKeyValue) { + const char* argv[] = { + "cdc_rsync.exe", "--compress-level", "11", "--port=4086", + "--ip=127.0.0.1", "--contimeout", "99", "--copy-dest=dest", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_EQ(parameters_.options.compress_level, 11); + EXPECT_EQ(parameters_.options.connection_timeout_sec, 99); + EXPECT_EQ(parameters_.options.port, 4086); + EXPECT_STREQ(parameters_.options.ip, "127.0.0.1"); + EXPECT_STREQ(parameters_.options.copy_dest, "dest"); + ExpectNoError(); +} + +TEST_F(ParamsTest, + ParseSuccessedsWithSupportedKeyValueWithoutEqualityForChars) { + const char* argv[] = {"cdc_rsync.exe", "--port", "4086", "--ip", + "127.0.0.1", "--copy-dest", "dest", "source", + "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_EQ(parameters_.options.port, 4086); + EXPECT_STREQ(parameters_.options.ip, "127.0.0.1"); + EXPECT_STREQ(parameters_.options.copy_dest, "dest"); + ExpectNoError(); +} + +TEST_F(ParamsTest, ParseFailsOnGameletIpNeedsPort) { + const char* argv[] = {"cdc_rsync.exe", "--ip=127.0.0.1", "source", + "destination", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("--port must specify a valid port"); +} + +TEST_F(ParamsTest, ParseFailsOnDeleteNeedsRecursive) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--delete", + "source", "destination", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("--delete does not work without --recursive (-r)"); +} + +TEST_F(ParamsTest, ParseChecksCompressLevel) { + int minLevel = Options::kMinCompressLevel; + int maxLevel = Options::kMaxCompressLevel; + int levels[] = {minLevel - 1, minLevel, 0, maxLevel, maxLevel + 1}; + bool valid[] = {false, true, false, true, false}; + + for (int n = 0; n < std::size(levels); ++n) { + std::string level = "--compress-level=" + std::to_string(levels[n]); + const char* argv[] = {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", + level.c_str(), "source", "destination"}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, + ¶meters_) == valid[n]); + if (valid[n]) { + ExpectNoError(); + } else { + ExpectError("--compress_level must be between"); + } + ClearErrors(); + } +} + +TEST_F(ParamsTest, ParseFailsOnUnknownKeyValue) { + const char* argv[] = {"cdc_rsync.exe", "--unknownKey=5", "source", + "destination", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("unknownKey"); +} + +TEST_F(ParamsTest, ParseFailsWithHelpOption) { + const char* argv[] = {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + const char* argv2[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "source", + "destination", "--help", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv2)) - 1, argv2, ¶meters_)); + ExpectNoError(); + + const char* argv3[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "source", + "destination", "-h", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv3)) - 1, argv3, ¶meters_)); + ExpectNoError(); +} + +TEST_F(ParamsTest, ParseSucceedsWithIncludeExclude) { + const char* argv[] = { + "cdc_rsync.exe", 
"--ip=1.2.3.4", "--port=1234", "--include=*.txt", + "--exclude", "*.dat", "--include", "*.exe", + "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ASSERT_EQ(parameters_.filter_rules.size(), 3); + ASSERT_EQ(parameters_.filter_rules[0].type, FilterRule::Type::kInclude); + ASSERT_EQ(parameters_.filter_rules[0].pattern, "*.txt"); + ASSERT_EQ(parameters_.filter_rules[1].type, FilterRule::Type::kExclude); + ASSERT_EQ(parameters_.filter_rules[1].pattern, "*.dat"); + ASSERT_EQ(parameters_.filter_rules[2].type, FilterRule::Type::kInclude); + ASSERT_EQ(parameters_.filter_rules[2].pattern, "*.exe"); + ExpectNoError(); +} + +TEST_F(ParamsTest, FilesFrom_NoFile) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "source", + "destination", "--files-from", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("files-from")); +} + +TEST_F(ParamsTest, FilesFrom_ImpliesRelative) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--files-from", + sources_file_.c_str(), base_dir_.c_str(), "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_TRUE(parameters_.options.relative); + ExpectNoError(); +} + +TEST_F(ParamsTest, FilesFrom_WithoutSourceArg) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--files-from", + sources_file_.c_str(), "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + EXPECT_TRUE(parameters_.sources_dir.empty()); + EXPECT_EQ(parameters_.destination, "destination"); + ExpectNoError(); +} + +TEST_F(ParamsTest, FilesFrom_WithSourceArg) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--files-from", + sources_file_.c_str(), base_dir_.c_str(), "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + std::string expected_sources_dir = base_dir_; + path::EnsureEndsWithPathSeparator(&expected_sources_dir); + EXPECT_EQ(parameters_.sources_dir, expected_sources_dir); + EXPECT_EQ(parameters_.destination, "destination"); + ExpectNoError(); +} + +TEST_F(ParamsTest, FilesFrom_ParsesFile) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--files-from", + sources_file_.c_str(), "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + std::vector expected = {"file1", "file2", "file3"}; + ASSERT_EQ(parameters_.sources.size(), expected.size()); + for (size_t n = 0; n < expected.size(); ++n) { + EXPECT_EQ(parameters_.sources[n], expected[n]); + } + ExpectNoError(); +} + +TEST_F(ParamsTest, FilesFrom_EmptyFile_WithoutSourceArg) { + const char* argv[] = {"cdc_rsync.exe", + "--ip=1.2.3.4", + "--port=1234", + "--files-from", + empty_sources_file_.c_str(), + "destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(empty_sources_file_); + ExpectError("--files-from option is empty"); +} + +TEST_F(ParamsTest, FilesFrom_EmptyFile_WithSourceArg) { + const char* argv[] = {"cdc_rsync.exe", + "--ip=1.2.3.4", + "--port=1234", + "--files-from", + empty_sources_file_.c_str(), + base_dir_.c_str(), + "destination", + NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(empty_sources_file_); + ExpectError("--files-from option is empty"); +} + +TEST_F(ParamsTest, FilesFrom_NoDestination) { + const char* argv[] 
= {"cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", + "--files-from", sources_file_.c_str(), NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError("Missing destination"); +} + +TEST_F(ParamsTest, IncludeFrom_NoFile) { + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "source", + "destination", "--include-from", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("include-from")); +} + +TEST_F(ParamsTest, IncludeFrom_ParsesFile) { + std::string file = path::Join(base_dir_, "include_files.txt"); + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--include-from", + file.c_str(), "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + ASSERT_EQ(parameters_.filter_rules.size(), 1); + ASSERT_EQ(parameters_.filter_rules[0].type, FilterRule::Type::kInclude); + ASSERT_EQ(parameters_.filter_rules[0].pattern, "file3"); + ExpectNoError(); +} + +TEST_F(ParamsTest, ExcludeFrom_NoFile) { + const char* argv[] = {"cdc_rsync.exe", "source", "destination", + "--exclude-from", NULL}; + EXPECT_FALSE( + Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + ExpectError(NeedsValueError("exclude-from")); +} + +TEST_F(ParamsTest, ExcludeFrom_ParsesFile) { + std::string file = path::Join(base_dir_, "exclude_files.txt"); + const char* argv[] = { + "cdc_rsync.exe", "--ip=1.2.3.4", "--port=1234", "--exclude-from", + file.c_str(), "source", "destination", NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + ASSERT_EQ(parameters_.filter_rules.size(), 2); + EXPECT_EQ(parameters_.filter_rules[0].type, FilterRule::Type::kExclude); + EXPECT_EQ(parameters_.filter_rules[0].pattern, "file1"); + EXPECT_EQ(parameters_.filter_rules[1].type, FilterRule::Type::kExclude); + EXPECT_EQ(parameters_.filter_rules[1].pattern, "file2"); + ExpectNoError(); +} + +TEST_F(ParamsTest, IncludeExcludeMixed_ProperOrder) { + std::string exclude_file = path::Join(base_dir_, "exclude_files.txt"); + std::string include_file = path::Join(base_dir_, "include_files.txt"); + const char* argv[] = {"cdc_rsync.exe", + "--ip=1.2.3.4", + "--port=1234", + "--include-from", + include_file.c_str(), + "--exclude=excl1", + "source", + "--exclude-from", + exclude_file.c_str(), + "destination", + "--include", + "incl1", + NULL}; + EXPECT_TRUE(Parse(static_cast(std::size(argv)) - 1, argv, ¶meters_)); + + ASSERT_EQ(parameters_.filter_rules.size(), 5); + EXPECT_EQ(parameters_.filter_rules[0].type, FilterRule::Type::kInclude); + EXPECT_EQ(parameters_.filter_rules[0].pattern, "file3"); + EXPECT_EQ(parameters_.filter_rules[1].type, FilterRule::Type::kExclude); + EXPECT_EQ(parameters_.filter_rules[1].pattern, "excl1"); + EXPECT_EQ(parameters_.filter_rules[2].type, FilterRule::Type::kExclude); + EXPECT_EQ(parameters_.filter_rules[2].pattern, "file1"); + EXPECT_EQ(parameters_.filter_rules[3].type, FilterRule::Type::kExclude); + EXPECT_EQ(parameters_.filter_rules[3].pattern, "file2"); + EXPECT_EQ(parameters_.filter_rules[4].type, FilterRule::Type::kInclude); + EXPECT_EQ(parameters_.filter_rules[4].pattern, "incl1"); + ExpectNoError(); +} + +} // namespace +} // namespace params +} // namespace cdc_ft diff --git a/cdc_rsync_cli/testdata/params/empty_source_files.txt b/cdc_rsync_cli/testdata/params/empty_source_files.txt new file mode 100644 index 0000000..b28b04f --- /dev/null +++ b/cdc_rsync_cli/testdata/params/empty_source_files.txt @@ -0,0 
+1,3 @@ + + + diff --git a/cdc_rsync_cli/testdata/params/exclude_files.txt b/cdc_rsync_cli/testdata/params/exclude_files.txt new file mode 100644 index 0000000..66728af --- /dev/null +++ b/cdc_rsync_cli/testdata/params/exclude_files.txt @@ -0,0 +1,2 @@ +file1 +file2 \ No newline at end of file diff --git a/cdc_rsync_cli/testdata/params/include_files.txt b/cdc_rsync_cli/testdata/params/include_files.txt new file mode 100644 index 0000000..7c8ac2f --- /dev/null +++ b/cdc_rsync_cli/testdata/params/include_files.txt @@ -0,0 +1 @@ +file3 diff --git a/cdc_rsync_cli/testdata/params/source_files.txt b/cdc_rsync_cli/testdata/params/source_files.txt new file mode 100644 index 0000000..5998d4b --- /dev/null +++ b/cdc_rsync_cli/testdata/params/source_files.txt @@ -0,0 +1,6 @@ +file1 + + file2 + file3 + + diff --git a/cdc_rsync_cli/testdata/root.txt b/cdc_rsync_cli/testdata/root.txt new file mode 100644 index 0000000..e69de29 diff --git a/cdc_rsync_server/.gitignore b/cdc_rsync_server/.gitignore new file mode 100644 index 0000000..4a7de2c --- /dev/null +++ b/cdc_rsync_server/.gitignore @@ -0,0 +1,4 @@ +GGP/* +generated_protos +*.log +*.user \ No newline at end of file diff --git a/cdc_rsync_server/BUILD b/cdc_rsync_server/BUILD new file mode 100644 index 0000000..402542e --- /dev/null +++ b/cdc_rsync_server/BUILD @@ -0,0 +1,158 @@ +package(default_visibility = [ + "//:__subpackages__", +]) + +cc_library( + name = "file_deleter_and_sender", + srcs = ["file_deleter_and_sender.cc"], + hdrs = ["file_deleter_and_sender.h"], + deps = [ + "//cdc_rsync/base:message_pump", + "//cdc_rsync/protos:messages_cc_proto", + "//common:path", + "//common:status", + "@com_google_absl//absl/status", + ], +) + +cc_test( + name = "file_deleter_and_sender_test", + srcs = ["file_deleter_and_sender_test.cc"], + deps = [ + ":file_deleter_and_sender", + "//cdc_rsync/base:fake_socket", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "file_finder", + srcs = ["file_finder.cc"], + hdrs = ["file_finder.h"], + deps = [ + ":file_info", + "//common:path", + "//common:path_filter", + "//common:status", + "@com_google_absl//absl/status", + ], +) + +cc_test( + name = "file_finder_test", + srcs = ["file_finder_test.cc"], + data = ["testdata/root.txt"] + glob(["testdata/file_finder/**"]), + deps = [ + ":file_finder", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "file_diff_generator", + srcs = ["file_diff_generator.cc"], + hdrs = ["file_diff_generator.h"], + deps = [ + ":file_info", + "//cdc_rsync/protos:messages_cc_proto", + "//common:log", + "//common:path", + "//common:util", + ], +) + +cc_test( + name = "file_diff_generator_test", + srcs = ["file_diff_generator_test.cc"], + data = ["testdata/root.txt"] + glob(["testdata/file_diff_generator/**"]), + deps = [ + ":file_diff_generator", + "//common:status_test_macros", + "//common:test_main", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "cdc_rsync_server", + srcs = [ + "cdc_rsync_server.cc", + "cdc_rsync_server.h", + "main.cc", + ], + copts = select({ + #":debug_build": ["-fstandalone-debug"], + "//conditions:default": [], + }), + deps = [ + ":file_deleter_and_sender", + ":file_diff_generator", + ":file_finder", + ":file_info", + ":server_socket", + ":unzstd_stream", + "//cdc_rsync/base:cdc_interface", + "//cdc_rsync/base:message_pump", + "//cdc_rsync/base:server_exit_code", + "//common:clock", + 
"//common:gamelet_component", + "//common:log", + "//common:path_filter", + "//common:status", + "//common:stopwatch", + "//common:threadpool", + "//common:util", + ], +) + +config_setting( + name = "debug_build", + values = { + "compilation_mode": "dbg", + }, +) + +cc_library( + name = "file_info", + hdrs = ["file_info.h"], +) + +cc_library( + name = "server_socket", + srcs = ["server_socket.cc"], + hdrs = ["server_socket.h"], + target_compatible_with = ["@platforms//os:linux"], + deps = [ + "//cdc_rsync/base:socket", + "//common:log", + "//common:status", + "@com_google_absl//absl/status", + ], +) + +cc_library( + name = "unzstd_stream", + srcs = ["unzstd_stream.cc"], + hdrs = ["unzstd_stream.h"], + deps = [ + "//cdc_rsync/base:message_pump", + "//cdc_rsync/base:socket", + "//common:status", + "@com_github_zstd//:zstd", + "@com_google_absl//absl/status", + ], +) + +filegroup( + name = "all_test_sources", + srcs = glob(["*_test.cc"]), +) + +filegroup( + name = "all_test_data", + srcs = glob(["testdata/**"]), +) diff --git a/cdc_rsync_server/cdc_rsync_server.cc b/cdc_rsync_server/cdc_rsync_server.cc new file mode 100644 index 0000000..07058a6 --- /dev/null +++ b/cdc_rsync_server/cdc_rsync_server.cc @@ -0,0 +1,758 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_server/cdc_rsync_server.h" + +#include "absl/strings/str_format.h" +#include "cdc_rsync/base/cdc_interface.h" +#include "cdc_rsync/protos/messages.pb.h" +#include "cdc_rsync_server/file_deleter_and_sender.h" +#include "cdc_rsync_server/file_finder.h" +#include "cdc_rsync_server/server_socket.h" +#include "cdc_rsync_server/unzstd_stream.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status.h" +#include "common/stopwatch.h" +#include "common/threadpool.h" +#include "common/util.h" + +namespace cdc_ft { + +namespace { + +// Suffix for the patched file created from the basis file and the diff. +constexpr char kIntermediatePathSuffix[] = ".__cdc_rsync_temp__"; + +uint16_t kExecutableBits = + path::MODE_IXUSR | path::MODE_IXGRP | path::MODE_IXOTH; + +// Background task that receives patch info for the base file at |base_filepath| +// and writes the patched file to |target_filepath|. If |base_filepath| and +// |target_filepath| match, writes an intermediate file and replaces +// the file at |target_filepath| with the intermediate file when all data has +// been received. 
+class PatchTask : public Task { + public: + PatchTask(const std::string& base_filepath, + const std::string& target_filepath, const ChangedFileInfo& file, + CdcInterface* cdc) + : base_filepath_(base_filepath), + target_filepath_(target_filepath), + file_(file), + cdc_(cdc) {} + + virtual ~PatchTask() = default; + + const ChangedFileInfo& File() const { return file_; } + + const absl::Status& Status() const { return status_; } + + // Task: + void ThreadRun(IsCancelledPredicate is_cancelled) override { + bool need_intermediate_file = target_filepath_ == base_filepath_; + std::string patched_filepath = + need_intermediate_file ? base_filepath_ + kIntermediatePathSuffix + : target_filepath_; + + absl::StatusOr patched_file = path::OpenFile(patched_filepath, "wb"); + if (!patched_file.ok()) { + status_ = patched_file.status(); + return; + } + + // Receive diff stream from server and apply. + bool is_executable = false; + status_ = cdc_->ReceiveDiffAndPatch(base_filepath_, *patched_file, + &is_executable); + fclose(*patched_file); + if (!status_.ok()) { + return; + } + + // These bits are OR'ed on top of the mode bits. + uint16_t mode_or_bits = is_executable ? kExecutableBits : 0; + + // Store mode from the original base path. + path::Stats stats; + status_ = GetStats(base_filepath_, &stats); + if (!status_.ok()) { + status_ = + WrapStatus(status_, "GetStats() failed for '%s'", base_filepath_); + return; + } + + if (need_intermediate_file) { + // Replace |base_filepath_| (==|target_filepath_|) by the intermediate + // file |patched_filepath|. + status_ = path::ReplaceFile(target_filepath_, patched_filepath); + if (!status_.ok()) { + status_ = WrapStatus(status_, "ReplaceFile() for '%s' by '%s' failed", + base_filepath_, patched_filepath); + return; + } + } else { + // An intermediate file is typically not needed when the base path is + // a file in a package. Since package files are read-only, we add the + // write bit, so that the file can be overwritten with the next sync. + mode_or_bits |= path::Mode::MODE_IWUSR; + } + + // Restore mode, possibly adding executable bit and user write bit. + status_ = path::ChangeMode(target_filepath_, stats.mode | mode_or_bits); + if (!status_.ok()) { + status_ = + WrapStatus(status_, "ChangeMode() failed for '%s'", base_filepath_); + return; + } + + status_ = path::SetFileTime(target_filepath_, file_.client_modified_time); + } + + private: + std::string base_filepath_; + std::string target_filepath_; + ChangedFileInfo file_; + CdcInterface* cdc_; + absl::Status status_; +}; + +PathFilter::Rule::Type ToInternalType( + SetOptionsRequest::FilterRule::Type type) { + switch (type) { + case SetOptionsRequest::FilterRule::TYPE_INCLUDE: + return PathFilter::Rule::Type::kInclude; + case SetOptionsRequest::FilterRule::TYPE_EXCLUDE: + return PathFilter::Rule::Type::kExclude; + // Make compiler happy... + case SetOptionsRequest_FilterRule_Type_SetOptionsRequest_FilterRule_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case SetOptionsRequest_FilterRule_Type_SetOptionsRequest_FilterRule_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + assert(false); + return PathFilter::Rule::Type::kInclude; +} + +} // namespace + +GgpRsyncServer::GgpRsyncServer() = default; + +GgpRsyncServer::~GgpRsyncServer() { + message_pump_.reset(); + socket_.reset(); +} + +bool GgpRsyncServer::CheckComponents( + const std::vector& components) { + // Components are expected to reside in the same dir as the executable. 
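+  // (Assumed usage: |components| describes the binaries the client deployed;
+  // returning false indicates a size/timestamp mismatch with the local
+  // cdc_rsync_server binary, so the caller can request a re-deployment of the
+  // components before syncing.)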
+ std::string component_dir; + absl::Status status = path::GetExeDir(&component_dir); + if (!status.ok()) { + return false; + } + + std::vector our_components; + status = GameletComponent::Get( + {path::Join(component_dir, "cdc_rsync_server")}, &our_components); + if (!status.ok() || components != our_components) { + return false; + } + + return true; +} + +absl::Status GgpRsyncServer::Run(int port) { + socket_ = std::make_unique(); + absl::Status status = socket_->StartListening(port); + if (!status.ok()) { + return WrapStatus(status, "Failed to start listening on port %i", port); + } + LOG_INFO("cdc_rsync_server listening on port %i", port); + + // This is the marker for the client, so it knows it can connect. + printf("Server is listening\n"); + fflush(stdout); + + status = socket_->WaitForConnection(); + if (!status.ok()) { + return WrapStatus(status, "Failed to establish a connection"); + } + + message_pump_ = std::make_unique( + socket_.get(), + [this](PacketType type) { Thread_OnPackageReceived(type); }); + message_pump_->StartMessagePump(); + + LOG_INFO("Client connected. Starting to sync."); + status = Sync(); + if (!status.ok()) { + socket_->ShutdownSendingEnd().IgnoreError(); + return status; + } + + LOG_INFO("Exiting cdc_rsync_server"); + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::Sync() { + // First, the client sends us options, e.g. the |destination_| directory. + absl::Status status = HandleSetOptions(); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive options"); + } + + // Find all files in the |destination_| and |copy_dest_| (if set) directories. + status = FindFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to find files on instance"); + } + + // Get the list of all files that the client sends us. + status = HandleSendAllFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive client file info"); + } + + // Diff client and server files and send missing files to the client. + status = DiffFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to compute file difference"); + } + + // Delete files and directories not present on the client. + if (delete_) { + status = RemoveExtraneousFilesAndDirs(); + if (!status.ok()) { + return WrapStatus(status, "Failed to delete files and directories"); + } + } + + if (!dry_run_) { + status = CreateMissingDirs(); + if (!status.ok()) { + return WrapStatus(status, "Failed to create missing directories"); + } + } + + // Send indices of missing files to the client + status = SendFileIndices("missing", diff_.missing_files); + if (!status.ok()) { + return WrapStatus(status, "Failed to send indices of missing files"); + } + + if (!dry_run_) { + // Get file data of missing files from client. + status = HandleSendMissingFileData(); + if (!status.ok()) { + return WrapStatus(status, "Failed to copy files"); + } + } + // Send indices of changed files to the client. + status = SendFileIndices("changed", diff_.changed_files); + if (!status.ok()) { + return WrapStatus(status, "Failed to send indices of changed files"); + } + + if (!dry_run_) { + // Applies the rsync algorithm to update the changed files. + status = SyncChangedFiles(); + if (!status.ok()) { + return WrapStatus(status, "Failed to sync files"); + } + } + + // Handle clean shutdown. 
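+  // (The shutdown is an explicit request/response handshake, presumably so
+  // the client knows all previous messages were processed before it closes
+  // the connection.)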
+ status = HandleShutdown(); + if (!status.ok()) { + return WrapStatus(status, "Shutdown failed"); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::HandleSetOptions() { + LOG_INFO("Receiving options"); + + SetOptionsRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kSetOptions, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive SetOptionsRequest"); + } + + destination_ = request.destination(); + delete_ = request.delete_(); + recursive_ = request.recursive(); + verbosity_ = request.verbosity(); + whole_file_ = request.whole_file(); + compress_ = request.compress(); + checksum_ = request.checksum(); + relative_ = request.relative(); + dry_run_ = request.dry_run(); + existing_ = request.existing(); + copy_dest_ = request.copy_dest(); + + // (internal): Support \ instead of / in destination folders. + path::FixPathSeparators(&destination_); + path::EnsureEndsWithPathSeparator(&destination_); + if (!copy_dest_.empty()) { + path::FixPathSeparators(©_dest_); + path::EnsureEndsWithPathSeparator(©_dest_); + } + + assert(path_filter_.IsEmpty()); + for (int n = 0; n < request.filter_rules_size(); ++n) { + std::string fixed_pattern = request.filter_rules(n).pattern(); + path::FixPathSeparators(&fixed_pattern); + path_filter_.AddRule(ToInternalType(request.filter_rules(n).type()), + fixed_pattern); + } + + Log::Instance()->SetLogLevel(Log::VerbosityToLogLevel(verbosity_)); + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::FindFiles() { + Stopwatch stopwatch; + FileFinder finder; + + LOG_INFO("Finding all files in destination folder '%s'", destination_); + absl::Status status = + finder.AddFiles(destination_, recursive_, &path_filter_); + if (!status.ok()) { + return WrapStatus(status, "Failed to search '%s'", destination_); + } + + if (!copy_dest_.empty()) { + LOG_INFO("Finding all files in copy-dest folder '%s'", copy_dest_); + status = finder.AddFiles(copy_dest_, recursive_, &path_filter_); + if (!status.ok()) { + return WrapStatus(status, "Failed to search '%s'", copy_dest_); + } + } + + finder.ReleaseFiles(&server_files_, &server_dirs_); + + LOG_INFO("Found and set %u source files in %0.3f seconds", + server_files_.size(), stopwatch.ElapsedSeconds()); + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::HandleSendAllFiles() { + std::string current_directory; + + for (;;) { + AddFilesRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kAddFiles, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive AddFilesRequest"); + } + + // An empty request indicates that all files have been sent. + if (request.files_size() == 0 && request.dirs_size() == 0) { + return absl::OkStatus(); + } + + current_directory = request.directory(); + path::FixPathSeparators(¤t_directory); + + // Add client files. + for (const AddFilesRequest::File& file : request.files()) { + uint32_t client_index = client_files_.size(); + client_files_.emplace_back(path::Join(current_directory, file.filename()), + file.modified_time(), file.size(), + client_index, nullptr); + } + // Add client directories. + for (const std::string& dir : request.dirs()) { + uint32_t client_index = client_dirs_.size(); + client_dirs_.emplace_back(path::Join(current_directory, dir), + client_index, nullptr); + } + } +} + +absl::Status GgpRsyncServer::DiffFiles() { + LOG_INFO("Diffing files"); + + // Be sure to move the data. It can grow quite large with millions of files. 
+ // Special case for relative, non-recursive mode. This puts files with a + // relative directory into the "missing" bucket since the server-side search + // doesn't look into sub-folders. Double check that they are really missing. + const bool double_check_missing = relative_ && !recursive_; + diff_ = + file_diff::Generate(std::move(client_files_), std::move(server_files_), + std::move(client_dirs_), std::move(server_dirs_), + destination_, copy_dest_, double_check_missing); + + // Take sync flags into account and generate the stats response. + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + existing_, checksum_, whole_file_, &diff_); + + // Send stats. + absl::Status status = + message_pump_->SendMessage(PacketType::kSendFileStats, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send SendFileStatsResponse"); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::RemoveExtraneousFilesAndDirs() { + FileDeleterAndSender deleter(message_pump_.get()); + + // To guarantee that the folders are empty before they are removed, files are + // removed first. + for (const FileInfo& file : diff_.extraneous_files) { + absl::Status status = deleter.DeleteAndSendFileOrDir( + destination_, file.filepath, dry_run_, false); + if (!status.ok()) { + return WrapStatus(status, "Failed to delete file '%s' and send info", + file.filepath); + } + } + + // To guarantee that the subfolders are removed first. + std::sort(diff_.extraneous_dirs.begin(), diff_.extraneous_dirs.end(), + [](const DirInfo& dir1, const DirInfo& dir2) { + return dir1.filepath > dir2.filepath; + }); + for (const DirInfo& dir : diff_.extraneous_dirs) { + absl::Status status = deleter.DeleteAndSendFileOrDir( + destination_, dir.filepath, dry_run_, true); + if (!status.ok()) { + return WrapStatus(status, "Failed to delete directory '%s' and send info", + dir.filepath); + } + } + + // Send remaining files to the client. + absl::Status status = deleter.Flush(); + if (!status.ok()) { + return WrapStatus( + status, + "Failed to send info of remaining deleted files and directories"); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::CreateMissingDirs() { + for (const DirInfo& dir : diff_.missing_dirs) { + // Make directory. + std::string path = path::Join(destination_, dir.filepath); + std::error_code error_code; + // A file with the same name already exists. + if (path::Exists(path)) { + assert(!diff_.extraneous_files.empty()); + absl::Status status = path::RemoveFile(path); + if (!status.ok()) { + return WrapStatus( + status, "Failed to remove file '%s' before creating folder '%s'", + path, path); + } + } + absl::Status status = path::CreateDirRec(path); + if (!status.ok()) { + return WrapStatus(status, "Failed to create directory %s", path); + } + } + return absl::OkStatus(); +} + +template +absl::Status GgpRsyncServer::SendFileIndices(const char* file_type, + const std::vector& files) { + LOG_INFO("Sending indices of missing files to client"); + constexpr char error_fmt[] = "Failed to send indices of %s files."; + + AddFileIndicesResponse response; + absl::Status status; + for (const T& file : files) { + response.add_client_indices(file.client_index); + + constexpr int kMaxBatchSize = 4000; + if (response.client_indices_size() >= kMaxBatchSize) { + status = + message_pump_->SendMessage(PacketType::kAddFileIndices, response); + response.clear_client_indices(); + } + + if (!status.ok()) { + return WrapStatus(status, error_fmt, file_type); + } + } + + // Send the rest. 
+ if (response.client_indices_size() > 0) { + status = message_pump_->SendMessage(PacketType::kAddFileIndices, response); + if (!status.ok()) { + return WrapStatus(status, error_fmt, file_type); + } + + response.clear_client_indices(); + } + + // Send an empty response to indicate that we're done. + status = message_pump_->SendMessage(PacketType::kAddFileIndices, response); + if (!status.ok()) { + return WrapStatus(status, error_fmt, file_type); + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::HandleSendMissingFileData() { + if (diff_.missing_files.empty()) { + return absl::OkStatus(); + } + + LOG_INFO("Receiving missing files"); + + // Expect start of compression. The server socket will actually handle + // compression transparently, there's nothing we have to do here. + if (compress_) { + ToggleCompressionRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kToggleCompression, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive ToggleCompressionRequest"); + } + } + + for (uint32_t server_index = 0; server_index < diff_.missing_files.size(); + server_index++) { + const FileInfo& file = diff_.missing_files[server_index]; + std::string filepath = path::Join(destination_, file.filepath); + LOG_INFO("%s", filepath.c_str()); + + SendMissingFileDataRequest request; + absl::Status status = message_pump_->ReceiveMessage( + PacketType::kSendMissingFileData, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive SendMissingFileDataRequest"); + } + + // Verify if we got the right index. + if (request.server_index() != server_index) { + return MakeStatus("Received wrong server index %u. Expected %u.", + request.server_index(), server_index); + } + + // Verify that there is no directory existing with the same name. + if (path::Exists(filepath) && path::DirExists(filepath)) { + assert(!diff_.extraneous_dirs.empty()); + absl::Status status = path::RemoveFile(filepath); + if (!status.ok()) { + return WrapStatus( + status, "Failed to remove folder '%s' before creating file '%s'", + filepath, filepath); + } + } + + // Make directory. + std::string dir = path::DirName(filepath); + std::error_code error_code; + status = path::CreateDirRec(dir); + if (!status.ok()) { + return MakeStatus("Failed to create directory %s: %s", dir, + error_code.message()); + } + + // Receive file data. + Buffer buffer; + bool is_executable = false; + bool first_chunk = true; + auto handler = [message_pump = message_pump_.get(), &buffer, &is_executable, + &first_chunk](const void** data, size_t* size) { + absl::Status status = message_pump->ReceiveRawData(&buffer); + if (!status.ok()) { + return status; + } + + // size 0 indicates EOF. + *data = buffer.size() > 0 ? buffer.data() : nullptr; + *size = buffer.size(); + + // Detect executables. + if (first_chunk && buffer.size() > 0) { + first_chunk = false; + is_executable = Util::IsExecutable(buffer.data(), buffer.size()); + } + + return absl::OkStatus(); + }; + + absl::StatusOr fp = path::OpenFile(filepath, "wb"); + if (!fp.ok()) { + return fp.status(); + } + + status = path::StreamWriteFileContents(*fp, handler); + fclose(*fp); + if (!status.ok()) { + return WrapStatus(status, "Failed to write file %s", filepath); + } + + // Set file write time. + status = path::SetFileTime(filepath, file.modified_time); + if (!status.ok()) { + return WrapStatus(status, "Failed to set file mod time for %s", filepath); + } + + // Set executable bit, but just print warnings as it's not critical. 
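+    // (Note: detection is heuristic. Util::IsExecutable() only sees the first
+    // received chunk, so it is assumed to inspect the file's leading magic
+    // bytes rather than any client-side mode bits.)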
+ if (is_executable) { + path::Stats stats; + status = path::GetStats(filepath, &stats); + if (status.ok()) { + status = path::ChangeMode(filepath, stats.mode | kExecutableBits); + } + if (!status.ok()) { + LOG_WARNING("Failed to set executable bit on '%s': %s", filepath, + status.ToString()); + } + } + } + + // Notify client that it can resume sending (uncompressed!) messages. + if (compress_) { + ToggleCompressionResponse response; + absl::Status status = + message_pump_->SendMessage(PacketType::kToggleCompression, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send ToggleCompressionResponse"); + } + } + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::SyncChangedFiles() { + if (diff_.changed_files.empty()) { + return absl::OkStatus(); + } + + LOG_INFO("Synching changed files"); + + // Expect start of compression. The server socket will actually handle + // compression transparently, there's nothing we have to do here. + if (compress_) { + ToggleCompressionRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kToggleCompression, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive ToggleCompressionRequest"); + } + } + + CdcInterface cdc(message_pump_.get()); + + // Pipeline sending signatures and patching files: + // MAIN THREAD: Send signatures to client. + // Only sends to the socket. + // WORKER THREAD: Receive diffs from client and patch file. + // Only reads from the socket. + Threadpool pool(1); + + for (uint32_t server_index = 0; server_index < diff_.changed_files.size(); + server_index++) { + const ChangedFileInfo& file = diff_.changed_files[server_index]; + std::string base_filepath = + path::Join(file.base_dir ? file.base_dir : destination_, file.filepath); + std::string target_filepath = path::Join(destination_, file.filepath); + LOG_INFO("%s -> %s", base_filepath, target_filepath); + + SendSignatureResponse response; + response.set_client_index(file.client_index); + response.set_server_file_size(file.server_size); + absl::Status status = + message_pump_->SendMessage(PacketType::kAddSignatures, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send SendSignatureResponse"); + } + + // Create and send signature. + status = cdc.CreateAndSendSignature(base_filepath); + if (!status.ok()) { + return status; + } + + // Queue patching task. + pool.QueueTask(std::make_unique(base_filepath, target_filepath, + file, &cdc)); + + // Wait for the last file to finish. + if (server_index + 1 == diff_.changed_files.size()) { + pool.Wait(); + } + + // Check the results of completed tasks. + std::unique_ptr task = pool.TryGetCompletedTask(); + while (task) { + PatchTask* patch_task = static_cast(task.get()); + const std::string& task_path = patch_task->File().filepath; + if (!patch_task->Status().ok()) { + return WrapStatus(patch_task->Status(), "Failed to patch file '%s'", + task_path); + } + LOG_INFO("Finished patching file %s", task_path.c_str()); + task = pool.TryGetCompletedTask(); + } + } + + // Notify client that it can resume sending (uncompressed!) messages. 
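+  // (Decompression itself is transparent to this handler:
+  // Thread_OnPackageReceived() below swaps the message pump's input to the
+  // unzstd stream when the client announces kToggleCompression, so only the
+  // handshake is handled here.)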
+ if (compress_) { + ToggleCompressionResponse response; + absl::Status status = + message_pump_->SendMessage(PacketType::kToggleCompression, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send ToggleCompressionResponse"); + } + } + + LOG_INFO("Successfully synced %u files", diff_.changed_files.size()); + + return absl::OkStatus(); +} + +absl::Status GgpRsyncServer::HandleShutdown() { + ShutdownRequest request; + absl::Status status = + message_pump_->ReceiveMessage(PacketType::kShutdown, &request); + if (!status.ok()) { + return WrapStatus(status, "Failed to receive ShutdownRequest"); + } + + ShutdownResponse response; + status = message_pump_->SendMessage(PacketType::kShutdown, response); + if (!status.ok()) { + return WrapStatus(status, "Failed to send ShutdownResponse"); + } + + return absl::OkStatus(); +} + +void GgpRsyncServer::Thread_OnPackageReceived(PacketType type) { + if (type != PacketType::kToggleCompression) { + return; + } + + // Turn on decompression. + message_pump_->RedirectInput(std::make_unique(socket_.get())); +} + +} // namespace cdc_ft diff --git a/cdc_rsync_server/cdc_rsync_server.h b/cdc_rsync_server/cdc_rsync_server.h new file mode 100644 index 0000000..7a75684 --- /dev/null +++ b/cdc_rsync_server/cdc_rsync_server.h @@ -0,0 +1,121 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_SERVER_CDC_RSYNC_SERVER_H_ +#define CDC_RSYNC_SERVER_CDC_RSYNC_SERVER_H_ + +#include +#include +#include + +#include "absl/status/status.h" +#include "cdc_rsync/base/message_pump.h" +#include "cdc_rsync_server/file_diff_generator.h" +#include "cdc_rsync_server/file_info.h" +#include "common/gamelet_component.h" +#include "common/path_filter.h" + +namespace cdc_ft { + +class MessagePump; +class ServerSocket; + +class GgpRsyncServer { + public: + GgpRsyncServer(); + ~GgpRsyncServer(); + + // Checks that the gamelet components (cdc_rsync_server binary etc.) are + // up-to-date by checking their sizes and timestamps. + bool CheckComponents(const std::vector& components); + + // Listens to |port|, accepts a connection from the client and runs the rsync + // procedure. + absl::Status Run(int port); + + // Returns the verbosity sent from the client. 0 by default. + int GetVerbosity() const { return verbosity_; } + + private: + // Runs the rsync procedure. + absl::Status Sync(); + + // Receives options from the client. + absl::Status HandleSetOptions(); + + // Finds all server-side files in the |destination_| folder. + absl::Status FindFiles(); + + // Receives all client-side files. + absl::Status HandleSendAllFiles(); + + // Diffs client- and server-side files. + absl::Status DiffFiles(); + + // Deletes files and directories present on the server, but not on the client. + absl::Status RemoveExtraneousFilesAndDirs(); + + // Creates missing directories. + absl::Status CreateMissingDirs(); + + // Sends file indices to the client. Used for missing and changed files. 
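+  // Indices are streamed in batches of roughly 4000 per message; an empty
+  // AddFileIndicesResponse marks the end of the list. Rough wire sketch
+  // (assumed flow, not a literal transcript):
+  //   AddFileIndicesResponse{client_indices: [...]}  // one or more batches
+  //   AddFileIndicesResponse{}                        // empty batch = done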
+ template + absl::Status SendFileIndices(const char* file_type, + const std::vector& files); + + // Receives missing files from the client. + absl::Status HandleSendMissingFileData(); + + // Core rsync algorithm. Sends signatures of changed files to the client, + // receives diffs and applies them. + absl::Status SyncChangedFiles(); + + // Waits for the shutdown message and send an ack. + absl::Status HandleShutdown(); + + // Called on |message_pump_|'s receiver thread whenever a package is received. + // Used to toggle decompression. + void Thread_OnPackageReceived(PacketType type); + + std::unique_ptr socket_; + std::unique_ptr message_pump_; + + std::string destination_; + + // Options. + bool delete_ = false; + bool recursive_ = false; + int verbosity_ = 0; + bool whole_file_ = false; + bool compress_ = false; + bool checksum_ = false; + bool relative_ = false; + bool dry_run_ = false; + bool existing_ = false; + std::string copy_dest_; + + PathFilter path_filter_; + + std::vector client_files_; + std::vector client_dirs_; + std::vector server_files_; + std::vector server_dirs_; + file_diff::Result diff_; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_SERVER_CDC_RSYNC_SERVER_H_ diff --git a/cdc_rsync_server/cdc_rsync_server.vcxproj b/cdc_rsync_server/cdc_rsync_server.vcxproj new file mode 100644 index 0000000..4aa0d73 --- /dev/null +++ b/cdc_rsync_server/cdc_rsync_server.vcxproj @@ -0,0 +1,59 @@ + + + + + Debug + GGP + + + Release + GGP + + + + {4ece65e0-d950-4b96-8ad5-0313261b8c8d} + cdc_rsync_server + + + + Makefile + true + + + Makefile + false + + + $(SolutionDir)bazel-out\k8-dbg\bin\cdc_rsync_server\ + /std:c++17 + + + $(SolutionDir)bazel-out\k8-opt\bin\cdc_rsync_server\ + /std:c++17 + + + + + + + + + + + + + + + + + + //cdc_rsync_server:cdc_rsync_server + cdc_rsync_server + ..\;..\third_party\absl;..\third_party\blake3\c;..\bazel-stadia-file-transfer\external\com_github_zstd\lib;..\third_party\googletest\googletest\include;..\third_party\protobuf\src + ..\/ + + + + + + \ No newline at end of file diff --git a/cdc_rsync_server/cdc_rsync_server.vcxproj.filters b/cdc_rsync_server/cdc_rsync_server.vcxproj.filters new file mode 100644 index 0000000..9cd8510 --- /dev/null +++ b/cdc_rsync_server/cdc_rsync_server.vcxproj.filters @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/cdc_rsync_server/file_deleter_and_sender.cc b/cdc_rsync_server/file_deleter_and_sender.cc new file mode 100644 index 0000000..fed76a7 --- /dev/null +++ b/cdc_rsync_server/file_deleter_and_sender.cc @@ -0,0 +1,111 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cdc_rsync_server/file_deleter_and_sender.h" + +#include "cdc_rsync/base/message_pump.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status.h" + +namespace cdc_ft { + +FileDeleterAndSender::FileDeleterAndSender(MessagePump* message_pump, + size_t request_size_threshold) + : message_pump_(message_pump), + request_size_threshold_(request_size_threshold) { + assert(message_pump_); +} + +FileDeleterAndSender::~FileDeleterAndSender() = default; + +absl::Status FileDeleterAndSender::DeleteAndSendFileOrDir( + const std::string& base_dir, const std::string& relative_path, bool dry_run, + bool is_directory) { + std::string filepath = path::Join(base_dir, relative_path); + if (!dry_run) { + LOG_INFO("Removing %s", filepath.c_str()); + absl::Status status = path::RemoveFile(filepath); + if (!status.ok()) { + return WrapStatus(status, "Failed to remove '%s'", filepath); + } + } + + std::string relative_dir = path::DirName(relative_path); + if (!relative_dir.empty()) path::EnsureEndsWithPathSeparator(&relative_dir); + if (response_.directory() != relative_dir) { + // Flush files in previous directory. + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus( + status, "Failed to send deleted files and directories to client"); + } + + // Set new directory. + response_.set_directory(relative_dir); + response_size_ = response_.directory().length(); + } + + std::string filename = path::BaseName(relative_path); + if (is_directory) { + *response_.add_dirs() = filename; + } else { + *response_.add_files() = filename; + } + response_size_ += filename.size(); + + if (response_size_ >= request_size_threshold_) { + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus( + status, "Failed to send deleted files and directories to client"); + } + } + + return absl::OkStatus(); +} + +absl::Status FileDeleterAndSender::Flush() { + absl::Status status = SendFilesAndDirs(); + if (!status.ok()) { + return WrapStatus(status, + "Failed to send deleted files and directories to client"); + } + + // Send an empty batch as EOF indicator. + assert(response_.files_size() == 0 && response_.dirs_size() == 0); + status = message_pump_->SendMessage(PacketType::kAddDeletedFiles, response_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send EOF indicator"); + } + + return absl::OkStatus(); +} + +absl::Status FileDeleterAndSender::SendFilesAndDirs() { + if (response_.files_size() == 0 && response_.dirs_size() == 0) { + return absl::OkStatus(); + } + + absl::Status status = + message_pump_->SendMessage(PacketType::kAddDeletedFiles, response_); + if (!status.ok()) { + return WrapStatus(status, "Failed to send AddDeletedFilesResponse"); + } + response_.Clear(); + response_size_ = response_.directory().length(); + return absl::OkStatus(); +} + +} // namespace cdc_ft diff --git a/cdc_rsync_server/file_deleter_and_sender.h b/cdc_rsync_server/file_deleter_and_sender.h new file mode 100644 index 0000000..55f933d --- /dev/null +++ b/cdc_rsync_server/file_deleter_and_sender.h @@ -0,0 +1,65 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_SERVER_FILE_DELETER_AND_SENDER_H_ +#define CDC_RSYNC_SERVER_FILE_DELETER_AND_SENDER_H_ + +#include + +#include "absl/status/status.h" +#include "cdc_rsync/protos/messages.pb.h" + +namespace cdc_ft { + +class MessagePump; + +// Deletes files and sends info about deleted files to the client. +class FileDeleterAndSender { + public: + // Send AddDeletedFileResponse in packets of roughly 10k max by default. + static constexpr size_t kDefaultResponseSizeThreshold = 10000; + + FileDeleterAndSender( + MessagePump* message_pump, + size_t response_size_threshold = kDefaultResponseSizeThreshold); + ~FileDeleterAndSender(); + + // Deletes |base_dir| + |relative_path| and send |relative_path| the client. + // Deletion happens for either a directory or a file and only in a non dry-run + // mode. + absl::Status DeleteAndSendFileOrDir(const std::string& base_dir, + const std::string& relative_path, + bool dry_run, bool is_directory); + + // Sends the remaining file and directory batch to the client, followed by an + // EOF indicator. Should be called once all files and directories have been + // deleted. + absl::Status Flush(); + + private: + // Sends the current batch to the client. + absl::Status SendFilesAndDirs(); + + MessagePump* const message_pump_; + const size_t request_size_threshold_; + + AddDeletedFilesResponse response_; + size_t response_size_ = 0; +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_SERVER_FILE_DELETER_AND_SENDER_H_ diff --git a/cdc_rsync_server/file_deleter_and_sender_test.cc b/cdc_rsync_server/file_deleter_and_sender_test.cc new file mode 100644 index 0000000..d425bd9 --- /dev/null +++ b/cdc_rsync_server/file_deleter_and_sender_test.cc @@ -0,0 +1,263 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_server/file_deleter_and_sender.h" + +#include "cdc_rsync/base/fake_socket.h" +#include "cdc_rsync/base/message_pump.h" +#include "common/log.h" +#include "common/path.h" +#include "common/status_test_macros.h" +#include "gtest/gtest.h" + +constexpr bool kFile = false; +constexpr bool kDir = true; + +constexpr bool kDryRun = true; +constexpr bool kNoDryRun = false; + +namespace cdc_ft { +namespace { + +// Note: FileDiffGenerator is a server-only class and only runs on GGP, but the +// code is independent of the platform, so we can test it from Windows. 
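+// The tests below use FakeSocket as an in-process loopback: whatever the
+// FileDeleterAndSender under test sends through |message_pump_| is read back
+// by the same pump and compared against the expected AddDeletedFilesResponse
+// batches.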
+class FileDeleterAndSenderTest : public ::testing::Test { + void SetUp() override { + Log::Initialize(std::make_unique(LogLevel::kInfo)); + message_pump_.StartMessagePump(); + tmp_dir_ = path::GetTempDir(); + path::EnsureDoesNotEndWithPathSeparator(&tmp_dir_); + } + + void TearDown() override { + // Make sure there are no more AddDeletedFilesResponse. + ShutdownRequest shutdown; + EXPECT_OK(message_pump_.SendMessage(PacketType::kShutdown, shutdown)); + EXPECT_OK(message_pump_.ReceiveMessage(PacketType::kShutdown, &shutdown)); + + socket_.ShutdownSendingEnd(); + message_pump_.StopMessagePump(); + Log::Shutdown(); + } + + protected: + // Creates a temp file in %TMP% with given |relative_path|. + std::string CreateTempFile(std::string relative_path) { + std::string full_path = path::Join(tmp_dir_, relative_path); + + std::string dir = path::DirName(full_path); + EXPECT_OK(path::CreateDirRec(dir)); + EXPECT_OK(path::WriteFile(full_path, "")); + EXPECT_TRUE(path::Exists(full_path)); + return full_path; + } + + // Creates a bunch of temp files in %TMP% with given |relative_paths|. + std::vector CreateTempFiles( + std::vector relative_paths) { + std::vector full_paths; + for (const std::string& relative_path : relative_paths) { + full_paths.push_back(CreateTempFile(relative_path)); + } + return full_paths; + } + + // Creates a temp directory in %TMP% with given |relative_path|. + std::string CreateTempDir(std::string relative_path) { + std::string full_path = path::Join(tmp_dir_, relative_path); + + EXPECT_OK(path::CreateDirRec(full_path)); + EXPECT_TRUE(path::Exists(full_path)); + return full_path; + } + + // Creates a bunch of temp directories in %TMP% with given |relative_paths|. + std::vector CreateTempDirs( + std::vector relative_paths) { + std::vector full_paths; + for (const std::string& relative_path : relative_paths) { + full_paths.push_back(CreateTempDir(relative_path)); + } + return full_paths; + } + + // Expects an AddDeletedFilesResponse with no files as EOF indicator. + void ExpectEofMarker() { + // Verify that there is only the empty "EOF" indicator message. + AddDeletedFilesResponse response; + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.files_size(), 0); + } + + FakeSocket socket_; + MessagePump message_pump_{&socket_, MessagePump::PacketReceivedDelegate()}; + + std::string tmp_dir_; +}; + +TEST_F(FileDeleterAndSenderTest, NoFiles) { + // Delete no files, no dirs. + FileDeleterAndSender deleter(&message_pump_); + EXPECT_OK(deleter.Flush()); + + ExpectEofMarker(); +} + +TEST_F(FileDeleterAndSenderTest, FilesDeletedAndSent) { + // Create temp files. + std::vector full_paths = CreateTempFiles( + {"__fdas_unittest_1.txt", "__fdas_unittest_2.txt", + path::ToNative("__fdas_unittest_dir/__fdas_unittest_3.txt")}); + + // Delete files. + FileDeleterAndSender deleter(&message_pump_); + for (const std::string& file : full_paths) { + EXPECT_OK(deleter.DeleteAndSendFileOrDir( + tmp_dir_, file.substr(tmp_dir_.size()), kNoDryRun, kFile)); + } + EXPECT_OK(deleter.Flush()); + + // Did the files get deleted? + for (const std::string& file : full_paths) { + EXPECT_FALSE(path::Exists(file)); + } + + // Verify that the data sent to the socket matches. 
+ AddDeletedFilesResponse response; + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), path::ToNative("/")); + ASSERT_EQ(response.files_size(), 2); + ASSERT_EQ(response.dirs_size(), 0); + EXPECT_EQ(response.files(0), "__fdas_unittest_1.txt"); + EXPECT_EQ(response.files(1), "__fdas_unittest_2.txt"); + + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + ASSERT_EQ(response.dirs_size(), 0); + EXPECT_EQ(response.directory(), path::ToNative("/__fdas_unittest_dir/")); + ASSERT_EQ(response.files_size(), 1); + EXPECT_EQ(response.files(0), "__fdas_unittest_3.txt"); + + ExpectEofMarker(); +} + +TEST_F(FileDeleterAndSenderTest, DirsDeletedAndSent) { + // Create temp dirs. + std::vector full_paths = CreateTempDirs( + {"__fdas_unittest_dir", + path::ToNative("__fdas_unittest_dir/__fdas_unittest_1"), + path::ToNative("__fdas_unittest_dir/__fdas_unittest_2"), + path::ToNative( + "__fdas_unittest_dir/__fdas_unittest_1/__fdas_unittest_1_1")}); + + // Delete files. + FileDeleterAndSender deleter(&message_pump_); + for (size_t idx = full_paths.size(); idx > 0; --idx) { + EXPECT_OK(deleter.DeleteAndSendFileOrDir( + tmp_dir_, full_paths[idx - 1].substr(tmp_dir_.size() + 1), kNoDryRun, + kDir)); + } + EXPECT_OK(deleter.Flush()); + + // Did the dirs get deleted? + for (const std::string& dir : full_paths) { + EXPECT_FALSE(path::Exists(dir)); + } + + // Verify that the data sent to the socket matches. + AddDeletedFilesResponse response; + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), + path::ToNative("__fdas_unittest_dir/__fdas_unittest_1/")); + ASSERT_EQ(response.files_size(), 0); + ASSERT_EQ(response.dirs_size(), 1); + EXPECT_EQ(response.dirs(0), "__fdas_unittest_1_1"); + + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), path::ToNative("__fdas_unittest_dir/")); + ASSERT_EQ(response.files_size(), 0); + ASSERT_EQ(response.dirs_size(), 2); + EXPECT_EQ(response.dirs(0), "__fdas_unittest_2"); + EXPECT_EQ(response.dirs(1), "__fdas_unittest_1"); + + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), ""); + ASSERT_EQ(response.files_size(), 0); + ASSERT_EQ(response.dirs_size(), 1); + EXPECT_EQ(response.dirs(0), "__fdas_unittest_dir"); + + ExpectEofMarker(); +} + +TEST_F(FileDeleterAndSenderTest, FilesDeletedAndSentDryRun) { + // Create a temp file. + std::string file_to_remove = CreateTempFile("__fdas_unittest_1.txt"); + + // "Delete" the file in dry-run mode: the file should not be deleted. + // It should be just sent to the socket. + FileDeleterAndSender deleter(&message_pump_); + EXPECT_OK(deleter.DeleteAndSendFileOrDir( + tmp_dir_, file_to_remove.substr(tmp_dir_.size()), kDryRun, kFile)); + EXPECT_OK(deleter.Flush()); + + EXPECT_TRUE(path::Exists(file_to_remove)); + + // Verify that the data sent to the socket matches. + AddDeletedFilesResponse response; + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), path::ToNative("/")); + ASSERT_EQ(response.files_size(), 1); + EXPECT_EQ(response.files(0), "__fdas_unittest_1.txt"); + + ExpectEofMarker(); +} + +TEST_F(FileDeleterAndSenderTest, MessageSplitByMaxSize) { + // Create temp files. 
+ std::vector full_paths = + CreateTempFiles({"__fdas_unittest_1.txt", "__fdas_unittest_2.txt"}); + + // Delete files. The size is picked so that the message gets split. + FileDeleterAndSender deleter(&message_pump_, /*max_request_byte_size=*/20); + for (const std::string& file : full_paths) { + EXPECT_OK(deleter.DeleteAndSendFileOrDir( + tmp_dir_, file.substr(tmp_dir_.size()), kNoDryRun, kFile)); + } + EXPECT_OK(deleter.Flush()); + + // Verify that the data sent to the socket matches. + AddDeletedFilesResponse response; + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), path::ToNative("/")); + ASSERT_EQ(response.files_size(), 1); + EXPECT_EQ(response.files(0), "__fdas_unittest_1.txt"); + + EXPECT_OK( + message_pump_.ReceiveMessage(PacketType::kAddDeletedFiles, &response)); + EXPECT_EQ(response.directory(), path::ToNative("/")); + ASSERT_EQ(response.files_size(), 1); + EXPECT_EQ(response.files(0), "__fdas_unittest_2.txt"); + + ExpectEofMarker(); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync_server/file_diff_generator.cc b/cdc_rsync_server/file_diff_generator.cc new file mode 100644 index 0000000..be21062 --- /dev/null +++ b/cdc_rsync_server/file_diff_generator.cc @@ -0,0 +1,287 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_server/file_diff_generator.h" + +#include + +#include "common/log.h" +#include "common/path.h" +#include "common/util.h" + +namespace cdc_ft { +namespace file_diff { + +namespace { + +struct FilePathComparer { + bool operator()(const FileInfo& a, const FileInfo& b) { + return a.filepath < b.filepath; + } +}; + +struct FilePathEquals { + bool operator()(const FileInfo& a, const FileInfo& b) { + return a.filepath == b.filepath; + } +}; + +struct DirPathComparer { + bool operator()(const DirInfo& a, const DirInfo& b) { + return a.filepath < b.filepath; + } +}; + +struct DirPathEquals { + bool operator()(const DirInfo& a, const DirInfo& b) { + return a.filepath == b.filepath; + } +}; + +bool FindFile(const std::string& base_dir, const std::string& relative_path, + FileInfo* file) { + path::Stats stats; + if (!path::GetStats(path::Join(base_dir, relative_path), &stats).ok()) { + return false; + } + + *file = FileInfo(relative_path, stats.modified_time, stats.size, + FileInfo::kInvalidIndex, base_dir.c_str()); + return true; +} + +// Returns true if |client_file| and |server_file| are considered a match based +// on filesize and timestamp. An exception is if the |server_file| is in the +// |copy_dest| directory (e.g. the package dir). In that case, the file should +// be considered as changed (the sync algo will copy the file over). 
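+// Note that the |copy_dest| check below compares |server_file.base_dir|
+// against copy_dest.c_str(), which appears to be a pointer comparison; it
+// relies on FileInfo storing a pointer into the caller's long-lived
+// |copy_dest| string rather than its own copy.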
+bool FilesMatch(const FileInfo& client_file, const FileInfo& server_file, + const std::string& copy_dest) { + return client_file.size == server_file.size && + client_file.modified_time == server_file.modified_time && + (copy_dest.empty() || server_file.base_dir != copy_dest.c_str()); +} + +} // namespace + +Result Generate(std::vector&& client_files, + std::vector&& server_files, + std::vector&& client_dirs, + std::vector&& server_dirs, const std::string& base_dir, + const std::string& copy_dest, bool double_check_missing) { + // Sort both arrays by filepath. + std::sort(client_files.begin(), client_files.end(), FilePathComparer()); + std::sort(server_files.begin(), server_files.end(), FilePathComparer()); + std::sort(client_dirs.begin(), client_dirs.end(), DirPathComparer()); + std::sort(server_dirs.begin(), server_dirs.end(), DirPathComparer()); + + // De-dupe client files, just in case. This might happen if someone calls + // cdc_rsync with overlapping sources, e.g. assets/* and assets/textures/*. + client_files.erase( + std::unique(client_files.begin(), client_files.end(), FilePathEquals()), + client_files.end()); + + client_dirs.erase( + std::unique(client_dirs.begin(), client_dirs.end(), DirPathEquals()), + client_dirs.end()); + + // Compare the arrays, sorting the files into the right buckets. + std::vector::iterator client_iter = client_files.begin(); + std::vector::iterator server_iter = server_files.begin(); + + Result diff; + + while (client_iter != client_files.end() || + server_iter != server_files.end()) { + const int order = + client_iter == client_files.end() ? 1 // Extraneous. + : server_iter == server_files.end() + ? -1 // Missing. + : client_iter->filepath.compare(server_iter->filepath); + + if (order < 0) { + // File is missing, but first double check if it's really missing if + // |double_check_missing| is true. + FileInfo server_file(std::string(), 0, 0, FileInfo::kInvalidIndex, + nullptr); + bool found = false; + if (double_check_missing) { + found = FindFile(base_dir, client_iter->filepath, &server_file); + if (!found && !copy_dest.empty()) { + found = FindFile(copy_dest, client_iter->filepath, &server_file); + } + } + + if (!found) { + diff.missing_files.push_back(std::move(*client_iter)); + } else if (FilesMatch(*client_iter, server_file, copy_dest)) { + diff.matching_files.push_back(std::move(*client_iter)); + } else { + diff.changed_files.emplace_back(server_file, std::move(*client_iter)); + } + ++client_iter; + } else if (order > 0) { + diff.extraneous_files.push_back(std::move(*server_iter)); + ++server_iter; + } else if (FilesMatch(*client_iter, *server_iter, copy_dest)) { + diff.matching_files.push_back(std::move(*client_iter)); + ++client_iter; + ++server_iter; + } else { + diff.changed_files.emplace_back(*server_iter, std::move(*client_iter)); + ++client_iter; + ++server_iter; + } + } + + // Compare the arrays, sorting the dirs into the right buckets. + std::vector::iterator client_dir_iter = client_dirs.begin(); + std::vector::iterator server_dir_iter = server_dirs.begin(); + + while (client_dir_iter != client_dirs.end() || + server_dir_iter != server_dirs.end()) { + const int order = + client_dir_iter == client_dirs.end() ? 1 // Extraneous. + : server_dir_iter == server_dirs.end() + ? -1 // Missing. 
+ : client_dir_iter->filepath.compare(server_dir_iter->filepath); + + if (order < 0) { + diff.missing_dirs.push_back(std::move(*client_dir_iter)); + ++client_dir_iter; + } else if (order > 0) { + diff.extraneous_dirs.push_back(std::move(*server_dir_iter)); + ++server_dir_iter; + } else { + // Matching dirs in the copy_dest directory need to be created in the + // destination. + if (!copy_dest.empty() && server_dir_iter->base_dir == copy_dest.c_str()) + diff.missing_dirs.push_back(std::move(*client_dir_iter)); + else + diff.matching_dirs.push_back(std::move(*client_dir_iter)); + ++client_dir_iter; + ++server_dir_iter; + } + } + + // Remove all extraneous files and dirs from the |copy_dest| directory. + // Those should not be deleted with the --delete option. + if (!copy_dest.empty()) { + diff.extraneous_files.erase( + std::remove_if(diff.extraneous_files.begin(), + diff.extraneous_files.end(), + [©_dest](const FileInfo& dir) { + return copy_dest.c_str() == dir.base_dir; + }), + diff.extraneous_files.end()); + + diff.extraneous_dirs.erase( + std::remove_if(diff.extraneous_dirs.begin(), diff.extraneous_dirs.end(), + [©_dest](const DirInfo& dir) { + return copy_dest.c_str() == dir.base_dir; + }), + diff.extraneous_dirs.end()); + } + + // Release memory. + std::vector empty_client_files; + std::vector empty_server_files; + client_files.swap(empty_client_files); + server_files.swap(empty_server_files); + std::vector empty_client_dirs; + std::vector empty_server_dirs; + client_dirs.swap(empty_client_dirs); + server_dirs.swap(empty_server_dirs); + + return diff; +} + +SendFileStatsResponse AdjustToFlagsAndGetStats(bool existing, bool checksum, + bool whole_file, Result* diff) { + // Record stats. + SendFileStatsResponse file_stats_response; + file_stats_response.set_num_missing_files( + static_cast(diff->missing_files.size())); + file_stats_response.set_num_extraneous_files( + static_cast(diff->extraneous_files.size())); + file_stats_response.set_num_matching_files( + static_cast(diff->matching_files.size())); + file_stats_response.set_num_changed_files( + static_cast(diff->changed_files.size())); + file_stats_response.set_num_missing_dirs( + static_cast(diff->missing_dirs.size())); + file_stats_response.set_num_extraneous_dirs( + static_cast(diff->extraneous_dirs.size())); + file_stats_response.set_num_matching_dirs( + static_cast(diff->matching_dirs.size())); + + // Take special flags into account that move files between the lists. + + if (existing) { + // Do not copy missing files. + LOG_INFO("Removing missing files (--existing)"); + std::vector empty_files; + diff->missing_files.swap(empty_files); + std::vector empty_dirs; + diff->missing_dirs.swap(empty_dirs); + } + + if (checksum) { + // Move matching files over to changed files, so the delta-transfer + // algorithm is applied. + LOG_INFO("Moving matching files over to changed files (-c/--checksum)"); + for (FileInfo& file : diff->matching_files) + diff->changed_files.emplace_back(file, std::move(file)); + std::vector empty; + diff->matching_files.swap(empty); + } + + if (whole_file) { + // Move changed files over to the missing files, so they all get copied. + LOG_INFO("Moving changed files over to missing files (-W/--whole)"); + for (ChangedFileInfo& file : diff->changed_files) { + diff->missing_files.emplace_back( + std::move(file.filepath), file.client_modified_time, file.client_size, + file.client_index, nullptr); + } + std::vector empty; + diff->changed_files.swap(empty); + } + + // Compute totals. 
+ uint64_t total_missing_bytes = 0; + for (const FileInfo& file : diff->missing_files) + total_missing_bytes += file.size; + + uint64_t total_changed_client_bytes = 0; + uint64_t total_changed_server_bytes = 0; + for (const ChangedFileInfo& file : diff->changed_files) { + total_changed_client_bytes += file.client_size; + total_changed_server_bytes += file.server_size; + } + + // Set totals in stats. Note that the totals are computed from the MODIFIED + // file lists. This is important to get progress reporting right. The other + // stats, OTOH, are computed from the ORIGINAL file lists. They're displayed + // to the user. + file_stats_response.set_total_missing_bytes(total_missing_bytes); + file_stats_response.set_total_changed_client_bytes( + total_changed_client_bytes); + file_stats_response.set_total_changed_server_bytes( + total_changed_server_bytes); + + return file_stats_response; +} + +} // namespace file_diff +} // namespace cdc_ft diff --git a/cdc_rsync_server/file_diff_generator.h b/cdc_rsync_server/file_diff_generator.h new file mode 100644 index 0000000..dc3fd60 --- /dev/null +++ b/cdc_rsync_server/file_diff_generator.h @@ -0,0 +1,75 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_SERVER_FILE_DIFF_GENERATOR_H_ +#define CDC_RSYNC_SERVER_FILE_DIFF_GENERATOR_H_ + +#include + +#include "cdc_rsync/protos/messages.pb.h" +#include "cdc_rsync_server/file_info.h" + +namespace cdc_ft { +namespace file_diff { + +struct Result { + // Files present on the client, but not on the server. + std::vector missing_files; + + // Files present on the server, but not on the client. + std::vector extraneous_files; + + // Files present on both, with different timestamp or file size. + std::vector changed_files; + + // Files present on both, with matching timestamp and file size. + std::vector matching_files; + + // Directories present on the client, but not on the server. + std::vector missing_dirs; + + // Directories present on the server, but not on the client. + std::vector extraneous_dirs; + + // Directories present on both client and server. + std::vector matching_dirs; +}; + +// Generates the diff between +// 1) |client_files| and |server_files| by comparing +// file paths, modified times and file sizes. +// If |double_check_missing| is true, missing files are checked for existence +// (relative to |base_dir| and |copy_dest|, if non-empty) +// before they are put into the missing bucket. +// 2) |client_dirs| and |server_dirs| by comparing directory paths. +// The passed-in vectors are consumed. +Result Generate(std::vector&& client_files, + std::vector&& server_files, + std::vector&& client_dirs, + std::vector&& server_dirs, const std::string& base_dir, + const std::string& copy_dest, bool double_check_missing); + +// Adjusts file containers according to sync flags. +// |existing|, |checksum|, |whole_file| are the sync flags, see command line +// help. They cause files to be moved between containers. 
+// |diff| is the result from Generate(). +SendFileStatsResponse AdjustToFlagsAndGetStats(bool existing, bool checksum, + bool whole_file, Result* diff); + +} // namespace file_diff +} // namespace cdc_ft + +#endif // CDC_RSYNC_SERVER_FILE_DIFF_GENERATOR_H_ diff --git a/cdc_rsync_server/file_diff_generator_test.cc b/cdc_rsync_server/file_diff_generator_test.cc new file mode 100644 index 0000000..53d0c95 --- /dev/null +++ b/cdc_rsync_server/file_diff_generator_test.cc @@ -0,0 +1,619 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_server/file_diff_generator.h" + +#include "cdc_rsync_server/file_info.h" +#include "common/path.h" +#include "common/status_test_macros.h" +#include "common/test_main.h" +#include "gtest/gtest.h" + +namespace cdc_ft { + +bool operator==(const FileInfo& a, const FileInfo& b) { + return a.filepath == b.filepath && a.modified_time == b.modified_time && + a.size == b.size && a.client_index == b.client_index && + a.base_dir == b.base_dir; +} + +bool operator==(const ChangedFileInfo& a, const ChangedFileInfo& b) { + return a.filepath == b.filepath && + a.client_modified_time == b.client_modified_time && + a.client_size == b.client_size && a.server_size == b.server_size && + a.client_index == b.client_index && a.base_dir == b.base_dir; +} + +bool operator==(const DirInfo& a, const DirInfo& b) { + return a.filepath == b.filepath && a.client_index == b.client_index && + a.base_dir == b.base_dir; +} + +namespace { + +constexpr int64_t kModTime1 = 123; +constexpr int64_t kModTime2 = 234; +constexpr int64_t kModTime3 = 345; +constexpr int64_t kModTime4 = 456; + +constexpr uint64_t kFileSize1 = 1000; +constexpr uint64_t kFileSize2 = 2000; +constexpr uint64_t kFileSize3 = 3000; +constexpr uint64_t kFileSize4 = 4000; + +constexpr uint32_t kClientIndex1 = 1; +constexpr uint32_t kClientIndex2 = 2; +constexpr uint32_t kClientIndex3 = 3; +constexpr uint32_t kClientIndex4 = 4; +constexpr uint32_t kClientIndex5 = 5; + +constexpr bool kDoubleCheckMissing = true; +constexpr bool kNoDoubleCheckMissing = false; + +constexpr bool kExisting = true; +constexpr bool kNoExisting = false; + +constexpr bool kChecksum = true; +constexpr bool kNoChecksum = false; + +constexpr bool kWholeFile = true; +constexpr bool kNoWholeFile = false; + +constexpr char kNoCopyDest[] = ""; + +// Note: FileDiffGenerator is a server-only class and only runs on GGP, but the +// code is independent of the platform, so we can test it from Windows. 
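+
+// For orientation, a rough usage sketch of the API under test. This is a
+// hypothetical caller (the real wiring presumably lives in
+// cdc_rsync_server.cc, and the file/dir lists come from FileFinder rather
+// than being spelled out); argument values are illustrative only:
+//
+//   file_diff::Result diff = file_diff::Generate(
+//       std::move(client_files), std::move(server_files),
+//       std::move(client_dirs), std::move(server_dirs),
+//       /*base_dir=*/"/mnt/developer", /*copy_dest=*/"",
+//       /*double_check_missing=*/false);
+//   SendFileStatsResponse stats = file_diff::AdjustToFlagsAndGetStats(
+//       /*existing=*/false, /*checksum=*/false, /*whole_file=*/false, &diff);
+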
+class FileDiffGeneratorTest : public ::testing::Test { + protected: + std::string base_dir_ = + path::Join(GetTestDataDir("file_diff_generator"), "base_dir"); + + std::string copy_dest_ = + path::Join(GetTestDataDir("file_diff_generator"), "copy_dest"); + + const FileInfo client_file_ = + FileInfo("file/path1", kModTime1, kFileSize1, kClientIndex1, nullptr); + const FileInfo server_file_ = + FileInfo("file/path2", kModTime2, kFileSize2, FileInfo::kInvalidIndex, + base_dir_.c_str()); + + FileInfo matching_client_file_ = + FileInfo("file/path3", kModTime3, kFileSize3, kClientIndex2, nullptr); + const FileInfo matching_server_file_ = + FileInfo("file/path3", kModTime3, kFileSize3, FileInfo::kInvalidIndex, + base_dir_.c_str()); + + FileInfo changed_size_client_file_ = + FileInfo("file/path4", kModTime3, kFileSize3, kClientIndex3, nullptr); + const FileInfo changed_size_server_file_ = + FileInfo("file/path4", kModTime3, kFileSize4, FileInfo::kInvalidIndex, + base_dir_.c_str()); + + FileInfo changed_time_client_file_ = + FileInfo("file/path5", kModTime3, kFileSize3, kClientIndex3, nullptr); + const FileInfo changed_time_server_file_ = + FileInfo("file/path5", kModTime4, kFileSize3, FileInfo::kInvalidIndex, + base_dir_.c_str()); + + const DirInfo client_dir_ = DirInfo("dir/dir1", kClientIndex4, nullptr); + const DirInfo server_dir_ = + DirInfo("dir/dir2", FileInfo::kInvalidIndex, base_dir_.c_str()); + + const DirInfo matching_client_dir_ = + DirInfo("dir/dir3", kClientIndex5, nullptr); + const DirInfo matching_server_dir_ = + DirInfo("dir/dir3", FileInfo::kInvalidIndex, base_dir_.c_str()); + + // Creates a FileInfo struct by filling data from the file at + // |fi_base_dir|\|filename|. If |fi_base_dir| is nullptr (for client files) + // reads from |base_dir_| instead. + FileInfo CreateFileInfo(const char* filename, const char* fi_base_dir) { + std::string path = + path::Join(fi_base_dir ? fi_base_dir : base_dir_, filename); + path::Stats stats; + EXPECT_OK(path::GetStats(path, &stats)); + return FileInfo(filename, stats.modified_time, stats.size, 0, fi_base_dir); + } + + // Creates a default file_diff::Result with one file/dir in each bucket. 
+ file_diff::Result MakeResultForAdjustTests() { + file_diff::Result diff; + + diff.matching_files.push_back(matching_client_file_); + diff.changed_files.push_back(ChangedFileInfo( + changed_size_server_file_, FileInfo(changed_size_client_file_))); + diff.missing_files.push_back(client_file_); + diff.extraneous_files.push_back(server_file_); + + diff.matching_dirs.push_back(matching_client_dir_); + diff.missing_dirs.push_back(client_dir_); + diff.extraneous_dirs.push_back(server_dir_); + + return diff; + } +}; + +TEST_F(FileDiffGeneratorTest, MissingFile) { + file_diff::Result diff = + file_diff::Generate({client_file_}, {}, {}, {}, base_dir_, kNoCopyDest, + kNoDoubleCheckMissing); + + EXPECT_EQ(diff.missing_files, std::vector({client_file_})); + EXPECT_TRUE(diff.extraneous_files.empty()); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_TRUE(diff.extraneous_dirs.empty()); +} + +TEST_F(FileDiffGeneratorTest, ExtraneousFile) { + file_diff::Result diff = + file_diff::Generate({}, {server_file_}, {}, {}, base_dir_, kNoCopyDest, + kNoDoubleCheckMissing); + + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_TRUE(diff.extraneous_dirs.empty()); +} + +TEST_F(FileDiffGeneratorTest, MatchingFiles) { + file_diff::Result diff = + file_diff::Generate({matching_client_file_}, {matching_server_file_}, {}, + {}, base_dir_, kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_TRUE(diff.extraneous_files.empty()); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_EQ(diff.matching_files, + std::vector({matching_client_file_})); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_TRUE(diff.extraneous_dirs.empty()); +} + +TEST_F(FileDiffGeneratorTest, ChangedFiles) { + // Purposely swap the order for the server files to test sorting. 
+ file_diff::Result diff = file_diff::Generate( + {changed_time_client_file_, changed_size_client_file_}, + {changed_size_server_file_, changed_time_server_file_}, {}, {}, base_dir_, + kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_TRUE(diff.extraneous_files.empty()); + EXPECT_EQ(diff.changed_files, + std::vector( + {ChangedFileInfo(changed_size_server_file_, + std::move(changed_size_client_file_)), + ChangedFileInfo(changed_time_server_file_, + std::move(changed_time_client_file_))})); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_TRUE(diff.extraneous_dirs.empty()); +} + +TEST_F(FileDiffGeneratorTest, OrderIndependence) { + std::vector client_files = {client_file_, matching_client_file_, + changed_size_client_file_, + changed_time_client_file_}; + std::vector server_files = {server_file_, matching_server_file_, + changed_size_server_file_, + changed_time_server_file_}; + + std::vector expected_missing_files = {client_file_}; + std::vector expected_extraneous_files = {server_file_}; + std::vector expected_changed_files = { + ChangedFileInfo(changed_size_server_file_, + std::move(changed_size_client_file_)), + ChangedFileInfo(changed_time_server_file_, + std::move(changed_time_client_file_))}; + std::vector expected_matching_files = {matching_client_file_}; + + // Make several tests, each time with |server_files| permuted a bit.. + for (size_t backwards = 0; backwards < 2; ++backwards) { + for (size_t circular = 0; circular < server_files.size(); ++circular) { + file_diff::Result diff = + file_diff::Generate(std::vector(client_files), + std::vector(server_files), {}, {}, + base_dir_, kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_EQ(diff.missing_files, expected_missing_files); + EXPECT_EQ(diff.extraneous_files, expected_extraneous_files); + EXPECT_EQ(diff.changed_files, expected_changed_files); + EXPECT_EQ(diff.matching_files, expected_matching_files); + + // Circular permutation. + server_files.insert(server_files.begin(), server_files.back()); + server_files.pop_back(); + } + + // Reverse order. + std::reverse(server_files.begin(), server_files.end()); + } +} + +TEST_F(FileDiffGeneratorTest, DoubleCheckMissing_NoCopyDest) { + // file_a is matching the real file, file_b is changed, file_h is missing. + FileInfo file_a = CreateFileInfo("a.txt", nullptr); + FileInfo file_b = CreateFileInfo("b.txt", nullptr); + file_b.modified_time = 0; + FileInfo file_h("h.txt", 0, 0, 0, nullptr); + + file_diff::Result diff = + file_diff::Generate({file_a, file_b, file_h}, {}, {}, {}, base_dir_, + kNoCopyDest, kDoubleCheckMissing); + + FileInfo server_file_a = CreateFileInfo("a.txt", base_dir_.c_str()); + FileInfo server_file_b = CreateFileInfo("b.txt", base_dir_.c_str()); + + ChangedFileInfo changed_file_b(server_file_b, std::move(file_b)); + + EXPECT_EQ(diff.matching_files, std::vector({file_a})); + EXPECT_EQ(diff.changed_files, std::vector({changed_file_b})); + EXPECT_EQ(diff.missing_files, std::vector({file_h})); + EXPECT_TRUE(diff.extraneous_files.empty()); +} + +TEST_F(FileDiffGeneratorTest, DoubleCheckMissing_CopyDest) { + // Tests all permutations of client files and server files in base_dir as + // well as copy_dest. Special treatment is marked as !!!. 
+ // client files server files resulting diff list + // base_dir copy_dest + // a exists exists, matching missing matching/base_dir + // b exists exists, changed missing changed/base_dir + // c missing exists missing extraneous/base_dir + // d exists missing missing missing + // e exists exists, matching exists, ignored matching/base_dir + // f exists exists, changed exists, ignored changed/base_dir + // g missing exists exists, ignored extraneous/base_dir + // h exists missing exists, matching changed/copy_dest (!!!) + // i exists missing exists, changed changed/copy_dest + // j missing missing exists ignored (!!!) + + // Client files. + FileInfo file_a = CreateFileInfo("a.txt", nullptr); + FileInfo file_b = CreateFileInfo("b.txt", nullptr); + // c missing + FileInfo file_d = FileInfo("d.txt", 0, 0, 0, nullptr); + FileInfo file_e = CreateFileInfo("e.txt", nullptr); + FileInfo file_f = CreateFileInfo("f.txt", nullptr); + // g missing + FileInfo file_h = CreateFileInfo("h.txt", copy_dest_.c_str()); + file_h.base_dir = nullptr; + FileInfo file_i = FileInfo("i.txt", 0, 0, 0, nullptr); + // j missing + + // Mark files b and f as changed. + file_b.modified_time = 0; + file_f.modified_time = 0; + + std::vector client_files = {file_a, file_b, file_d, file_e, + file_f, file_h, file_i}; + + // Server files in base_dir. d, h, i and j are missing. + FileInfo server_file_a = CreateFileInfo("a.txt", base_dir_.c_str()); + FileInfo server_file_b = CreateFileInfo("b.txt", base_dir_.c_str()); + FileInfo server_file_c = CreateFileInfo("c.txt", base_dir_.c_str()); + FileInfo server_file_e = CreateFileInfo("e.txt", base_dir_.c_str()); + FileInfo server_file_f = CreateFileInfo("f.txt", base_dir_.c_str()); + FileInfo server_file_g = CreateFileInfo("g.txt", base_dir_.c_str()); + + std::vector server_files = {server_file_a, server_file_b, + server_file_c, server_file_e, + server_file_f, server_file_g}; + + std::vector expected_matching_files = {file_a, file_e}; + + ChangedFileInfo changed_file_b(server_file_b, std::move(file_b)); + ChangedFileInfo changed_file_f(server_file_f, std::move(file_f)); + ChangedFileInfo changed_file_h(CreateFileInfo("h.txt", copy_dest_.c_str()), + std::move(file_h)); + ChangedFileInfo changed_file_i(CreateFileInfo("i.txt", copy_dest_.c_str()), + std::move(file_i)); + std::vector expected_changed_files = { + changed_file_b, changed_file_f, changed_file_h, changed_file_i}; + + std::vector expected_missing_files = {file_d}; + std::vector expected_extraneous_files = {server_file_c, + server_file_g}; + + // The server files in copy_dest are stat'ed by file_diff::Generate(). 
+ + file_diff::Result diff = + file_diff::Generate(std::move(client_files), std::move(server_files), {}, + {}, base_dir_, copy_dest_, kDoubleCheckMissing); + + EXPECT_EQ(diff.matching_files, expected_matching_files); + EXPECT_EQ(diff.changed_files, expected_changed_files); + EXPECT_EQ(diff.missing_files, expected_missing_files); + EXPECT_EQ(diff.extraneous_files, expected_extraneous_files); +} + +TEST_F(FileDiffGeneratorTest, MissingDir) { + file_diff::Result diff = file_diff::Generate( + {}, {}, {client_dir_}, {}, base_dir_, kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_EQ(diff.missing_dirs, std::vector({client_dir_})); + EXPECT_TRUE(diff.extraneous_dirs.empty()); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_TRUE(diff.extraneous_files.empty()); +} + +TEST_F(FileDiffGeneratorTest, ExtraneousDir) { + file_diff::Result diff = file_diff::Generate( + {}, {}, {}, {server_dir_}, base_dir_, kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + EXPECT_TRUE(diff.matching_dirs.empty()); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_TRUE(diff.extraneous_files.empty()); +} + +TEST_F(FileDiffGeneratorTest, MatchingDirs) { + file_diff::Result diff = file_diff::Generate( + {}, {}, {matching_client_dir_}, {matching_server_dir_}, base_dir_, + kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_TRUE(diff.extraneous_dirs.empty()); + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_TRUE(diff.extraneous_files.empty()); +} + +TEST_F(FileDiffGeneratorTest, DirOrderIndependence) { + std::vector client_dirs = {client_dir_, matching_client_dir_}; + std::vector server_dirs = {server_dir_, matching_server_dir_}; + + std::vector expected_missing_dirs = {client_dir_}; + std::vector expected_extraneous_dirs = {server_dir_}; + std::vector expected_matching_dirs = {matching_client_dir_}; + + // Make several tests, each time with |server_dirs| permuted a bit. + for (size_t backwards = 0; backwards < 2; ++backwards) { + for (size_t circular = 0; circular < server_dirs.size(); ++circular) { + file_diff::Result diff = + file_diff::Generate({}, {}, std::vector(client_dirs), + std::vector(server_dirs), base_dir_, + kNoCopyDest, kNoDoubleCheckMissing); + + EXPECT_EQ(diff.missing_dirs, expected_missing_dirs); + EXPECT_EQ(diff.extraneous_dirs, expected_extraneous_dirs); + EXPECT_EQ(diff.matching_dirs, expected_matching_dirs); + + // Circular permutation. + server_dirs.insert(server_dirs.begin(), server_dirs.back()); + server_dirs.pop_back(); + } + + // Reverse order. + std::reverse(server_dirs.begin(), server_dirs.end()); + } +} + +TEST_F(FileDiffGeneratorTest, CopyDest_Dirs) { + DirInfo client_dir1("dir/dir1", kClientIndex1, nullptr); + DirInfo client_dir2("dir/dir2", kClientIndex2, nullptr); + + // Matching in |copy_dest_| + // -> counted as missing (so it gets created in destination) + DirInfo server_dir1("dir/dir1", FileInfo::kInvalidIndex, copy_dest_.c_str()); + // Extraneous in |copy_dest_| + // -> ignored (shouldn't delete dirs in package dir!) 
+ DirInfo server_dir2("dir/dir3", FileInfo::kInvalidIndex, copy_dest_.c_str()); + + file_diff::Result diff = file_diff::Generate( + {}, {}, std::vector{client_dir1, client_dir2}, + std::vector{server_dir1, server_dir2}, base_dir_, copy_dest_, + kNoDoubleCheckMissing); + + EXPECT_EQ(diff.missing_dirs, + std::vector({client_dir1, client_dir2})); + EXPECT_TRUE(diff.extraneous_dirs.empty()); + EXPECT_TRUE(diff.matching_dirs.empty()); +} + +TEST_F(FileDiffGeneratorTest, Adjust_DefaultParams) { + file_diff::Result diff = MakeResultForAdjustTests(); + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + kNoExisting, kNoChecksum, kNoWholeFile, &diff); + + EXPECT_EQ(diff.matching_files, + std::vector({matching_client_file_})); + EXPECT_EQ(diff.missing_files, std::vector({client_file_})); + EXPECT_EQ( + diff.changed_files, + std::vector({ChangedFileInfo( + changed_size_server_file_, std::move(changed_size_client_file_))})); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_EQ(diff.missing_dirs, std::vector({client_dir_})); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + + EXPECT_EQ(response.num_matching_files(), 1); + EXPECT_EQ(response.num_missing_files(), 1); + EXPECT_EQ(response.num_changed_files(), 1); + EXPECT_EQ(response.num_extraneous_files(), 1); + + EXPECT_EQ(response.num_matching_dirs(), 1); + EXPECT_EQ(response.num_missing_dirs(), 1); + EXPECT_EQ(response.num_extraneous_dirs(), 1); + + EXPECT_EQ(response.total_changed_client_bytes(), + changed_size_client_file_.size); + EXPECT_EQ(response.total_changed_server_bytes(), + changed_size_server_file_.size); + EXPECT_EQ(response.total_missing_bytes(), client_file_.size); +} + +TEST_F(FileDiffGeneratorTest, Adjust_Existing) { + file_diff::Result diff = MakeResultForAdjustTests(); + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + kExisting, kNoChecksum, kNoWholeFile, &diff); + + // Existing removes missing files. + EXPECT_EQ(diff.matching_files, + std::vector({matching_client_file_})); + EXPECT_TRUE(diff.missing_files.empty()); + EXPECT_EQ( + diff.changed_files, + std::vector({ChangedFileInfo( + changed_size_server_file_, std::move(changed_size_client_file_))})); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_TRUE(diff.missing_dirs.empty()); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + + // These stats should be unchanged. + EXPECT_EQ(response.num_matching_files(), 1); + EXPECT_EQ(response.num_missing_files(), 1); + EXPECT_EQ(response.num_changed_files(), 1); + EXPECT_EQ(response.num_extraneous_files(), 1); + + EXPECT_EQ(response.num_matching_dirs(), 1); + EXPECT_EQ(response.num_missing_dirs(), 1); + EXPECT_EQ(response.num_extraneous_dirs(), 1); + + // These stats should be computed from the actual containers. + EXPECT_EQ(response.total_changed_client_bytes(), + changed_size_client_file_.size); + EXPECT_EQ(response.total_changed_server_bytes(), + changed_size_server_file_.size); + EXPECT_EQ(response.total_missing_bytes(), 0); +} + +TEST_F(FileDiffGeneratorTest, Adjust_Checksum) { + file_diff::Result diff = MakeResultForAdjustTests(); + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + kNoExisting, kChecksum, kNoWholeFile, &diff); + + // Checksum moves matching files to changed files. 
+ EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_EQ(diff.missing_files, std::vector({client_file_})); + EXPECT_EQ(diff.changed_files, + std::vector( + {ChangedFileInfo(changed_size_server_file_, + std::move(changed_size_client_file_)), + ChangedFileInfo(matching_client_file_, + std::move(matching_client_file_))})); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_EQ(diff.missing_dirs, std::vector({client_dir_})); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + + // These stats should be unchanged. + EXPECT_EQ(response.num_matching_files(), 1); + EXPECT_EQ(response.num_missing_files(), 1); + EXPECT_EQ(response.num_changed_files(), 1); + EXPECT_EQ(response.num_extraneous_files(), 1); + + EXPECT_EQ(response.num_matching_dirs(), 1); + EXPECT_EQ(response.num_missing_dirs(), 1); + EXPECT_EQ(response.num_extraneous_dirs(), 1); + + // These stats should be computed from the actual containers. + EXPECT_EQ(response.total_changed_client_bytes(), + changed_size_client_file_.size + matching_client_file_.size); + EXPECT_EQ(response.total_changed_server_bytes(), + changed_size_server_file_.size + matching_client_file_.size); + EXPECT_EQ(response.total_missing_bytes(), client_file_.size); +} + +TEST_F(FileDiffGeneratorTest, Adjust_WholeFile) { + file_diff::Result diff = MakeResultForAdjustTests(); + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + kNoExisting, kNoChecksum, kWholeFile, &diff); + + // WholeFile moves changed files to missing files. + EXPECT_EQ(diff.matching_files, + std::vector({matching_client_file_})); + EXPECT_EQ(diff.missing_files, + std::vector({client_file_, changed_size_client_file_})); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_EQ(diff.missing_dirs, std::vector({client_dir_})); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + + // These stats should be unchanged. + EXPECT_EQ(response.num_matching_files(), 1); + EXPECT_EQ(response.num_missing_files(), 1); + EXPECT_EQ(response.num_changed_files(), 1); + EXPECT_EQ(response.num_extraneous_files(), 1); + + EXPECT_EQ(response.num_matching_dirs(), 1); + EXPECT_EQ(response.num_missing_dirs(), 1); + EXPECT_EQ(response.num_extraneous_dirs(), 1); + + // These stats should be computed from the actual containers. + EXPECT_EQ(response.total_changed_client_bytes(), 0); + EXPECT_EQ(response.total_changed_server_bytes(), 0); + EXPECT_EQ(response.total_missing_bytes(), + client_file_.size + changed_size_client_file_.size); +} + +TEST_F(FileDiffGeneratorTest, Adjust_ChecksumAndWholeFile) { + file_diff::Result diff = MakeResultForAdjustTests(); + SendFileStatsResponse response = file_diff::AdjustToFlagsAndGetStats( + kNoExisting, kChecksum, kWholeFile, &diff); + + // Checksum+WholeFile moves both matching and changed files to missing files. + EXPECT_TRUE(diff.matching_files.empty()); + EXPECT_EQ(diff.missing_files, + std::vector({client_file_, changed_size_client_file_, + matching_client_file_})); + EXPECT_TRUE(diff.changed_files.empty()); + EXPECT_EQ(diff.extraneous_files, std::vector({server_file_})); + + EXPECT_EQ(diff.matching_dirs, std::vector({matching_client_dir_})); + EXPECT_EQ(diff.missing_dirs, std::vector({client_dir_})); + EXPECT_EQ(diff.extraneous_dirs, std::vector({server_dir_})); + + // These stats should be unchanged. 
+ EXPECT_EQ(response.num_matching_files(), 1); + EXPECT_EQ(response.num_missing_files(), 1); + EXPECT_EQ(response.num_changed_files(), 1); + EXPECT_EQ(response.num_extraneous_files(), 1); + + EXPECT_EQ(response.num_matching_dirs(), 1); + EXPECT_EQ(response.num_missing_dirs(), 1); + EXPECT_EQ(response.num_extraneous_dirs(), 1); + + // These stats should be computed from the actual containers. + EXPECT_EQ(response.total_changed_client_bytes(), 0); + EXPECT_EQ(response.total_changed_server_bytes(), 0); + EXPECT_EQ(response.total_missing_bytes(), client_file_.size + + changed_size_client_file_.size + + matching_client_file_.size); +} + +} // namespace +} // namespace cdc_ft diff --git a/cdc_rsync_server/file_finder.cc b/cdc_rsync_server/file_finder.cc new file mode 100644 index 0000000..6129e0d --- /dev/null +++ b/cdc_rsync_server/file_finder.cc @@ -0,0 +1,96 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cdc_rsync_server/file_finder.h" + +#include "common/path.h" +#include "common/path_filter.h" +#include "common/platform.h" +#include "common/status.h" + +namespace cdc_ft { + +FileFinder::FileFinder() {} + +absl::Status FileFinder::AddFiles(const std::string& base_dir, bool recursive, + PathFilter* path_filter) { + std::vector* files = &files_; + std::vector* dirs = &dirs_; + auto handler = [files, dirs, &base_dir, path_filter]( + std::string dir, std::string filename, + int64_t modified_time, uint64_t size, bool is_directory) { + std::string path = path::Join(dir.substr(base_dir.size()), filename); + if (path_filter->IsMatch(path)) { + if (is_directory) { + dirs->emplace_back(path, FileInfo::kInvalidIndex, base_dir.c_str()); + } else { + files->emplace_back(path, modified_time, size, FileInfo::kInvalidIndex, + base_dir.c_str()); + } + } + return absl::OkStatus(); + }; + +#if PLATFORM_WINDOWS + // SearchFiles needs a wildcard on Windows. Currently only used for tests. + absl::Status status = + path::SearchFiles(path::Join(base_dir, "*"), recursive, handler); +#elif PLATFORM_LINUX + absl::Status status = path::SearchFiles(base_dir, recursive, handler); +#endif + if (!status.ok()) { + return WrapStatus(status, "Failed to find files for '%s'", base_dir); + } + + return absl::OkStatus(); +} + +void FileFinder::ReleaseFiles(std::vector* files, + std::vector* dirs) { + assert(files); + assert(dirs); + + // Dedupe files and directories. Note that the combination of std::stable_sort + // and std::unique is guaranteed to keep the entries added first to the lists. + // In practice, this kicks out the element in the copy_dest directory (e.g. + // the package) and keeps the one in the destination (e.g. /mnt/developer), if + // both are present. 
+  std::stable_sort(files_.begin(), files_.end(),
+                   [](const FileInfo& a, const FileInfo& b) {
+                     return a.filepath < b.filepath;
+                   });
+  std::stable_sort(dirs_.begin(), dirs_.end(),
+                   [](const DirInfo& a, const DirInfo& b) {
+                     return a.filepath < b.filepath;
+                   });
+
+  files_.erase(std::unique(files_.begin(), files_.end(),
+                           [](const FileInfo& a, const FileInfo& b) {
+                             return a.filepath == b.filepath;
+                           }),
+               files_.end());
+
+  dirs_.erase(std::unique(dirs_.begin(), dirs_.end(),
+                          [](const DirInfo& a, const DirInfo& b) {
+                            return a.filepath == b.filepath;
+                          }),
+              dirs_.end());
+
+  *files = std::move(files_);
+  *dirs = std::move(dirs_);
+}
+
+FileFinder::~FileFinder() = default;
+
+}  // namespace cdc_ft
diff --git a/cdc_rsync_server/file_finder.h b/cdc_rsync_server/file_finder.h
new file mode 100644
index 0000000..86fa01e
--- /dev/null
+++ b/cdc_rsync_server/file_finder.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CDC_RSYNC_SERVER_FILE_FINDER_H_
+#define CDC_RSYNC_SERVER_FILE_FINDER_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/status/status.h"
+#include "cdc_rsync_server/file_info.h"
+
+namespace cdc_ft {
+
+class PathFilter;
+
+// Scans directories and gathers contained files and directories.
+class FileFinder {
+ public:
+  FileFinder();
+  ~FileFinder();
+
+  // Gathers files and directories in |base_dir|. If |recursive| is true,
+  // searches recursively. |path_filter| is used to filter files and
+  // directories.
+  // If subsequent calls to AddFiles find a file or directory with the same
+  // relative path, this file or directory is ignored.
+  absl::Status AddFiles(const std::string& base_dir, bool recursive,
+                        PathFilter* path_filter);
+
+  // Returns all found files and directories.
+  void ReleaseFiles(std::vector<FileInfo>* files, std::vector<DirInfo>* dirs);
+
+ private:
+  std::vector<FileInfo> files_;
+  std::vector<DirInfo> dirs_;
+};
+
+}  // namespace cdc_ft
+
+#endif  // CDC_RSYNC_SERVER_FILE_FINDER_H_
diff --git a/cdc_rsync_server/file_finder_test.cc b/cdc_rsync_server/file_finder_test.cc
new file mode 100644
index 0000000..5c7631d
--- /dev/null
+++ b/cdc_rsync_server/file_finder_test.cc
@@ -0,0 +1,121 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
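+
+// For orientation, a rough sketch of the call sequence FileFinder is designed
+// for (hypothetical caller and variable names; the server presumably scans the
+// destination first and the copy_dest directory second, relying on the
+// first-one-wins dedup documented on FileFinder::AddFiles()):
+//
+//   FileFinder finder;
+//   PathFilter filter;
+//   absl::Status status = finder.AddFiles(dest_dir, /*recursive=*/true, &filter);
+//   if (status.ok())
+//     status = finder.AddFiles(copy_dest, /*recursive=*/true, &filter);
+//   std::vector<FileInfo> files;
+//   std::vector<DirInfo> dirs;
+//   if (status.ok()) finder.ReleaseFiles(&files, &dirs);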
+
+#include "cdc_rsync_server/file_finder.h"
+
+#include "common/log.h"
+#include "common/path.h"
+#include "common/path_filter.h"
+#include "common/status_test_macros.h"
+#include "common/test_main.h"
+#include "gtest/gtest.h"
+
+namespace cdc_ft {
+namespace {
+
+constexpr bool kNonRecursive = false;
+constexpr bool kRecursive = true;
+
+class FileFinderTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    Log::Initialize(std::make_unique<ConsoleLog>(LogLevel::kInfo));
+  }
+
+  void TearDown() override { Log::Shutdown(); }
+
+ protected:
+  std::string base_dir_ =
+      path::Join(GetTestDataDir("file_finder"), path::ToNative("base_dir/"));
+
+  std::string copy_dest_ =
+      path::Join(GetTestDataDir("file_finder"), path::ToNative("copy_dest/"));
+
+  template <typename T>
+  static void ExpectMatch(
+      const std::vector<T>& paths,
+      std::vector<std::pair<std::string, std::string>> base_dir_and_rel_path) {
+    EXPECT_EQ(base_dir_and_rel_path.size(), paths.size());
+    if (base_dir_and_rel_path.size() != paths.size()) return;
+
+    for (size_t n = 0; n < paths.size(); ++n) {
+      EXPECT_EQ(paths[n].base_dir, base_dir_and_rel_path[n].first);
+      EXPECT_EQ(paths[n].filepath, base_dir_and_rel_path[n].second);
+    }
+  }
+
+  PathFilter path_filter_;
+  std::vector<FileInfo> files_;
+  std::vector<DirInfo> dirs_;
+};
+
+TEST_F(FileFinderTest, FindSucceedsInvalidPath) {
+  // Invalid paths are just ignored.
+  std::string invalid_path = path::Join(base_dir_, "invalid");
+  FileFinder finder;
+  EXPECT_OK(finder.AddFiles(invalid_path, kNonRecursive, &path_filter_));
+  finder.ReleaseFiles(&files_, &dirs_);
+  EXPECT_TRUE(files_.empty());
+  EXPECT_TRUE(dirs_.empty());
+}
+
+TEST_F(FileFinderTest, FindSucceedsNonRecursive) {
+  FileFinder finder;
+  EXPECT_OK(finder.AddFiles(base_dir_, kNonRecursive, &path_filter_));
+  finder.ReleaseFiles(&files_, &dirs_);
+  ExpectMatch(files_, {{base_dir_, "a.txt"}, {base_dir_, "b.txt"}});
+  ExpectMatch(dirs_, {{base_dir_, "dir1"}, {base_dir_, "dir2"}});
+}
+
+TEST_F(FileFinderTest, FindSucceedsRecursive) {
+  FileFinder finder;
+  EXPECT_OK(finder.AddFiles(base_dir_, kRecursive, &path_filter_));
+  finder.ReleaseFiles(&files_, &dirs_);
+  ExpectMatch(files_, {{base_dir_, "a.txt"},
+                       {base_dir_, "b.txt"},
+                       {base_dir_, path::ToNative("dir1/c.txt")},
+                       {base_dir_, path::ToNative("dir2/d.txt")}});
+  ExpectMatch(dirs_, {{base_dir_, "dir1"}, {base_dir_, "dir2"}});
+}
+
+TEST_F(FileFinderTest, FindSucceedsRecursiveWithCopyDest) {
+  FileFinder finder;
+  EXPECT_OK(finder.AddFiles(base_dir_, kRecursive, &path_filter_));
+  EXPECT_OK(finder.AddFiles(copy_dest_, kRecursive, &path_filter_));
+  finder.ReleaseFiles(&files_, &dirs_);
+  ExpectMatch(files_, {{base_dir_, "a.txt"},
+                       {base_dir_, "b.txt"},
+                       {base_dir_, path::ToNative("dir1/c.txt")},
+                       {copy_dest_, path::ToNative("dir1/f.txt")},
+                       {base_dir_, path::ToNative("dir2/d.txt")},
+                       {copy_dest_, path::ToNative("dir3/d.txt")},
+                       {copy_dest_, "e.txt"}});
+  ExpectMatch(dirs_,
+              {{base_dir_, "dir1"}, {base_dir_, "dir2"}, {copy_dest_, "dir3"}});
+}
+
+TEST_F(FileFinderTest, FindSucceedsWithFilter) {
+  path_filter_.AddRule(PathFilter::Rule::Type::kExclude, "a.txt");
+
+  FileFinder finder;
+  EXPECT_OK(finder.AddFiles(base_dir_, kRecursive, &path_filter_));
+  finder.ReleaseFiles(&files_, &dirs_);
+  ExpectMatch(files_, {{base_dir_, "b.txt"},
+                       {base_dir_, path::ToNative("dir1/c.txt")},
+                       {base_dir_, path::ToNative("dir2/d.txt")}});
+  ExpectMatch(dirs_, {{base_dir_, "dir1"}, {base_dir_, "dir2"}});
+}
+
+}  // namespace
+}  // namespace cdc_ft
diff --git a/cdc_rsync_server/file_info.h b/cdc_rsync_server/file_info.h
new file mode 100644
index
0000000..81432bb --- /dev/null +++ b/cdc_rsync_server/file_info.h @@ -0,0 +1,83 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CDC_RSYNC_SERVER_FILE_INFO_H_ +#define CDC_RSYNC_SERVER_FILE_INFO_H_ + +#include + +namespace cdc_ft { + +struct FileInfo { + static constexpr uint32_t kInvalidIndex = UINT32_MAX; + + // Path relative to |base_dir|. + std::string filepath; + int64_t modified_time; + uint64_t size; + // For client files: Index into the client file list. + uint32_t client_index; + // For server files: Base directory. If nullptr, |filepath| is assumed to be + // relative to the destination directory. + const char* base_dir; + + FileInfo(std::string filepath, int64_t modified_time, uint64_t size, + uint32_t client_index, const char* base_dir) + : filepath(std::move(filepath)), + modified_time(modified_time), + size(size), + client_index(client_index), + base_dir(base_dir) {} +}; + +struct DirInfo { + static constexpr uint32_t kInvalidIndex = UINT32_MAX; + + // Path relative to |base_dir|. + std::string filepath; + uint32_t client_index; + // For server files: Base directory. If nullptr, |filepath| is assumed to be + // relative to the destination directory. + const char* base_dir; + + DirInfo(std::string filepath, uint32_t client_index, const char* base_dir) + : filepath(std::move(filepath)), + client_index(client_index), + base_dir(base_dir) {} +}; + +// Similar to FileInfo, but size is needed from both client and server. +struct ChangedFileInfo { + std::string filepath; + int64_t client_modified_time; + uint64_t client_size; + uint64_t server_size; + uint32_t client_index; + const char* base_dir; + + // Moves |client_file| data into this class. + ChangedFileInfo(const FileInfo& server_file, FileInfo&& client_file) + : filepath(std::move(client_file.filepath)), + client_modified_time(client_file.modified_time), + client_size(client_file.size), + server_size(server_file.size), + client_index(client_file.client_index), + base_dir(server_file.base_dir) {} +}; + +} // namespace cdc_ft + +#endif // CDC_RSYNC_SERVER_FILE_INFO_H_ diff --git a/cdc_rsync_server/main.cc b/cdc_rsync_server/main.cc new file mode 100644 index 0000000..667ab93 --- /dev/null +++ b/cdc_rsync_server/main.cc @@ -0,0 +1,105 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
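+
+// How errors are reported back to the client (a summary of the code below):
+// fatal errors are printed to stderr wrapped in kServerErrorMarker bytes via
+// SendErrorMessage(), and the process exits with a ServerExitCode that
+// GetExitCode() derives from the status tag (e.g. kServerExitCodeAddressInUse
+// for a failed bind). The client is expected to strip the markers and map the
+// exit code back to a user-facing message.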
+
+#include "cdc_rsync/base/server_exit_code.h"
+#include "cdc_rsync_server/cdc_rsync_server.h"
+#include "common/gamelet_component.h"
+#include "common/log.h"
+#include "common/status.h"
+
+namespace {
+
+void SendErrorMessage(const char* msg) {
+  constexpr char marker = cdc_ft::kServerErrorMarker;
+  fprintf(stderr, "%c%s%c", marker, msg, marker);
+}
+
+}  // namespace
+
+namespace cdc_ft {
+
+// Returns custom error codes based on the tag associated with |status|. This
+// is used to display custom error messages on the client.
+// Example: A bind failure usually means two instances are in use
+// simultaneously.
+ServerExitCode GetExitCode(const absl::Status& status) {
+  absl::optional<Tag> tag = GetTag(status);
+  if (!tag.has_value()) {
+    return kServerExitCodeGeneric;
+  }
+
+  // Some tags translate to a special error message on the client.
+  switch (tag.value()) {
+    case Tag::kAddressInUse:
+      // Can't bind port, probably two instances in use simultaneously.
+      return kServerExitCodeAddressInUse;
+
+    case Tag::kSocketEof:
+      // Usually means client disconnected and shut down already.
+    case Tag::kDeployServer:
+    case Tag::kInstancePickerNotAvailableInQuietMode:
+    case Tag::kConnectionTimeout:
+    case Tag::kCount:
+      // Should not happen in server.
+      break;
+  }
+
+  return kServerExitCodeGeneric;
+}
+
+}  // namespace cdc_ft
+
+int main(int argc, const char** argv) {
+  if (argc < 2) {
+    printf("Usage: cdc_rsync_server cdc_rsync_server