diff --git a/all_files.vcxitems b/all_files.vcxitems
index 0f55789..6731d57 100644
--- a/all_files.vcxitems
+++ b/all_files.vcxitems
@@ -39,6 +39,8 @@
+
+
@@ -156,6 +158,7 @@
+
diff --git a/common/BUILD b/common/BUILD
index db40b20..6a12a66 100644
--- a/common/BUILD
+++ b/common/BUILD
@@ -2,6 +2,23 @@ load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
 
 package(default_visibility = ["//visibility:public"])
 
+cc_library(
+    name = "ansi_filter",
+    srcs = ["ansi_filter.cc"],
+    hdrs = ["ansi_filter.h"],
+)
+
+cc_test(
+    name = "ansi_filter_test",
+    srcs = ["ansi_filter_test.cc"],
+    deps = [
+        ":ansi_filter",
+        "@com_google_absl//absl/strings",
+        "@com_google_googletest//:gtest",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "buffer",
     srcs = ["buffer.cc"],
diff --git a/common/ansi_filter.cc b/common/ansi_filter.cc
new file mode 100644
index 0000000..e36c2d2
--- /dev/null
+++ b/common/ansi_filter.cc
@@ -0,0 +1,112 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "common/ansi_filter.h"
+
+#include <cstdint>
+
+namespace cdc_ft {
+namespace ansi_filter {
+namespace {
+// Parser state: the kind of escape sequence currently being skipped, if any.
+enum class State {
+  kNotInSequence,
+  kDCS,  // Starting with kESC + P or kDCSI, Device Control String.
+  kCS,   // Starting with kESC + [ or kCSI, Control Sequence.
+  kOSC,  // Starting with kESC + ] or kOSCI, Operating System Command.
+};
+
+constexpr uint8_t kBEL = 0x07;   // Terminal bell.
+constexpr uint8_t kESC = 0x1B;   // ANSI escape character.
+constexpr uint8_t kST = 0x9C;    // String Terminator.
+constexpr uint8_t kDCSI = 0x90;  // Device Control String Introducer.
+constexpr uint8_t kCSI = 0x9B;   // Control Sequence Introducer.
+constexpr uint8_t kOSCI = 0x9D;  // Operating System Command Introducer
+
+}  // namespace
+
+// Copies |input| to the result byte by byte, dropping every byte that belongs
+// to a DCS, CSI or OSC sequence (7-bit "ESC + char" and 8-bit introducers).
+// A sequence that is still open at the end of |input| is dropped entirely.
+std::string RemoveEscapeSequences(const std::string& input) {
+  State state = State::kNotInSequence;
+  std::string result;
+  // The output can never be longer than the input.
+  result.reserve(input.size());
+
+  for (size_t n = 0; n < input.size(); ++n) {
+    uint8_t ch = static_cast<uint8_t>(input[n]);
+    // Lookahead byte for two-byte introducers/terminators; 0 at end of input.
+    uint8_t next_ch =
+        static_cast<uint8_t>(n + 1 < input.size() ? input[n + 1] : 0);
+
+    switch (state) {
+      case State::kNotInSequence:
+        // Device Control String.
+        if ((ch == kESC && next_ch == 'P') || ch == kDCSI) {
+          n += ch == kESC ? 1 : 0;
+          state = State::kDCS;
+          break;
+        }
+
+        // Control Sequence.
+        if ((ch == kESC && next_ch == '[') || ch == kCSI) {
+          n += ch == kESC ? 1 : 0;
+          state = State::kCS;
+          break;
+        }
+
+        // Operating System Command.
+        if ((ch == kESC && next_ch == ']') || ch == kOSCI) {
+          n += ch == kESC ? 1 : 0;
+          state = State::kOSC;
+          break;
+        }
+
+        // Char does not belong to control sequence.
+        result.push_back(input[n]);
+        break;
+
+      case State::kDCS:
+        // Device control strings are ended by kST or ESC + \.
+        if (ch == kST || (ch == kESC && next_ch == '\\')) {
+          n += ch == kESC ? 1 : 0;
+          state = State::kNotInSequence;
+        }
+        break;
+
+      case State::kCS:
+        // Control sequences are ended by a final byte in 0x40-0x7E.
+        // https://en.wikipedia.org/wiki/ANSI_escape_code#CSIsection
+        if (ch >= 0x40 && ch <= 0x7E) {
+          state = State::kNotInSequence;
+        }
+        break;
+
+      case State::kOSC:
+        // Operating system commands are ended by kBEL, kST or ESC + \.
+        // https://invisible-island.net/xterm/ctlseqs/ctlseqs.html#h3-Operating-System-Commands
+        if (ch == kBEL || ch == kST || (ch == kESC && next_ch == '\\')) {
+          n += ch == kESC ? 1 : 0;
+          state = State::kNotInSequence;
+        }
+        break;
+    }
+  }
+
+  return result;
+}
+
+}  // namespace ansi_filter
+}  // namespace cdc_ft
diff --git a/common/ansi_filter.h b/common/ansi_filter.h
new file mode 100644
index 0000000..868a639
--- /dev/null
+++ b/common/ansi_filter.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_ANSI_FILTER_H_
+#define COMMON_ANSI_FILTER_H_
+
+#include <string>
+
+namespace cdc_ft {
+namespace ansi_filter {
+
+// Removes ANSI escape sequences from a string.
+// |input| is a string that can contain ANSI escape sequences.
+// Returns the filtered string with ANSI escape sequences removed.
+// Example: The most common escape sequence sets a color, e.g.
+//   "This \x1b[1;32merror\x1b[0m is red."
+// The filtered output is
+//   "This error is red."
+std::string RemoveEscapeSequences(const std::string& input);
+
+}  // namespace ansi_filter
+}  // namespace cdc_ft
+
+#endif  // COMMON_ANSI_FILTER_H_
diff --git a/common/ansi_filter_test.cc b/common/ansi_filter_test.cc
new file mode 100644
index 0000000..6ac9695
--- /dev/null
+++ b/common/ansi_filter_test.cc
@@ -0,0 +1,98 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "common/ansi_filter.h"
+
+#include "absl/strings/ascii.h"
+#include "gtest/gtest.h"
+
+namespace cdc_ft {
+namespace {
+
+// Actual sample output from running SSH with -tt on Windows.
+// Note the \0 after cmd.exe.
+constexpr char kSshOutput[] =
+    "\x1b[2J\x1b[?25l\x1b[m\x1b["
+    "H\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n"
+    "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\x1b[H\x1b]0;c:"
+    "\\windows\\system32\\cmd.exe\0\a\x1b[?25h\x1b[?25'l\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b["
+    "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\x1b[H\x1b[?25h "
+    "\x1b[H\x1b[?25l\r\nfoo";
+
+TEST(AnsiFilterTest, DoesNotExplodeOnEmptyString) {
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences(""), "");
+}
+
+TEST(AnsiFilterTest, KeepsUnescapedString) {
+  constexpr char kStr[] = "Lorem ipsum";
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences(kStr), kStr);
+}
+
+TEST(AnsiFilterTest, RemovesDeviceControlString) {
+  // Special commands for the device.
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1bPparams\x1b\\bar"),
+            "foobar");
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x90params\x9c"
+                                               "bar"),
+            "foobar");
+}
+
+TEST(AnsiFilterTest, RemovesControlSequenceIntroducer) {
+  // E.g. the well-known regular ANSI color codes.
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1b[01;32mbar"), "foobar");
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x9b"
+                                               "01;32mbar"),
+            "foobar");
+}
+
+TEST(AnsiFilterTest, RemovesOperatingSystemCommand) {
+  // E.g. setting the Window title.
+  // Not cool: OS commands can contain null-terminated string.
+  std::string str = "foo\x1b]0;c:\\path\\to\\foo.exe";
+  str.append(1, '\0');
+  str.append("\abar");
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences(str), "foobar");
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x9dstring\x1b\\bar"),
+            "foobar");
+}
+
+TEST(AnsiFilterTest, RemovesRestIfNotTerminated) {
+  EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1b[01;32"), "foo");
+}
+
+TEST(AnsiFilterTest, RemovesSequencesFromActualSshOutput) {
+  // Note: Can't just say str = kSshOutput because of the \0 in the string.
+  std::string str = std::string(kSshOutput, sizeof(kSshOutput) - 1);
+  std::string res = std::string(
+      absl::StripAsciiWhitespace(ansi_filter::RemoveEscapeSequences(str)));
+  EXPECT_EQ(res, "foo");
+}
+
+TEST(AnsiFilterTest, WorksForExampleFromDocumentation) {
+  std::string str = "This \x1b[1;32merror\x1b[0m is red.";
+  std::string res = std::string(ansi_filter::RemoveEscapeSequences(str));
+  EXPECT_EQ(res, "This error is red.");
+}
+
+}  // namespace
+}  // namespace cdc_ft
diff --git a/tests_common/BUILD b/tests_common/BUILD
index 3905508..54f03c9 100644
--- a/tests_common/BUILD
+++ b/tests_common/BUILD
@@ -21,6 +21,7 @@ cc_binary(
         "//common:all_test_data",
     ],
     deps = [
+        "//common:ansi_filter",
         "//common:buffer",
         "//common:dir_iter",
         "//common:file_watcher",