From 1200b343166b85d6ea1c2a8c19914898c513f2e1 Mon Sep 17 00:00:00 2001 From: Lutz Justen Date: Tue, 31 Jan 2023 14:53:43 +0100 Subject: [PATCH] [common] Add ansi_filter (#73) Adds a function to filter ANSI escape sequences from a string. Executing SSH commands on Windows yields output that is full of ANSI escape sequences if the "-tt" (forced TTY) argument is used. One particular escape sequence sets the window title to "c:\windows\system32\cmd.exe". This string is null-terminated and messes with parsing the actual output later in that string. The filter function removes those escape sequences. The output is still a bit messed up, even after removing escape sequences. Some sequences delete rows and move the cursor. Without properly interpreting these sequences it doesn't seem possible to retrieve the proper output. In a future CL the -tt argument is removed on Windows, which removes the necessity to filter ANSI codes. However, sometimes the target architecture is not known (yet), so that it is still useful to filter ANSI codes in that case to print useful debug output. 
--- all_files.vcxitems | 3 ++ common/BUILD | 16 ++++++ common/ansi_filter.cc | 103 +++++++++++++++++++++++++++++++++++++ common/ansi_filter.h | 37 +++++++++++++ common/ansi_filter_test.cc | 98 +++++++++++++++++++++++++++++++++++ tests_common/BUILD | 1 + 6 files changed, 258 insertions(+) create mode 100644 common/ansi_filter.cc create mode 100644 common/ansi_filter.h create mode 100644 common/ansi_filter_test.cc diff --git a/all_files.vcxitems b/all_files.vcxitems index 0f55789..6731d57 100644 --- a/all_files.vcxitems +++ b/all_files.vcxitems @@ -39,6 +39,8 @@ + + @@ -156,6 +158,7 @@ + diff --git a/common/BUILD b/common/BUILD index db40b20..6a12a66 100644 --- a/common/BUILD +++ b/common/BUILD @@ -2,6 +2,22 @@ load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") package(default_visibility = ["//visibility:public"]) +cc_library( + name = "ansi_filter", + srcs = ["ansi_filter.cc"], + hdrs = ["ansi_filter.h"], +) + +cc_test( + name = "ansi_filter_test", + srcs = ["ansi_filter_test.cc"], + deps = [ + ":ansi_filter", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "buffer", srcs = ["buffer.cc"], diff --git a/common/ansi_filter.cc b/common/ansi_filter.cc new file mode 100644 index 0000000..e36c2d2 --- /dev/null +++ b/common/ansi_filter.cc @@ -0,0 +1,103 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "common/ansi_filter.h" + +namespace cdc_ft { +namespace ansi_filter { +namespace { +enum class State { + kNotInSequence, + kDCS, // Starting with kESC + P or kDCSI, Device Control String. + kCS, // Starting with kESC + [ or kCSI, Control Sequence. + kOSC, // Starting with kESC + ] or kOSCI, Operating System Command. +}; + +constexpr uint8_t kBEL = 0x07; // Terminal bell. +constexpr uint8_t kESC = 0x1B; // ANSI escape character. +constexpr uint8_t kST = 0x9C; // String Terminator. +constexpr uint8_t kDCSI = 0x90; // Device Control String Introducer. +constexpr uint8_t kCSI = 0x9B; // Control Sequence Introducer. +constexpr uint8_t kOSCI = 0x9D; // Operating System Command Introducer + +} // namespace + +std::string RemoveEscapeSequences(const std::string& input) { + State state = State::kNotInSequence; + std::string result; + + for (size_t n = 0; n < input.size(); ++n) { + uint8_t ch = static_cast(input[n]); + uint8_t next_ch = + static_cast(n + 1 < input.size() ? input[n + 1] : 0); + + switch (state) { + case State::kNotInSequence: + // Device Control String. + if ((ch == kESC && next_ch == 'P') || ch == kDCSI) { + n += ch == kESC ? 1 : 0; + state = State::kDCS; + break; + } + + // Control Sequence. + if ((ch == kESC && next_ch == '[') || ch == kCSI) { + n += ch == kESC ? 1 : 0; + state = State::kCS; + break; + } + + // Operating System Command. + if ((ch == kESC && next_ch == ']') || ch == kOSCI) { + n += ch == kESC ? 1 : 0; + state = State::kOSC; + break; + } + + // Char does not belong to control sequence. + result.push_back(ch); + break; + + case State::kDCS: + // Device control strings are ended by kST or ESC + \. + if (ch == kST || (ch == kESC && next_ch == '\\')) { + n += ch == kESC ? 1 : 0; + state = State::kNotInSequence; + } + break; + + case State::kCS: + // Control sequences are ended by a final byte in 0x40-0x7E. 
+ // https://en.wikipedia.org/wiki/ANSI_escape_code#CSIsection + if (ch >= 0x40 && ch <= 0x7E) { + state = State::kNotInSequence; + } + break; + + case State::kOSC: + // Operating system commands are ended by kBEL, kST or ESC + \. + // https://invisible-island.net/xterm/ctlseqs/ctlseqs.html#h3-Operating-System-Commands + if (ch == kBEL || ch == kST || (ch == kESC && next_ch == '\\')) { + n += ch == kESC ? 1 : 0; + state = State::kNotInSequence; + } + break; + } + } + + return result; +} + +} // namespace ansi_filter +} // namespace cdc_ft diff --git a/common/ansi_filter.h b/common/ansi_filter.h new file mode 100644 index 0000000..868a639 --- /dev/null +++ b/common/ansi_filter.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef COMMON_ANSI_FILTER_H_ +#define COMMON_ANSI_FILTER_H_ + +#include + +namespace cdc_ft { +namespace ansi_filter { + +// Removes ANSI escape sequences from a string. +// |input| is a string that can contain ANSI escape sequences. +// Returns the filtered string with ANSI escape sequences removed. +// Example: The most common escape sequence sets a color, e.g. +// "This \x1b[1;32merror\x1b[0m is red." +// The filtered output is +// "This error is red." 
+std::string RemoveEscapeSequences(const std::string& input); + +} // namespace ansi_filter +} // namespace cdc_ft + +#endif // COMMON_ANSI_FILTER_H_ diff --git a/common/ansi_filter_test.cc b/common/ansi_filter_test.cc new file mode 100644 index 0000000..6ac9695 --- /dev/null +++ b/common/ansi_filter_test.cc @@ -0,0 +1,98 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "common/ansi_filter.h" + +#include "absl/strings/ascii.h" +#include "gtest/gtest.h" + +namespace cdc_ft { +namespace { + +// Actual sample output from running SSH with -tt on Windows. +// Note the \0 after cmd.exe. 
+constexpr char kSshOutput[] = + "\x1b[2J\x1b[?25l\x1b[m\x1b[" + "H\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n" + "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\x1b[H\x1b]0;c:" + "\\windows\\system32\\cmd.exe\0\a\x1b[?25h\x1b[?25'l\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[" + "120C\r\n\x1b[120X\x1b[120C\r\n\x1b[120X\x1b[120C\x1b[H\x1b[?25h " + "\x1b[H\x1b[?25l\r\nfoo"; + +TEST(AnsiFilterTest, DoesNotExplodeOnEmptyString) { + EXPECT_EQ(ansi_filter::RemoveEscapeSequences(""), ""); +} + +TEST(AnsiFilterTest, KeepsUnescapedString) { + constexpr char kStr[] = "Lorem ipsum"; + EXPECT_EQ(ansi_filter::RemoveEscapeSequences(kStr), kStr); +} + +TEST(AnsiFilterTest, RemovesDeviceControlString) { + // Special commands for the device. + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1bPparams\x1b\\bar"), + "foobar"); + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x90params\x9c" + "bar"), + "foobar"); +} + +TEST(AnsiFilterTest, RemovesControlSequenceIntroducer) { + // E.g. the well-known regular ANSI color codes. + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1b[01;32mbar"), "foobar"); + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x9b" + "01;32mbar"), + "foobar"); +} + +TEST(AnsiFilterTest, RemovesOperatingSystemCommand) { + // E.g. setting the Window title. + // Not cool: OS commands can contain null-terminated string. 
+ std::string str = "foo\x1b]0;c:\\path\\to\\foo.exe"; + str.append(1, '\0'); + str.append("\abar"); + EXPECT_EQ(ansi_filter::RemoveEscapeSequences(str), "foobar"); + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x9dstring\x1b\\bar"), + "foobar"); +} + +TEST(AnsiFilterTest, RemovesRestIfNotTerminated) { + EXPECT_EQ(ansi_filter::RemoveEscapeSequences("foo\x1b[01;32"), "foo"); +} + +TEST(AnsiFilterTest, RemovesSequencesFromActualSshOutput) { + // Note: Can't just say str = kSshOutput because of the \0 in the string. + std::string str = std::string(kSshOutput, sizeof(kSshOutput) - 1); + std::string res = std::string( + absl::StripAsciiWhitespace(ansi_filter::RemoveEscapeSequences(str))); + EXPECT_EQ(res, "foo"); +} + +TEST(AnsiFilterTest, WorksForExampleFromDocumentation) { + std::string str = "This \x1b[1;32merror\x1b[0m is red."; + std::string res = std::string(ansi_filter::RemoveEscapeSequences(str)); + EXPECT_EQ(res, "This error is red."); +} + +} // namespace +} // namespace cdc_ft diff --git a/tests_common/BUILD b/tests_common/BUILD index 3905508..54f03c9 100644 --- a/tests_common/BUILD +++ b/tests_common/BUILD @@ -21,6 +21,7 @@ cc_binary( "//common:all_test_data", ], deps = [ + "//common:ansi_filter", "//common:buffer", "//common:dir_iter", "//common:file_watcher",