From cd8ef406b0868a020605c47a8a8d1092a4c27251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Thu, 25 Jul 2024 18:38:28 +0000 Subject: [PATCH] Change output normalization logic to be linear against size of output I believe the previous code was accidentally quadratic. Let's perf it. --- compiler/rustc_errors/src/emitter.rs | 108 +++++++++++++-------------- 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 58220c6549005..9d1f1bf251af8 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2557,62 +2557,60 @@ fn num_decimal_digits(num: usize) -> usize { MAX_DIGITS } -// We replace some characters so the CLI output is always consistent and underlines aligned. -// Keep the following list in sync with `rustc_span::char_width`. -const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ - ('\t', " "), // We do our own tab replacement - ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters. - ('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently - ('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk - ('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always. - ('\u{202E}', "�"), - ('\u{2066}', "�"), - ('\u{2067}', "�"), - ('\u{2068}', "�"), - ('\u{202C}', "�"), - ('\u{2069}', "�"), - // In terminals without Unicode support the following will be garbled, but in *all* terminals - // the underlying codepoint will be as well. We could gate this replacement behind a "unicode - // support" gate. - ('\u{0000}', "␀"), - ('\u{0001}', "␁"), - ('\u{0002}', "␂"), - ('\u{0003}', "␃"), - ('\u{0004}', "␄"), - ('\u{0005}', "␅"), - ('\u{0006}', "␆"), - ('\u{0007}', "␇"), - ('\u{0008}', "␈"), - ('\u{000B}', "␋"), - ('\u{000C}', "␌"), - ('\u{000D}', "␍"), - ('\u{000E}', "␎"), - ('\u{000F}', "␏"), - ('\u{0010}', "␐"), - ('\u{0011}', "␑"), - ('\u{0012}', "␒"), - ('\u{0013}', "␓"), - ('\u{0014}', "␔"), - ('\u{0015}', "␕"), - ('\u{0016}', "␖"), - ('\u{0017}', "␗"), - ('\u{0018}', "␘"), - ('\u{0019}', "␙"), - ('\u{001A}', "␚"), - ('\u{001B}', "␛"), - ('\u{001C}', "␜"), - ('\u{001D}', "␝"), - ('\u{001E}', "␞"), - ('\u{001F}', "␟"), - ('\u{007F}', "␡"), -]; - fn normalize_whitespace(str: &str) -> String { - let mut s = str.to_string(); - for (c, replacement) in OUTPUT_REPLACEMENTS { - s = s.replace(*c, replacement); - } - s + // We replace some characters so the CLI output is always consistent and underlines aligned. + // Keep the following list in sync with `rustc_span::char_width`. + let output_replacements = FxHashMap::from_iter([ + ('\t', " "), // We do our own tab replacement + ('\u{200D}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters. + ('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently + ('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk + ('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always. + ('\u{202E}', "�"), + ('\u{2066}', "�"), + ('\u{2067}', "�"), + ('\u{2068}', "�"), + ('\u{202C}', "�"), + ('\u{2069}', "�"), + // In terminals without Unicode support the following will be garbled, but in *all* terminals + // the underlying codepoint will be as well. We could gate this replacement behind a "unicode + // support" gate. + ('\u{0000}', "␀"), + ('\u{0001}', "␁"), + ('\u{0002}', "␂"), + ('\u{0003}', "␃"), + ('\u{0004}', "␄"), + ('\u{0005}', "␅"), + ('\u{0006}', "␆"), + ('\u{0007}', "␇"), + ('\u{0008}', "␈"), + ('\u{000B}', "␋"), + ('\u{000C}', "␌"), + ('\u{000D}', "␍"), + ('\u{000E}', "␎"), + ('\u{000F}', "␏"), + ('\u{0010}', "␐"), + ('\u{0011}', "␑"), + ('\u{0012}', "␒"), + ('\u{0013}', "␓"), + ('\u{0014}', "␔"), + ('\u{0015}', "␕"), + ('\u{0016}', "␖"), + ('\u{0017}', "␗"), + ('\u{0018}', "␘"), + ('\u{0019}', "␙"), + ('\u{001A}', "␚"), + ('\u{001B}', "␛"), + ('\u{001C}', "␜"), + ('\u{001D}', "␝"), + ('\u{001E}', "␞"), + ('\u{001F}', "␟"), + ('\u{007F}', "␡"), + ]); + + str.chars() + .map(|c| output_replacements.get(&c).map_or(c.to_string(), |s| s.to_string())) + .collect() } fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {