coverage: Store coverage source regions as Span until codegen

rust-lang · bors · Nov 28, 2024 · Nov 24, 2024 · Nov 24, 2024 · Nov 24, 2024
commit b9fb1a69d2fe1c146cbdf181cf9e0eaf15935799
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -1,6 +1,4 @@
-use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId, SourceRegion};
-
-use crate::coverageinfo::mapgen::LocalFileId;
+use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};
 
 /// Must match the layout of `LLVMRustCounterKind`.
 #[derive(Copy, Clone, Debug)]
@@ -126,30 +124,16 @@ pub(crate) struct CoverageSpan {
     /// Local index into the function's local-to-global file ID table.
     /// The value at that index is itself an index into the coverage filename
     /// table in the CGU's `__llvm_covmap` section.
-    file_id: u32,
+    pub(crate) file_id: u32,
 
     /// 1-based starting line of the source code span.
-    start_line: u32,
+    pub(crate) start_line: u32,
     /// 1-based starting column of the source code span.
-    start_col: u32,
+    pub(crate) start_col: u32,
     /// 1-based ending line of the source code span.
-    end_line: u32,
+    pub(crate) end_line: u32,
     /// 1-based ending column of the source code span. High bit must be unset.
-    end_col: u32,
-}
-
-impl CoverageSpan {
-    pub(crate) fn from_source_region(
-        local_file_id: LocalFileId,
-        code_region: &SourceRegion,
-    ) -> Self {
-        let file_id = local_file_id.as_u32();
-        let &SourceRegion { start_line, start_col, end_line, end_col } = code_region;
-        // Internally, LLVM uses the high bit of `end_col` to distinguish between
-        // code regions and gap regions, so it can't be used by the column number.
-        assert!(end_col & (1u32 << 31) == 0, "high bit of `end_col` must be unset: {end_col:#X}");
-        Self { file_id, start_line, start_col, end_line, end_col }
-    }
+    pub(crate) end_col: u32,
 }
 
 /// Must match the layout of `LLVMRustCoverageCodeRegion`.

diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
@@ -3,9 +3,9 @@ use rustc_data_structures::fx::FxIndexSet;
 use rustc_index::bit_set::BitSet;
 use rustc_middle::mir::coverage::{
     CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op,
-    SourceRegion,
 };
 use rustc_middle::ty::Instance;
+use rustc_span::Span;
 use tracing::{debug, instrument};
 
 use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
@@ -220,16 +220,16 @@ impl<'tcx> FunctionCoverage<'tcx> {
         })
     }
 
-    /// Converts this function's coverage mappings into an intermediate form
-    /// that will be used by `mapgen` when preparing for FFI.
-    pub(crate) fn counter_regions(
+    /// Yields all this function's coverage mappings, after simplifying away
+    /// unused counters and counter expressions.
+    pub(crate) fn mapping_spans(
         &self,
-    ) -> impl Iterator<Item = (MappingKind, &SourceRegion)> + ExactSizeIterator {
+    ) -> impl Iterator<Item = (MappingKind, Span)> + ExactSizeIterator + Captures<'_> {
         self.function_coverage_info.mappings.iter().map(move |mapping| {
-            let Mapping { kind, source_region } = mapping;
+            let &Mapping { ref kind, span } = mapping;
             let kind =
                 kind.map_terms(|term| if self.is_zero_term(term) { CovTerm::Zero } else { term });
-            (kind, source_region)
+            (kind, span)
         })
     }
 

diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -1,3 +1,5 @@
+mod spans;
+
 use std::ffi::CString;
 use std::iter;
 
@@ -201,7 +203,7 @@ rustc_index::newtype_index! {
     /// An index into a function's list of global file IDs. That underlying list
     /// of local-to-global mappings will be embedded in the function's record in
     /// the `__llvm_covfun` linker section.
-    pub(crate) struct LocalFileId {}
+    struct LocalFileId {}
 }
 
 /// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
@@ -244,11 +246,13 @@ fn encode_mappings_for_function(
     global_file_table: &GlobalFileTable,
     function_coverage: &FunctionCoverage<'_>,
 ) -> Vec<u8> {
-    let counter_regions = function_coverage.counter_regions();
-    if counter_regions.is_empty() {
+    let mapping_spans = function_coverage.mapping_spans();
+    if mapping_spans.is_empty() {
         return Vec::new();
     }
 
+    let fn_cov_info = function_coverage.function_coverage_info;
+
     let expressions = function_coverage.counter_expressions().collect::<Vec<_>>();
 
     let mut virtual_file_mapping = VirtualFileMapping::default();
@@ -258,7 +262,9 @@ fn encode_mappings_for_function(
     let mut mcdc_decision_regions = vec![];
 
     // Currently a function's mappings must all be in the same file as its body span.
-    let file_name = span_file_name(tcx, function_coverage.function_coverage_info.body_span);
+    let file_name = span_file_name(tcx, fn_cov_info.body_span);
+    let source_map = tcx.sess.source_map();
+    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
 
     // Look up the global file ID for that filename.
     let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
@@ -267,11 +273,15 @@ fn encode_mappings_for_function(
     let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id);
     debug!("  file id: {local_file_id:?} => {global_file_id:?} = '{file_name:?}'");
 
-    // For each counter/region pair in this function+file, convert it to a
+    let make_cov_span = |span| {
+        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
+    };
+
+    // For each coverage mapping span in this function+file, convert it to a
     // form suitable for FFI.
-    for (mapping_kind, region) in counter_regions {
-        debug!("Adding counter {mapping_kind:?} to map for {region:?}");
-        let cov_span = ffi::CoverageSpan::from_source_region(local_file_id, region);
+    for (mapping_kind, span) in mapping_spans {
+        debug!("Adding counter {mapping_kind:?} to map for {span:?}");
+        let Some(cov_span) = make_cov_span(span) else { continue };
         match mapping_kind {
             MappingKind::Code(term) => {
                 code_regions

diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -0,0 +1,124 @@
+use rustc_middle::mir::coverage::FunctionCoverageInfo;
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, Pos, SourceFile, Span};
+use tracing::debug;
+
+use crate::coverageinfo::ffi;
+use crate::coverageinfo::mapgen::LocalFileId;
+
+/// Converts the span into its start line and column, and end line and column.
+///
+/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
+/// the compiler, these column numbers are denoted in **bytes**, because that's what
+/// LLVM's `llvm-cov` tool expects to see in coverage maps.
+///
+/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// but it's hard to rule out entirely (especially in the presence of complex macros
+/// or other expansions), and if it does happen then skipping a span or function is
+/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
+pub(crate) fn make_coverage_span(
+    file_id: LocalFileId,
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    file: &SourceFile,
+    span: Span,
+) -> Option<ffi::CoverageSpan> {
+    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+
+    // Column numbers need to be in bytes, so we can't use the more convenient
+    // `SourceMap` methods for looking up file coordinates.
+    let line_and_byte_column = |pos: BytePos| -> Option<(usize, usize)> {
+        let rpos = file.relative_position(pos);
+        let line_index = file.lookup_line(rpos)?;
+        let line_start = file.lines()[line_index];
+        // Line numbers and column numbers are 1-based, so add 1 to each.
+        Some((line_index + 1, (rpos - line_start).to_usize() + 1))
+    };
+
+    let (mut start_line, start_col) = line_and_byte_column(span.lo())?;
+    let (mut end_line, end_col) = line_and_byte_column(span.hi())?;
+
+    // Apply an offset so that code in doctests has correct line numbers.
+    // FIXME(#79417): Currently we have no way to offset doctest _columns_.
+    start_line = source_map.doctest_offset_line(&file.name, start_line);
+    end_line = source_map.doctest_offset_line(&file.name, end_line);
+
+    // The FFI struct stores `u32` coordinates. Narrow with checked conversions so
+    // that a value that doesn't fit makes us skip the span (`None`), rather than
+    // silently wrapping to unrelated coordinates that might still pass validation.
+    check_coverage_span(ffi::CoverageSpan {
+        file_id: file_id.as_u32(),
+        start_line: u32::try_from(start_line).ok()?,
+        start_col: u32::try_from(start_col).ok()?,
+        end_line: u32::try_from(end_line).ok()?,
+        end_col: u32::try_from(end_col).ok()?,
+    })
+}
+
+/// Returns `span` as-is if it is non-empty. An empty span is widened by one byte to
+/// cover an adjacent `{` (after it) or `}` (before it), or else `None` is returned.
+fn ensure_non_empty_span(
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    span: Span,
+) -> Option<Span> {
+    if !span.is_empty() {
+        return Some(span);
+    }
+
+    let (lo, hi) = (span.lo(), span.hi());
+    // Only expand in directions that stay within the bounds of the body span.
+    let try_next = hi < fn_cov_info.body_span.hi();
+    let try_prev = fn_cov_info.body_span.lo() < lo;
+    if !(try_next || try_prev) {
+        return None;
+    }
+
+    source_map
+        .span_to_source(span, |src, start, end| try {
+            // Only ASCII bytes are compared, so multi-byte code points don't matter.
+            // Checked lookups make a bad offset skip the span instead of panicking.
+            let byte_at = |i: usize| src.as_bytes().get(i).copied();
+            if try_next && byte_at(end) == Some(b'{') {
+                Some(span.with_hi(hi + BytePos(1)))
+            } else if try_prev && start.checked_sub(1).and_then(byte_at) == Some(b'}') {
+                Some(span.with_lo(lo - BytePos(1)))
+            } else {
+                None
+            }
+        })
+        .ok()?
+}
+
+/// Validates a coverage span before it is handed to LLVM, returning it unchanged
+/// if it is acceptable and `None` if it must be discarded. `llvm-cov` exits with a
+/// fatal error when it sees an improperly ordered region (end < start), so discard
+/// regions that are improperly ordered, or might be interpreted that way.
+fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
+    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+
+    // Line/column coordinates are supposed to be 1-based. If we ever emit
+    // coordinates of 0, `llvm-cov` might misinterpret them.
+    let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0);
+    // Coverage mappings use the high bit of `end_col` to indicate that a
+    // region is actually a "gap" region, so make sure it's unset.
+    let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0;
+    // If a region is improperly ordered (end < start), `llvm-cov` will exit
+    // with a fatal error, which is inconvenient for users and hard to debug.
+    let is_ordered = (start_line, start_col) <= (end_line, end_col);
+
+    if all_nonzero && end_col_has_high_bit_unset && is_ordered {
+        Some(cov_span)
+    } else {
+        debug!(
+            ?cov_span,
+            ?all_nonzero,
+            ?end_col_has_high_bit_unset,
+            ?is_ordered,
+            "Skipping source region that would be misinterpreted or rejected by LLVM"
+        );
+        // If this happens in a debug build, ICE to make it easier to notice.
+        debug_assert!(false, "Improper source region: {cov_span:?}");
+        None
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -17,6 +17,7 @@
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
 #![feature(rustdoc_internals)]
+#![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
 

diff --git a/compiler/rustc_middle/src/mir/coverage.rs b/compiler/rustc_middle/src/mir/coverage.rs
@@ -155,22 +155,6 @@ impl Debug for CoverageKind {
     }
 }
 
-#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, Eq, PartialOrd, Ord)]
-#[derive(TypeFoldable, TypeVisitable)]
-pub struct SourceRegion {
-    pub start_line: u32,
-    pub start_col: u32,
-    pub end_line: u32,
-    pub end_col: u32,
-}
-
-impl Debug for SourceRegion {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
-        let &Self { start_line, start_col, end_line, end_col } = self;
-        write!(fmt, "{start_line}:{start_col} - {end_line}:{end_col}")
-    }
-}
-
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, HashStable)]
 #[derive(TyEncodable, TyDecodable, TypeFoldable, TypeVisitable)]
 pub enum Op {
@@ -232,7 +216,7 @@ impl MappingKind {
 #[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
 pub struct Mapping {
     pub kind: MappingKind,
-    pub source_region: SourceRegion,
+    pub span: Span,
 }
 
 /// Stores per-function coverage information attached to a `mir::Body`,

diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs
@@ -603,8 +603,8 @@ fn write_function_coverage_info(
     for (id, expression) in expressions.iter_enumerated() {
         writeln!(w, "{INDENT}coverage {id:?} => {expression:?};")?;
     }
-    for coverage::Mapping { kind, source_region } in mappings {
-        writeln!(w, "{INDENT}coverage {kind:?} => {source_region:?};")?;
+    for coverage::Mapping { kind, span } in mappings {
+        writeln!(w, "{INDENT}coverage {kind:?} => {span:?};")?;
     }
     writeln!(w)?;