From 7b94b0674e3e1010bbb87aba0aac534a3d6904bd Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <42969706+aherrmann-da@users.noreply.github.com>
Date: Tue, 24 Aug 2021 17:03:45 +0200
Subject: [PATCH] Map shortened scala test suite names to long names on Windows
 (#10628)

* Generate short to long name mapping in aspect

Maps shortened test names in da_scala_test_suite on Windows to their
long name on Linux and MacOS.

Names are shortened on Windows to avoid exceeding MAX_PATH.

* Script to generate scala test name mapping

* Generate scala-test-suite-name-map.json on Windows

changelog_begin
changelog_end

* Generate UTF-8 with Unix line endings

Otherwise the file will be formatted using UTF-16 with CRLF line
endings, which confuses `jq` on Linux.

* Apply Scala test name remapping before ES upload

* Pipe bazel output into intermediate file

Bazel writes the output of --experimental_show_artifacts to stderr
instead of stdout. In Powershell this means that these outputs are not
plain strings, but instead error objects. Simply redirecting these to
stdout and piping them into further processing will lead to
indeterministically missing items or indeterministically introduced
additional newlines which may break paths.

To work around this we extract the error message from error objects,
introduce appropriate newlines, and write the output to a temporary file
before further processing.

This solution is taken and adapted from
https://stackoverflow.com/a/48671797/841562

* Add copyright header

Co-authored-by: Andreas Herrmann <andreas.herrmann@tweag.io>
---
 bazel_tools/scala.bzl               | 51 +++++++++++++++++++++
 build.ps1                           |  4 ++
 ci/remap-scala-test-short-names.ps1 | 71 +++++++++++++++++++++++++++++
 ci/upload-bazel-metrics.yml         |  2 +-
 infra/es_cluster.tf                 | 18 ++++++++
 5 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100755 ci/remap-scala-test-short-names.ps1

diff --git a/bazel_tools/scala.bzl b/bazel_tools/scala.bzl
index eafae3c7285c..97819e11cdca 100644
--- a/bazel_tools/scala.bzl
+++ b/bazel_tools/scala.bzl
@@ -11,6 +11,7 @@ load(
     "scala_test",
     "scala_test_suite",
 )
+load("@io_bazel_rules_scala//scala/private:common.bzl", "sanitize_string_for_usage")
 load(
     "@io_bazel_rules_scala//jmh:jmh.bzl",
     "scala_benchmark_jmh",
@@ -730,3 +731,53 @@ def da_scala_benchmark_jmh(
     deps = resolve_scala_deps(deps, scala_deps, versioned_deps, versioned_scala_deps)
     runtime_deps = resolve_scala_deps(runtime_deps, scala_runtime_deps)
     _wrap_rule_no_plugins(scala_benchmark_jmh, deps, runtime_deps, **kwargs)
+
+def _da_scala_test_short_name_aspect_impl(target, ctx):
+    is_scala_test = ctx.rule.kind == "scala_test"
+
+    srcs = getattr(ctx.rule.attr, "srcs", [])
+    has_single_src = type(srcs) == "list" and len(srcs) == 1
+
+    split = target.label.name.rsplit("_", 1)
+    is_numbered = len(split) == 2 and split[1].isdigit()
+    if is_numbered:
+        [name, number] = split
+
+    is_relevant = is_scala_test and has_single_src and is_numbered
+    if not is_relevant:
+        return []
+
+    src_name = srcs[0].label.name
+    long_name = "%s_test_suite_%s" % (name, sanitize_string_for_usage(src_name))
+    long_label = target.label.relative(long_name)
+    info_json = json.encode(struct(
+        short_label = str(target.label),
+        long_label = str(long_label),
+    ))
+
+    # Aspect generated providers are not available to Starlark cquery output,
+    # so we write the information to a file from where it can be collected in a
+    # separate step.
+    # See https://github.com/bazelbuild/bazel/issues/13866
+    info_out = ctx.actions.declare_file("{}_scala_test_info.json".format(target.label.name))
+    ctx.actions.write(info_out, content = info_json, is_executable = False)
+    return [OutputGroupInfo(scala_test_info = depset([info_out]))]
+
+da_scala_test_short_name_aspect = aspect(
+    implementation = _da_scala_test_short_name_aspect_impl,
+    doc = """\
+Maps shortened test names in da_scala_test_suite test-cases to long names.
+
+Test cases in da_scala_test_suite have shortened Bazel labels on Windows to
+avoid exceeding the MAX_PATH length limit on Windows. For the purposes of CI
+monitoring this makes it difficult to determine how a particular test case
+behaves across different platforms, since the name is different on Windows than
+on Linux and MacOS.
+
+This aspect generates a JSON file in the scala_test_info output group that
+describes a mapping from the shortened name to the regular long name.
+
+Such an output will be generated for any scala_test target that has a single
+source file and who's name follows the pattern `<name>_<number>`.
+""",
+)
diff --git a/build.ps1 b/build.ps1
index 071eafb9b230..145fa9d959b2 100644
--- a/build.ps1
+++ b/build.ps1
@@ -69,6 +69,10 @@ bazel build //... `
 bazel shutdown
 
 if ($env:SKIP_TESTS -ceq "False") {
+    # Generate mapping from shortened scala-test names on Windows to long names on Linux and MacOS.
+    ./ci/remap-scala-test-short-names.ps1 `
+      | Out-File -Encoding UTF8 -NoNewline scala-test-suite-name-map.json
+
     bazel test //... `
       `-`-profile test-profile.json `
       `-`-experimental_profile_include_target_label `
diff --git a/ci/remap-scala-test-short-names.ps1 b/ci/remap-scala-test-short-names.ps1
new file mode 100755
index 000000000000..1d6ffc2b7a8f
--- /dev/null
+++ b/ci/remap-scala-test-short-names.ps1
@@ -0,0 +1,71 @@
+# Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# $ErrorActionPreference = 'Stop' causes the script to fail because Bazel writes to stderr.
+$ErrorActionPreference = 'Continue'
+
+[string[]]$scala_test_targets = bazel.exe query "kind(scala_test, deps(kind(test_suite, //...), 1))"
+if ($lastexitcode -ne 0) {
+  throw "bazel query returned non-zero exit code: $lastexitcode"
+}
+
+if ($scala_test_targets.count -gt 0) {
+
+  try {
+
+    # Bazel writes the output of --experimental_show_artifacts to stderr
+    # instead of stdout. In Powershell this means that these outputs are not
+    # plain strings, but instead error objects. Simply redirecting these to
+    # stdout and piping them into further processing will lead to
+    # indeterministically missing items or indeterministically introduced
+    # additional newlines which may break paths.
+    #
+    # To work around this we extract the error message from error objects,
+    # introduce appropriate newlines, and write the output to a temporary file
+    # before further processing.
+    #
+    # This solution is taken and adapted from
+    # https://stackoverflow.com/a/48671797/841562
+    $bazelexitcode = 0
+    $tmp = New-TemporaryFile
+    try {
+      $append = $false
+      $out = [System.IO.StreamWriter]::new($tmp, $append)
+      bazel.exe build `
+        "--aspects=//bazel_tools:scala.bzl%da_scala_test_short_name_aspect" `
+        "--output_groups=scala_test_info" `
+        "--experimental_show_artifacts" `
+        @scala_test_targets `
+        2>&1 | % {
+          if ($_ -is [System.Management.Automation.ErrorRecord]) {
+            if ($_.TargetObject -ne $null) {
+              $out.WriteLine();
+            }
+            $out.Write($_.Exception.Message)
+          } else {
+            $out.WriteLine($_)
+          }
+        }
+      $bazelexitcode = $lastexitcode
+    } finally {
+      $out.Close()
+    }
+
+    if ($bazelexitcode -ne 0) {
+      $errmsg = Get-Content $tmp
+      Write-Error -Message "$errmsg"
+      throw "bazel build returned non-zero exit code: $lastexitcode"
+    }
+
+    Get-Content $tmp |
+      % { if ( $_ -match ">>>(?<filename>.*)" ) { Get-Content $Matches.filename } } |
+      jq -acsS "map({key:.short_label,value:.long_label})|from_entries"
+
+    if ($lastexitcode -ne 0) {
+      throw "jq returned non-zero exit code: $lastexitcode"
+    }
+  } finally {
+    Remove-Item $tmp
+  }
+
+}
diff --git a/ci/upload-bazel-metrics.yml b/ci/upload-bazel-metrics.yml
index 3cdb5b41fec3..0982a740e24b 100644
--- a/ci/upload-bazel-metrics.yml
+++ b/ci/upload-bazel-metrics.yml
@@ -50,7 +50,7 @@ steps:
       target_dir="$(Build.StagingDirectory)/$(pipeline_id)"
       mkdir -p "$target_dir"
       cp "job-md.json" "$target_dir/job-md.json"
-      for log_file in 'build-profile.json' 'build-events.json' 'test-profile.json' 'test-events.json'; do
+      for log_file in 'build-profile.json' 'build-events.json' 'test-profile.json' 'test-events.json' 'scala-test-suite-name-map.json'; do
         [[ -f "$log_file" ]] && cp "$log_file" "$target_dir/$log_file" || echo "$log_file not found"
       done
       cd "$(Build.StagingDirectory)"
diff --git a/infra/es_cluster.tf b/infra/es_cluster.tf
index bec74648bf89..ff666dd74f2b 100644
--- a/infra/es_cluster.tf
+++ b/infra/es_cluster.tf
@@ -781,6 +781,23 @@ bulk_upload() {
   echo "$res"
 }
 
+patch() {
+  local job map
+  job="$1"
+  # Replace shortened Scala test names by their long names.
+  # See //bazel_tools:scala.bzl%da_scala_test_short_name_aspect.
+  map="scala-test-suite-name-map.json"
+  if ! [[ -f "$job/$map" ]]; then
+    echo "$job: no $map"
+  else
+    echo "$job: applying $map"
+    # Generates a sed command to replace short labels by long labels.
+    jq_command='to_entries | map("s|\(.key)\\b|\(.value)|g") | join(";")'
+    sed_command="$(jq -r "$jq_command" <"$job/$map")"
+    sed -i "$sed_command" "$job"/{build-events,build-profile,test-events,test-profile}.json
+  fi
+}
+
 push() {
   local job f pids
   job="$1"
@@ -852,6 +869,7 @@ for tar in $todo; do
     ensure_index "$job" "$(index "$job" jobs)"
     ensure_index "$job" "$(index "$job" events)"
     tar --force-local -x -z -f "$(basename "$tar")"
+    patch "$job"
     push "$job"
     rm -rf $job
     r=$(curl -H 'Content-Type: application/json' \