Add dependency inference for Shell (#11857)
This uses a suggestion from @jsirois to run Shellcheck against each file in isolation, so that Shellcheck reports each `source`d file as not specified as input; we then extract those paths from the JSON error messages.

Outside of this new parsing logic, this is mostly copy pasta from our Protobuf<->Protobuf dependency inference implementation.
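
A minimal standalone sketch of that idea, for illustration only: it assumes only that a `shellcheck` binary is on PATH, the helper name `sourced_paths` and the constant `PATH_RE` are made up here, and the real rule below uses Pants' downloaded Shellcheck and process execution instead of `subprocess`.

from __future__ import annotations

import json
import re
import subprocess

# SC1091 ("Not following: ...") fires for every `source`d file that was not passed
# to Shellcheck as an input, which is exactly the signal we want when a file is
# checked alone.
PATH_RE = re.compile(r"Not following: (.+) was not specified as input")


def sourced_paths(shell_file: str) -> set[str]:
    # Run Shellcheck against the single file in isolation so that every sourced
    # sibling is reported as "not specified as input".
    result = subprocess.run(
        ["shellcheck", "--format=json", shell_file], capture_output=True, text=True
    )
    paths = set()
    for entry in json.loads(result.stdout or "[]"):
        if entry.get("code") == 1091:
            match = PATH_RE.match(entry.get("message", ""))
            if match:
                paths.add(match.group(1))
    return paths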

[ci skip-build-wheels]
Eric-Arellano authored Apr 8, 2021
1 parent 7ef8880 commit 1d6e3bb
Showing 8 changed files with 374 additions and 15 deletions.
9 changes: 2 additions & 7 deletions BUILD
@@ -1,17 +1,12 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

shell_library(name="scripts", sources=["cargo", "pants"])

# We use this to establish the build root, rather than `./pants`, because we cannot safely use the
# latter as the sentinel filename per https://github.com/pantsbuild/pants/pull/8105.
files(name='build_root', sources=["BUILD_ROOT"])

files(name='gitignore', sources=['.gitignore'])
files(name='pants_toml', sources=['pants.toml'])
files(name='pyproject', sources=['pyproject.toml'])

shell_library(
name="pants",
sources=["pants"],
dependencies=["build-support/common.sh", "build-support/pants_venv", "build-support/bin/rust/bootstrap_code.sh"],
)
shell_library(name="cargo", sources=["cargo"], dependencies=["build-support/common.sh"])
5 changes: 1 addition & 4 deletions build-support/bin/BUILD
@@ -1,10 +1,7 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

shell_library(
name="sh_scripts",
dependencies=["build-support/common.sh", "build-support/pants_venv", "build-support/bin/rust/bootstrap_code.sh"],
)
shell_library(name="sh_scripts")

python_library(name="py_scripts")
python_tests(name="py_tests", timeout=90) # reversion_test.py times out occasionally.
2 changes: 1 addition & 1 deletion build-support/bin/rust/BUILD
@@ -1,4 +1,4 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

shell_library(dependencies=["build-support/common.sh", "build-support/pants_venv"])
shell_library()
2 changes: 1 addition & 1 deletion build-support/githooks/BUILD
@@ -1,4 +1,4 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

shell_library(sources=["pre-commit", "prepare-commit-msg"], dependencies=["build-support/common.sh"])
shell_library(sources=["pre-commit", "prepare-commit-msg"])
199 changes: 199 additions & 0 deletions src/python/pants/backend/shell/dependency_inference.py
@@ -0,0 +1,199 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from typing import DefaultDict

from pants.backend.shell.lint.shellcheck.subsystem import Shellcheck
from pants.backend.shell.shell_setup import ShellSetup
from pants.backend.shell.target_types import ShellSources
from pants.base.specs import AddressSpecs, DescendantAddresses
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.engine.addresses import Address
from pants.engine.collection import DeduplicatedCollection
from pants.engine.fs import Digest, DigestSubset, MergeDigests, PathGlobs
from pants.engine.platform import Platform
from pants.engine.process import FallibleProcessResult, Process
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import (
    Dependencies,
    DependenciesRequest,
    ExplicitlyProvidedDependencies,
    HydratedSources,
    HydrateSourcesRequest,
    InferDependenciesRequest,
    InferredDependencies,
    SourcesPaths,
    SourcesPathsRequest,
    Targets,
    WrappedTarget,
)
from pants.engine.unions import UnionRule
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.ordered_set import OrderedSet

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ShellMapping:
"""A mapping of Shell file names to their owning file address."""

mapping: FrozenDict[str, Address]
ambiguous_modules: FrozenDict[str, tuple[Address, ...]]


@rule(desc="Creating map of Shell file names to Shell targets", level=LogLevel.DEBUG)
async def map_shell_files() -> ShellMapping:
    all_expanded_targets = await Get(Targets, AddressSpecs([DescendantAddresses("")]))
    shell_tgts = tuple(tgt for tgt in all_expanded_targets if tgt.has_field(ShellSources))
    sources_per_target = await MultiGet(
        Get(SourcesPaths, SourcesPathsRequest(tgt[ShellSources])) for tgt in shell_tgts
    )

    files_to_addresses: dict[str, Address] = {}
    files_with_multiple_owners: DefaultDict[str, set[Address]] = defaultdict(set)
    for tgt, sources in zip(shell_tgts, sources_per_target):
        for f in sources.files:
            if f in files_to_addresses:
                files_with_multiple_owners[f].update({files_to_addresses[f], tgt.address})
            else:
                files_to_addresses[f] = tgt.address

    # Remove files with ambiguous owners.
    for ambiguous_f in files_with_multiple_owners:
        files_to_addresses.pop(ambiguous_f)

    return ShellMapping(
        mapping=FrozenDict(sorted(files_to_addresses.items())),
        ambiguous_modules=FrozenDict(
            (k, tuple(sorted(v))) for k, v in sorted(files_with_multiple_owners.items())
        ),
    )


class ParsedShellImports(DeduplicatedCollection):
    sort_input = True


@dataclass(frozen=True)
class ParseShellImportsRequest:
    # NB: We parse per-file, rather than per-target, so that each file is analyzed in complete
    # isolation without its sibling files present. Shellcheck then errors when the file tries to
    # source a sibling, which lets us extract that path.
    digest: Digest
    fp: str


PATH_FROM_SHELLCHECK_ERROR = re.compile(r"Not following: (.+) was not specified as input")


@rule
async def parse_shell_imports(
    request: ParseShellImportsRequest, shellcheck: Shellcheck
) -> ParsedShellImports:
    # We use Shellcheck to parse for us by running it against each file in isolation, which means
    # that all `source` statements will error. Then, we can extract the problematic paths from the
    # JSON output.
    downloaded_shellcheck = await Get(
        DownloadedExternalTool, ExternalToolRequest, shellcheck.get_request(Platform.current)
    )
    input_digest = await Get(Digest, MergeDigests([request.digest, downloaded_shellcheck.digest]))
    process_result = await Get(
        FallibleProcessResult,
        Process(
            # NB: We do not load up `[shellcheck].{args,config}` because it would risk breaking
            # determinism of dependency inference in an unexpected way.
            [downloaded_shellcheck.exe, "--format=json", request.fp],
            input_digest=input_digest,
            description=f"Detect Shell imports for {request.fp}",
            level=LogLevel.DEBUG,
        ),
    )

    try:
        output = json.loads(process_result.stdout)
    except json.JSONDecodeError:
        logger.error(
            f"Parsing {request.fp} for dependency inference failed because Shellcheck's output "
            f"could not be loaded as JSON. Please open a GitHub issue at "
            f"https://github.com/pantsbuild/pants/issues/new with this error message attached.\n\n"
            f"\nshellcheck version: {shellcheck.version}\n"
            f"process_result.stdout: {process_result.stdout.decode()}"
        )
        return ParsedShellImports()

    paths = set()
    for error in output:
        if not error.get("code", "") == 1091:
            continue
        msg = error.get("message", "")
        matches = PATH_FROM_SHELLCHECK_ERROR.match(msg)
        if matches:
            paths.add(matches.group(1))
        else:
            logger.error(
                f"Parsing {request.fp} for dependency inference failed because Shellcheck's error "
                f"message was not in the expected format. Please open a GitHub issue at "
                f"https://github.com/pantsbuild/pants/issues/new with this error message "
                f"attached.\n\n\nshellcheck version: {shellcheck.version}\n"
                f"error JSON entry: {error}"
            )
    return ParsedShellImports(paths)


class InferShellDependencies(InferDependenciesRequest):
    infer_from = ShellSources


@rule(desc="Inferring Shell dependencies by analyzing imports")
async def infer_shell_dependencies(
    request: InferShellDependencies, shell_mapping: ShellMapping, shell_setup: ShellSetup
) -> InferredDependencies:
    if not shell_setup.dependency_inference:
        return InferredDependencies([], sibling_dependencies_inferrable=False)

    address = request.sources_field.address
    wrapped_tgt = await Get(WrappedTarget, Address, address)
    explicitly_provided_deps, hydrated_sources = await MultiGet(
        Get(ExplicitlyProvidedDependencies, DependenciesRequest(wrapped_tgt.target[Dependencies])),
        Get(HydratedSources, HydrateSourcesRequest(request.sources_field)),
    )
    per_file_digests = await MultiGet(
        Get(Digest, DigestSubset(hydrated_sources.snapshot.digest, PathGlobs([f])))
        for f in hydrated_sources.snapshot.files
    )
    all_detected_imports = await MultiGet(
        Get(ParsedShellImports, ParseShellImportsRequest(digest, f))
        for digest, f in zip(per_file_digests, hydrated_sources.snapshot.files)
    )

    result: OrderedSet[Address] = OrderedSet()
    for detected_imports in all_detected_imports:
        for import_path in detected_imports:
            unambiguous = shell_mapping.mapping.get(import_path)
            ambiguous = shell_mapping.ambiguous_modules.get(import_path)
            if unambiguous:
                result.add(unambiguous)
            elif ambiguous:
                explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference(
                    ambiguous,
                    address,
                    import_reference="file",
                    context=f"The target {address} sources `{import_path}`",
                )
                maybe_disambiguated = explicitly_provided_deps.disambiguated_via_ignores(ambiguous)
                if maybe_disambiguated:
                    result.add(maybe_disambiguated)
    return InferredDependencies(sorted(result), sibling_dependencies_inferrable=True)


def rules():
    return (*collect_rules(), UnionRule(InferDependenciesRequest, InferShellDependencies))
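
To make the parsing in parse_shell_imports concrete: for a file containing `source a/f1.sh`, Shellcheck's `--format=json` output includes an SC1091 entry roughly like the hypothetical one below. The exact field values and trailing wording are assumptions; the rule only relies on the "Not following: ... was not specified as input" fragment that PATH_FROM_SHELLCHECK_ERROR matches.

# Hypothetical SC1091 entry for illustration; values are not taken from this commit.
entry = {
    "file": "a/f2.sh",
    "line": 1,
    "column": 1,
    "level": "info",
    "code": 1091,
    "message": "Not following: a/f1.sh was not specified as input (see shellcheck -x)",
}

match = PATH_FROM_SHELLCHECK_ERROR.match(entry["message"])
assert match is not None and match.group(1) == "a/f1.sh"
# infer_shell_dependencies then resolves "a/f1.sh" to an owning address via ShellMapping.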
154 changes: 154 additions & 0 deletions src/python/pants/backend/shell/dependency_inference_test.py
@@ -0,0 +1,154 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

from textwrap import dedent

import pytest

from pants.backend.shell import dependency_inference
from pants.backend.shell.dependency_inference import (
    InferShellDependencies,
    ParsedShellImports,
    ParseShellImportsRequest,
    ShellMapping,
)
from pants.backend.shell.target_types import ShellLibrary, ShellSources
from pants.core.util_rules import external_tool
from pants.engine.addresses import Address
from pants.engine.target import InferredDependencies
from pants.testutil.rule_runner import QueryRule, RuleRunner
from pants.util.frozendict import FrozenDict


@pytest.fixture
def rule_runner() -> RuleRunner:
    return RuleRunner(
        rules=[
            *dependency_inference.rules(),
            *external_tool.rules(),
            QueryRule(ShellMapping, []),
            QueryRule(ParsedShellImports, [ParseShellImportsRequest]),
            QueryRule(InferredDependencies, [InferShellDependencies]),
        ],
        target_types=[ShellLibrary],
    )


def test_shell_mapping(rule_runner: RuleRunner) -> None:
    rule_runner.write_files(
        {
            # Two Shell files belonging to the same target. We should use two file addresses.
            "a/f1.sh": "",
            "a/f2.sh": "",
            "a/BUILD": "shell_library()",
            # >1 target owns the same file, so it's ambiguous.
            "b/f.sh": "",
            "b/BUILD": "shell_library(name='t1')\nshell_library(name='t2')",
        }
    )
    result = rule_runner.request(ShellMapping, [])
    assert result == ShellMapping(
        mapping=FrozenDict(
            {
                "a/f1.sh": Address("a", relative_file_path="f1.sh"),
                "a/f2.sh": Address("a", relative_file_path="f2.sh"),
            }
        ),
        ambiguous_modules=FrozenDict(
            {
                "b/f.sh": (
                    Address("b", target_name="t1", relative_file_path="f.sh"),
                    Address("b", target_name="t2", relative_file_path="f.sh"),
                )
            }
        ),
    )


def test_parse_imports(rule_runner: RuleRunner) -> None:
    def parse(content: str) -> set[str]:
        snapshot = rule_runner.make_snapshot({"subdir/f.sh": content})
        return set(
            rule_runner.request(
                ParsedShellImports, [ParseShellImportsRequest(snapshot.digest, "subdir/f.sh")]
            )
        )

    assert not parse("")
    assert not parse("#!/usr/bin/env bash")
    assert not parse("def python():\n print('hi')")
    assert parse("source a/b.sh") == {"a/b.sh"}
    assert parse(". a/b.sh") == {"a/b.sh"}
    assert parse("source ../parent.sh") == {"../parent.sh"}
    assert parse("echo foo\nsource foo.sh\necho bar; source bar.sh") == {"foo.sh", "bar.sh"}

    # Can use a Shellcheck directive to fix unrecognized imports.
    assert not parse("source ${FOO}")
    assert parse("# shellcheck source=a/b.sh\nsource ${FOO}") == {"a/b.sh"}


def test_dependency_inference(rule_runner: RuleRunner, caplog) -> None:
    rule_runner.write_files(
        {
            "a/f1.sh": dedent(
                """\
                source b/f.sh
                source unknown/f.sh
                """
            ),
            "a/f2.sh": "source a/f1.sh",
            "a/BUILD": "shell_library()",
            "b/f.sh": "",
            "b/BUILD": "shell_library()",
            # Test handling of ambiguous imports. We should warn on the ambiguous dependency, but
            # not warn on the disambiguated one and should infer a dep.
            "ambiguous/dep.sh": "",
            "ambiguous/disambiguated.sh": "",
            "ambiguous/main.sh": dedent(
                """\
                source ambiguous/dep.sh
                source ambiguous/disambiguated.sh
                """
            ),
            "ambiguous/BUILD": dedent(
                """\
                shell_library(name='dep1', sources=['dep.sh', 'disambiguated.sh'])
                shell_library(name='dep2', sources=['dep.sh', 'disambiguated.sh'])
                shell_library(
                    name='main',
                    sources=['main.sh'],
                    dependencies=['!./disambiguated.sh:dep2'],
                )
                """
            ),
        }
    )

    def run_dep_inference(address: Address) -> InferredDependencies:
        tgt = rule_runner.get_target(address)
        return rule_runner.request(
            InferredDependencies, [InferShellDependencies(tgt[ShellSources])]
        )

    build_address = Address("a")
    assert run_dep_inference(build_address) == InferredDependencies(
        [Address("b", relative_file_path="f.sh"), Address("a", relative_file_path="f1.sh")],
        sibling_dependencies_inferrable=True,
    )

    file_address = Address("a", relative_file_path="f1.sh")
    assert run_dep_inference(file_address) == InferredDependencies(
        [Address("b", relative_file_path="f.sh")], sibling_dependencies_inferrable=True
    )

    caplog.clear()
    assert run_dep_inference(Address("ambiguous", target_name="main")) == InferredDependencies(
        [Address("ambiguous", target_name="dep1", relative_file_path="disambiguated.sh")],
        sibling_dependencies_inferrable=True,
    )
    assert len(caplog.records) == 1
    assert "The target ambiguous:main sources `ambiguous/dep.sh`" in caplog.text
    assert "['ambiguous/dep.sh:dep1', 'ambiguous/dep.sh:dep2']" in caplog.text
    assert "disambiguated.sh" not in caplog.text