From 364cd0190e5803a0eea59cc2e2c3677c4c12234f Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Fri, 8 Apr 2022 01:04:55 +0200 Subject: [PATCH 1/4] replace git command use with dulwich This change introduces dulwich as the git backend, instead of system git executable. Together with an LRU cache when inspecting git package, this considerable improves performance for dependency solver and reuse of source when project has git dependencies. In cases where dulwich fails with an HTTPUnauthorized error, Poetry falls back to system provided git client as a temporary measure. This will be replaced in the future once dulwich supports git credentials. --- .github/workflows/main.yml | 3 + poetry.lock | 49 ++- pyproject.toml | 1 + src/poetry/console/commands/init.py | 7 +- src/poetry/installation/executor.py | 23 +- src/poetry/installation/installer.py | 10 +- src/poetry/installation/pip_installer.py | 22 +- src/poetry/puzzle/provider.py | 135 ++++--- .../repositories/installed_repository.py | 9 +- src/poetry/vcs/__init__.py | 0 src/poetry/vcs/git/__init__.py | 6 + src/poetry/vcs/git/backend.py | 378 ++++++++++++++++++ src/poetry/vcs/git/system.py | 65 +++ tests/conftest.py | 24 +- tests/console/conftest.py | 6 +- tests/helpers.py | 34 +- tests/integration/__init__.py | 0 tests/integration/test_utils_vcs_git.py | 225 +++++++++++ tests/puzzle/conftest.py | 30 +- .../repositories/test_installed_repository.py | 15 +- 20 files changed, 884 insertions(+), 158 deletions(-) create mode 100644 src/poetry/vcs/__init__.py create mode 100644 src/poetry/vcs/git/__init__.py create mode 100644 src/poetry/vcs/git/backend.py create mode 100644 src/poetry/vcs/git/system.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_utils_vcs_git.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index af810ff052e..24ae5a1373e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -85,6 +85,9 @@ jobs: - name: Run pytest run: poetry run python -m pytest -p no:sugar -q tests/ + - name: Run pytest (integration suite) + run: poetry run python -m pytest -p no:sugar -q --integration tests/integration + - name: Get Plugin Version (poetry-plugin-export) id: poetry-plugin-export-version run: | diff --git a/poetry.lock b/poetry.lock index 833e94621f9..d7194f5a182 100644 --- a/poetry.lock +++ b/poetry.lock @@ -22,7 +22,7 @@ tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> [[package]] name = "cachecontrol" -version = "0.12.10" +version = "0.12.11" description = "httplib2 caching for requests" category = "main" optional = false @@ -168,6 +168,24 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "dulwich" +version = "0.20.35" +description = "Python Git Library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.24.1" + +[package.extras] +fastimport = ["fastimport"] +https = ["urllib3[secure] (>=1.24.1)"] +pgp = ["gpg"] +watch = ["pyinotify"] + [[package]] name = "entrypoints" version = "0.3" @@ -721,7 +739,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "f74aedfd57d8aa47486cacfd4e2f5a24e952cfe1aee43c7b6a6d801eec5254ea" +content-hash = "2bf89b93e12d19fdadc3799785ef9cae5fd5d3d964ac2cfc4861b5e9d7e9554a" [metadata.files] atomicwrites = [ @@ -733,8 +751,8 @@ attrs = [ {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, ] cachecontrol = [ - {file = "CacheControl-0.12.10-py2.py3-none-any.whl", hash = "sha256:b0d43d8f71948ef5ebdee5fe236b86c6ffc7799370453dccb0e894c20dfa487c"}, - {file = "CacheControl-0.12.10.tar.gz", hash = "sha256:d8aca75b82eec92d84b5d6eb8c8f66ea16f09d2adb09dbca27fe2d5fc8d3732d"}, + {file = "CacheControl-0.12.11-py2.py3-none-any.whl", hash = "sha256:2c75d6a8938cb1933c75c50184549ad42728a27e9f6b92fd677c3151aa72555b"}, + {file = "CacheControl-0.12.11.tar.gz", hash = "sha256:a5b9fcc986b184db101aa280b42ecdcdfc524892596f606858e0b7a8b4d9e144"}, ] cachy = [ {file = "cachy-0.3.0-py2.py3-none-any.whl", hash = "sha256:338ca09c8860e76b275aff52374330efedc4d5a5e45dc1c5b539c1ead0786fe7"}, @@ -889,6 +907,29 @@ distlib = [ {file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"}, {file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"}, ] +dulwich = [ + {file = "dulwich-0.20.35-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:428b5fbb79f8cfba2f5ac6826cc813d1903b44b0780e9ec57e54cbd0f44feb61"}, + {file = "dulwich-0.20.35-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:581c6aa825c9267794747c5cc5ec3831960d96ca7fd9eb0158989e9a4099cbb1"}, + {file = "dulwich-0.20.35-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e11cc7a30b42dbbe5a0b6ebbfbfbb07138a5ffd6175bab2ddbabc9882a1c0438"}, + {file = "dulwich-0.20.35-cp310-cp310-win_amd64.whl", hash = "sha256:22c61a24edb699564b49a9701b723a08fa773f5d3322e8a0cabda897ae86816e"}, + {file = "dulwich-0.20.35-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:9759cf611503681bcdd2950c9d2db04d1c057ecbb62d6fccd095b13771864f1c"}, + {file = "dulwich-0.20.35-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d683b4f30b1dae6b1668336f62f10ff57ebf2a1252c7cc76ad3eeff973879eb"}, + {file = "dulwich-0.20.35-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9d85b6b41c4be6df9ecdc4014d3cbe78a5a44a73c97bccbefac3e5de83bb74be"}, + {file = "dulwich-0.20.35-cp36-cp36m-win_amd64.whl", hash = "sha256:6dc9b082f6ace9890de572260a575a09a996d617f5930edd2858c6f8fedfd7fb"}, + {file = "dulwich-0.20.35-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:28ac2374f09487b02a8cb9b2fad083c358fc927bcfe9803d971614bc00e25076"}, + {file = "dulwich-0.20.35-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:195b21c7a8f85cb2de8938d54fcc6d589d1ccbceaa63bb117796b531065bb68b"}, + {file = "dulwich-0.20.35-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9bdea3a4e8e5e3b1dbd513d9ab8a692f8a9a6f4760633e25c006446bce56fc5e"}, + {file = "dulwich-0.20.35-cp37-cp37m-win_amd64.whl", hash = "sha256:3d3d07b5aa51e6b7d08707c62932da86adbbaaa62552a0129b37d413735c7786"}, + {file = "dulwich-0.20.35-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:5d94cd182fb0da4ec2f182be977b27b9cc1d7dbd0ee9bbf991e101a95fdcd3d8"}, + {file = "dulwich-0.20.35-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f563e9f51e83c47a7df2f3cea79919f700e50d1e5556b6b753730b9cd2be1f47"}, + {file = "dulwich-0.20.35-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f221c3c2fd10260419905bb673cd00129d491e3ed38c7a8d3ac2c7662682dd9b"}, + {file = "dulwich-0.20.35-cp38-cp38-win_amd64.whl", hash = "sha256:c4f4c59445dc5c2341e9cb2fe35e51a890e8a5f42178abec0a96044811c558a9"}, + {file = "dulwich-0.20.35-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3616a949053eb6bdf34581f57d1f6cb7192a4bb635be1a02c37f6f6dda032277"}, + {file = "dulwich-0.20.35-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134a2f586847c2c58569959a784d7a875b551df4226b639267302217799e4234"}, + {file = "dulwich-0.20.35-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c008b6b562af76cf011d3b5450a0d30edc96feeee7856b081d7400bc7cf42653"}, + {file = "dulwich-0.20.35-cp39-cp39-win_amd64.whl", hash = "sha256:bf228800785754d7a55d52c5f122c26c3ced51f0f3df727fde2c9fefb71d5d76"}, + {file = "dulwich-0.20.35.tar.gz", hash = "sha256:953f6301a9df8a091fa88d55eed394a88bf9988cde8be341775354910918c196"}, +] entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, diff --git a/pyproject.toml b/pyproject.toml index 86976b24947..defd5ed760c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ tomlkit = ">=0.7.0,<1.0.0" # exclude 20.4.5 - 20.4.6 due to https://github.com/pypa/pip/issues/9953 virtualenv = "(>=20.4.3,<20.4.5 || >=20.4.7)" urllib3 = "^1.26.0" +dulwich = "^0.20.35" [tool.poetry.dev-dependencies] tox = "^3.18" diff --git a/src/poetry/console/commands/init.py b/src/poetry/console/commands/init.py index b2271518ed9..42f8c705210 100644 --- a/src/poetry/console/commands/init.py +++ b/src/poetry/console/commands/init.py @@ -436,8 +436,13 @@ def _parse_requirements(self, requirements: list[str]) -> list[dict[str, Any]]: if extras: pair["extras"] = extras + source_root = ( + self.env.path.joinpath("src") + if isinstance(self, EnvCommand) and self.env + else None + ) package = Provider.get_package_from_vcs( - "git", url.url, rev=pair.get("rev") + "git", url=url.url, rev=pair.get("rev"), source_root=source_root ) pair["name"] = package.name result.append(pair) diff --git a/src/poetry/installation/executor.py b/src/poetry/installation/executor.py index 983a3763205..6ba757ef28c 100644 --- a/src/poetry/installation/executor.py +++ b/src/poetry/installation/executor.py @@ -576,7 +576,7 @@ def _install_directory(self, operation: Install | Update) -> int: return self.pip_install(req, upgrade=True) def _install_git(self, operation: Install | Update) -> int: - from poetry.core.vcs import Git + from poetry.vcs.git import Git package = operation.package operation_message = self.get_operation_message(operation) @@ -586,24 +586,15 @@ def _install_git(self, operation: Install | Update) -> int: ) self._write(operation, message) - src_dir = self._env.path / "src" / package.name - if src_dir.exists(): - remove_directory(src_dir, force=True) - - src_dir.parent.mkdir(exist_ok=True) - - git = Git() - git.clone(package.source_url, src_dir) - - reference = package.source_resolved_reference - if not reference: - reference = package.source_reference - - git.checkout(reference, src_dir) + source = Git.clone( + url=package.source_url, + source_root=self._env.path / "src", + revision=package.source_resolved_reference or package.source_reference, + ) # Now we just need to install from the source directory original_url = package.source_url - package._source_url = str(src_dir) + package._source_url = str(source.path) status_code = self._install_directory(operation) diff --git a/src/poetry/installation/installer.py b/src/poetry/installation/installer.py index 4a63e0767aa..0b429b69a37 100644 --- a/src/poetry/installation/installer.py +++ b/src/poetry/installation/installer.py @@ -199,7 +199,10 @@ def _do_refresh(self) -> int: self._io, ) - ops = solver.solve(use_latest=[]).calculate_operations() + with solver.provider.use_source_root( + source_root=self._env.path.joinpath("src") + ): + ops = solver.solve(use_latest=[]).calculate_operations() local_repo = Repository() self._populate_local_repo(local_repo, ops) @@ -236,7 +239,10 @@ def _do_install(self, local_repo: Repository) -> int: self._io, ) - ops = solver.solve(use_latest=self._whitelist).calculate_operations() + with solver.provider.use_source_root( + source_root=self._env.path.joinpath("src") + ): + ops = solver.solve(use_latest=self._whitelist).calculate_operations() else: self._io.write_line("Installing dependencies from lock file") diff --git a/src/poetry/installation/pip_installer.py b/src/poetry/installation/pip_installer.py index ec8899daf26..5e6d01d9f70 100644 --- a/src/poetry/installation/pip_installer.py +++ b/src/poetry/installation/pip_installer.py @@ -248,27 +248,19 @@ def install_directory(self, package: Package) -> str | int: def install_git(self, package: Package) -> None: from poetry.core.packages.package import Package - from poetry.core.vcs.git import Git - src_dir = self._env.path / "src" / package.name - if src_dir.exists(): - remove_directory(src_dir, force=True) + from poetry.vcs.git import Git - src_dir.parent.mkdir(exist_ok=True) - - git = Git() - git.clone(package.source_url, src_dir) - - reference = package.source_resolved_reference - if not reference: - reference = package.source_reference - - git.checkout(reference, src_dir) + source = Git.clone( + url=package.source_url, + source_root=self._env.path / "src", + revision=package.source_resolved_reference or package.source_reference, + ) # Now we just need to install from the source directory pkg = Package(package.name, package.version) pkg._source_type = "directory" - pkg._source_url = str(src_dir) + pkg._source_url = str(source.path) pkg.develop = package.develop self.install_directory(pkg) diff --git a/src/poetry/puzzle/provider.py b/src/poetry/puzzle/provider.py index a08e8fa7328..f14e83c50fd 100644 --- a/src/poetry/puzzle/provider.py +++ b/src/poetry/puzzle/provider.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import logging import os import re @@ -10,7 +11,6 @@ from collections import defaultdict from contextlib import contextmanager from pathlib import Path -from tempfile import mkdtemp from typing import TYPE_CHECKING from typing import Any from typing import Iterable @@ -20,7 +20,6 @@ from poetry.core.packages.utils.utils import get_python_constraint_from_marker from poetry.core.semver.empty_constraint import EmptyConstraint from poetry.core.semver.version import Version -from poetry.core.vcs.git import Git from poetry.core.version.markers import AnyMarker from poetry.core.version.markers import MarkerUnion @@ -34,7 +33,7 @@ from poetry.packages.package_collection import PackageCollection from poetry.puzzle.exceptions import OverrideNeeded from poetry.utils.helpers import download_file -from poetry.utils.helpers import remove_directory +from poetry.vcs.git import Git if TYPE_CHECKING: @@ -61,12 +60,43 @@ def _formatter_elapsed(self) -> str: return f"{elapsed:.1f}s" +@functools.lru_cache(maxsize=None) +def _get_package_from_git( + url: str, + branch: str | None = None, + tag: str | None = None, + rev: str | None = None, + source_root: Path | None = None, +) -> Package: + source = Git.clone( + url=url, + source_root=source_root, + branch=branch, + tag=tag, + revision=rev, + clean=False, + ) + revision = Git.get_revision(source) + + package = Provider.get_package_from_directory(Path(source.path)) + package._source_type = "git" + package._source_url = url + package._source_reference = rev or tag or branch or "HEAD" + package._source_resolved_reference = revision + + return package + + class Provider: UNSAFE_PACKAGES: set[str] = set() def __init__( - self, package: Package, pool: Pool, io: Any, env: Env | None = None + self, + package: Package, + pool: Pool, + io: Any, + env: Env | None = None, ) -> None: self._package = package self._pool = pool @@ -78,6 +108,7 @@ def __init__( self._overrides: dict[DependencyPackage, dict[str, Dependency]] = {} self._deferred_cache: dict[Dependency, Package] = {} self._load_deferred = True + self._source_root: Path | None = None @property def pool(self) -> Pool: @@ -92,6 +123,15 @@ def set_overrides(self, overrides: dict) -> None: def load_deferred(self, load_deferred: bool) -> None: self._load_deferred = load_deferred + @contextmanager + def use_source_root(self, source_root: Path) -> Iterator[Provider]: + original_source_root = self._source_root + self._source_root = source_root + + yield self + + self._source_root = original_source_root + @contextmanager def use_environment(self, env: Env) -> Iterator[Provider]: original_env = self._env @@ -105,6 +145,17 @@ def use_environment(self, env: Env) -> Iterator[Provider]: self._env = original_env self._python_constraint = original_python_constraint + @staticmethod + def validate_package_for_dependency( + dependency: Dependency, package: Package + ) -> None: + if dependency.name != package.name: + # For now, the dependency's name must match the actual package's name + raise RuntimeError( + f"The dependency name for {dependency.name} does not match the actual" + f" package's name: {package.name}" + ) + def search_for( self, dependency: ( @@ -161,8 +212,12 @@ def search_for_vcs(self, dependency: VCSDependency) -> list[Package]: branch=dependency.branch, tag=dependency.tag, rev=dependency.rev, - name=dependency.name, + source_root=self._source_root + or (self._env.path.joinpath("src") if self._env else None), ) + + self.validate_package_for_dependency(dependency=dependency, package=package) + package.develop = dependency.develop dependency._constraint = package.version @@ -176,44 +231,21 @@ def search_for_vcs(self, dependency: VCSDependency) -> list[Package]: return [package] - @classmethod + @staticmethod def get_package_from_vcs( - cls, vcs: str, url: str, branch: str | None = None, tag: str | None = None, rev: str | None = None, - name: str | None = None, + source_root: Path | None = None, ) -> Package: if vcs != "git": raise ValueError(f"Unsupported VCS dependency {vcs}") - suffix = url.split("/")[-1].rstrip(".git") - tmp_dir = Path(mkdtemp(prefix=f"pypoetry-git-{suffix}")) - - try: - git = Git() - git.clone(url, tmp_dir) - reference = branch or tag or rev - if reference is not None: - git.checkout(reference, tmp_dir) - else: - reference = "HEAD" - - revision = git.rev_parse(reference, tmp_dir).strip() - - package = cls.get_package_from_directory(tmp_dir, name=name) - package._source_type = "git" - package._source_url = url - package._source_reference = reference - package._source_resolved_reference = revision - except Exception: - raise - finally: - remove_directory(tmp_dir, force=True) - - return package + return _get_package_from_git( + url=url, branch=branch, tag=tag, rev=rev, source_root=source_root + ) def search_for_file(self, dependency: FileDependency) -> list[Package]: if dependency in self._deferred_cache: @@ -228,12 +260,7 @@ def search_for_file(self, dependency: FileDependency) -> list[Package]: self._deferred_cache[dependency] = (dependency, package) - if dependency.name != package.name: - # For now, the dependency's name must match the actual package's name - raise RuntimeError( - f"The dependency name for {dependency.name} does not match the actual" - f" package's name: {package.name}" - ) + self.validate_package_for_dependency(dependency=dependency, package=package) if dependency.base is not None: package.root_dir = dependency.base @@ -263,15 +290,15 @@ def search_for_directory(self, dependency: DirectoryDependency) -> list[Package] package = _package.clone() else: - package = self.get_package_from_directory( - dependency.full_path, name=dependency.name - ) + package = self.get_package_from_directory(dependency.full_path) dependency._constraint = package.version dependency._pretty_constraint = package.version.text self._deferred_cache[dependency] = (dependency, package) + self.validate_package_for_dependency(dependency=dependency, package=package) + package.develop = dependency.develop if dependency.base is not None: @@ -280,21 +307,8 @@ def search_for_directory(self, dependency: DirectoryDependency) -> list[Package] return [package] @classmethod - def get_package_from_directory( - cls, directory: Path, name: str | None = None - ) -> Package: - package = PackageInfo.from_directory(path=directory).to_package( - root_dir=directory - ) - - if name and name != package.name: - # For now, the dependency's name must match the actual package's name - raise RuntimeError( - f"The dependency name for {name} does not match the actual package's" - f" name: {package.name}" - ) - - return package + def get_package_from_directory(cls, directory: Path) -> Package: + return PackageInfo.from_directory(path=directory).to_package(root_dir=directory) def search_for_url(self, dependency: URLDependency) -> list[Package]: if dependency in self._deferred_cache: @@ -302,12 +316,7 @@ def search_for_url(self, dependency: URLDependency) -> list[Package]: package = self.get_package_from_url(dependency.url) - if dependency.name != package.name: - # For now, the dependency's name must match the actual package's name - raise RuntimeError( - f"The dependency name for {dependency.name} does not match the actual" - f" package's name: {package.name}" - ) + self.validate_package_for_dependency(dependency=dependency, package=package) for extra in dependency.extras: if extra in package.extras: diff --git a/src/poetry/repositories/installed_repository.py b/src/poetry/repositories/installed_repository.py index e1568df41c4..d7bea9ed652 100644 --- a/src/poetry/repositories/installed_repository.py +++ b/src/poetry/repositories/installed_repository.py @@ -77,13 +77,10 @@ def get_package_paths(cls, env: Env, name: str) -> set[Path]: @classmethod def get_package_vcs_properties_from_path(cls, src: Path) -> tuple[str, str, str]: - from poetry.core.vcs.git import Git + from poetry.vcs.git import Git - git = Git() - revision = git.rev_parse("HEAD", src).strip() - url = git.remote_url(src) - - return "git", url, revision + info = Git.info(repo=src) + return "git", info.origin, info.revision @classmethod def is_vcs_package(cls, package: Path | Package, env: Env) -> bool: diff --git a/src/poetry/vcs/__init__.py b/src/poetry/vcs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/poetry/vcs/git/__init__.py b/src/poetry/vcs/git/__init__.py new file mode 100644 index 00000000000..18ac476e2a2 --- /dev/null +++ b/src/poetry/vcs/git/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from poetry.vcs.git.backend import Git + + +__all__ = [Git.__name__] diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py new file mode 100644 index 00000000000..716e2e0aac9 --- /dev/null +++ b/src/poetry/vcs/git/backend.py @@ -0,0 +1,378 @@ +from __future__ import annotations + +import dataclasses +import logging +import re + +from pathlib import Path +from subprocess import CalledProcessError +from typing import TYPE_CHECKING + +from dulwich import porcelain +from dulwich.client import HTTPUnauthorized +from dulwich.client import get_transport_and_path +from dulwich.config import ConfigFile +from dulwich.config import parse_submodules +from dulwich.errors import NotGitRepository +from dulwich.refs import ANNOTATED_TAG_SUFFIX +from dulwich.repo import Repo + +from poetry.console.exceptions import PoetrySimpleConsoleException +from poetry.utils.helpers import safe_rmtree + + +if TYPE_CHECKING: + from dataclasses import InitVar + + from dulwich.client import FetchPackResult + from dulwich.client import GitClient + + +logger = logging.getLogger(__name__) + + +def is_revision_sha(revision: str | None) -> bool: + return re.match(r"^\b[0-9a-f]{5,40}\b$", revision or "") is not None + + +def annotated_tag(ref: str | bytes) -> bytes: + if isinstance(ref, str): + ref = ref.encode("utf-8") + return ref + ANNOTATED_TAG_SUFFIX + + +@dataclasses.dataclass +class GitRefSpec: + branch: str | None = None + revision: str | None = None + tag: str | None = None + ref: bytes = dataclasses.field(default_factory=lambda: b"HEAD") + + def resolve(self, remote_refs: FetchPackResult) -> None: + """ + Resolve the ref using the provided remote refs. + """ + self._normalise(remote_refs=remote_refs) + self._set_head(remote_refs=remote_refs) + + def _normalise(self, remote_refs: FetchPackResult) -> None: + """ + Internal helper method to determine if given revision is + 1. a branch or tag; if so, set corresponding properties. + 2. a short sha; if so, resolve full sha and set as revision + """ + if self.revision: + ref = f"refs/tags/{self.revision}".encode() + if ref in remote_refs.refs or annotated_tag(ref) in remote_refs.refs: + # this is a tag, incorrectly specified as a revision, tags take priority + self.tag = self.revision + self.revision = None + elif ( + self.revision.encode("utf-8") in remote_refs.refs + or f"refs/heads/{self.revision}".encode() in remote_refs.refs + ): + # this is most likely a ref spec or a branch incorrectly specified + self.branch = self.revision + self.revision = None + elif ( + self.branch + and f"refs/heads/{self.branch}".encode() not in remote_refs.refs + and ( + f"refs/tags/{self.branch}".encode() in remote_refs.refs + or annotated_tag(f"refs/tags/{self.branch}") in remote_refs.refs + ) + ): + # this is a tag incorrectly specified as a branch + self.tag = self.branch + self.branch = None + + if self.revision and self.is_sha_short: + # revision is a short sha, resolve to full sha + short_sha = self.revision.encode("utf-8") + for sha in remote_refs.refs.values(): + if sha.startswith(short_sha): + self.revision = sha.decode("utf-8") + break + + def _set_head(self, remote_refs: FetchPackResult) -> None: + """ + Internal helper method to populate ref and set it's sha as the remote's head + and default ref. + """ + self.ref = remote_refs.symrefs[b"HEAD"] + + if self.revision: + head = self.revision.encode("utf-8") + else: + if self.tag: + ref = f"refs/tags/{self.tag}".encode() + annotated = annotated_tag(ref) + self.ref = annotated if annotated in remote_refs.refs else ref + elif self.branch: + self.ref = ( + self.branch.encode("utf-8") + if self.is_ref + else f"refs/heads/{self.branch}".encode() + ) + head = remote_refs.refs[self.ref] + + remote_refs.refs[self.ref] = remote_refs.refs[b"HEAD"] = head + + @property + def key(self) -> str: + return self.revision or self.branch or self.tag or self.ref.decode("utf-8") + + @property + def is_sha(self) -> bool: + return is_revision_sha(revision=self.revision) + + @property + def is_ref(self) -> bool: + return self.branch is not None and self.branch.startswith("refs/") + + @property + def is_sha_short(self) -> bool: + return self.revision is not None and self.is_sha and len(self.revision) < 40 + + +@dataclasses.dataclass +class GitRepoLocalInfo: + repo: InitVar[Repo | Path | str] + origin: str = dataclasses.field(init=False) + revision: str = dataclasses.field(init=False) + + def __post_init__(self, repo: Repo | Path | str) -> None: + repo = Git.as_repo(repo=repo) if not isinstance(repo, Repo) else repo + self.origin = Git.get_remote_url(repo=repo, remote="origin") + self.revision = Git.get_revision(repo=repo) + + +class Git: + @staticmethod + def as_repo(repo: Path | str) -> Repo: + return Repo(repo) + + @staticmethod + def get_remote_url(repo: Repo, remote: str = "origin") -> str: + with repo: + config = repo.get_config() + section = (b"remote", remote.encode("utf-8")) + return config.get(section, b"url") if config.has_section(section) else "" + + @staticmethod + def get_revision(repo: Repo) -> str: + with repo: + return repo.head().decode("utf-8") + + @classmethod + def info(cls, repo: Repo | Path | str) -> GitRepoLocalInfo: + return GitRepoLocalInfo(repo=repo) + + @staticmethod + def get_name_from_source_url(url: str) -> str: + return re.sub(r"(.git)?$", "", url.rsplit("/", 1)[-1]) + + @classmethod + def _fetch_remote_refs(cls, url: str, local: Repo) -> FetchPackResult: + """ + Helper method to fetch remote refs. + """ + client: GitClient + path: str + client, path = get_transport_and_path(url) + + with local: + return client.fetch( + path, + local, + determine_wants=local.object_store.determine_wants_all, + ) + + @staticmethod + def _clone_legacy(url: str, refspec: GitRefSpec, target: Path) -> Repo: + """ + Helper method to facilitate fallback to using system provided git client via + subprocess calls. + """ + from poetry.vcs.git.system import SystemGit + + if target.exists(): + safe_rmtree(path=target, ignore_errors=True) + + revision = refspec.tag or refspec.branch or refspec.revision or "HEAD" + + try: + SystemGit.clone(url, target) + except CalledProcessError: + raise PoetrySimpleConsoleException( + f"Failed to clone {url}, check your git configuration and permissions" + " for this repository." + ) + + if revision: + revision.replace("refs/head/", "") + revision.replace("refs/tags/", "") + + try: + SystemGit.checkout(revision, target) + except CalledProcessError: + raise PoetrySimpleConsoleException( + f"Failed to checkout {url} at '{revision}'" + ) + + return Repo(target) + + @classmethod + def _clone(cls, url: str, refspec: GitRefSpec, target: Path) -> Repo: + """ + Helper method to clone a remove repository at the given `url` at the specified + ref spec. + """ + if not target.exists(): + local = Repo.init(target, mkdir=True) + porcelain.remote_add(local, "origin", url) + else: + local = Repo(target) + + remote_refs = cls._fetch_remote_refs(url=url, local=local) + + try: + refspec.resolve(remote_refs=remote_refs) + except KeyError: # branch / ref does not exist + raise PoetrySimpleConsoleException( + f"Failed to clone {url} at '{refspec.key}'" + ) + + # ensure local HEAD matches remote + local.refs[b"HEAD"] = remote_refs.refs[b"HEAD"] + + if refspec.is_ref: + # set ref to current HEAD + local.refs[refspec.ref] = local.refs[b"HEAD"] + + for base, prefix in { + (b"refs/remotes/origin", b"refs/heads/"), + (b"refs/tags", b"refs/tags"), + }: + local.refs.import_refs( + base=base, + other={ + n[len(prefix) :]: v + for (n, v) in remote_refs.refs.items() + if n.startswith(prefix) and not n.endswith(ANNOTATED_TAG_SUFFIX) + }, + ) + + try: + with local: + local.reset_index() + except (AssertionError, KeyError) as e: + # this implies the ref we need does not exist or is invalid + if isinstance(e, KeyError): + # the local copy is at a bad state, lets remove it + safe_rmtree(local.path, ignore_errors=True) + + if isinstance(e, AssertionError) and "Invalid object name" not in str(e): + raise + + raise PoetrySimpleConsoleException( + f"Failed to clone {url} at '{refspec.key}'" + ) + + return local + + @classmethod + def _clone_submodules(cls, repo: Repo) -> None: + """ + Helper method to identify configured submodules and clone them recursively. + """ + repo_root = Path(repo.path) + modules_config = repo_root.joinpath(".gitmodules") + + if modules_config.exists(): + config = ConfigFile.from_path(modules_config) + + url: bytes + path: bytes + for path, url, _ in parse_submodules(config): + path_relative = Path(path.decode("utf-8")) + path_absolute = repo_root.joinpath(path_relative) + + source_root = path_absolute.parent + source_root.mkdir(parents=True, exist_ok=True) + + with repo: + revision = repo.open_index()[path].sha.decode("utf-8") + + cls.clone( + url=url.decode("utf-8"), + source_root=source_root, + name=path_relative.name, + revision=revision, + clean=path_absolute.exists() + and not path_absolute.joinpath(".git").is_dir(), + ) + + @classmethod + def clone( + cls, + url: str, + name: str | None = None, + branch: str | None = None, + tag: str | None = None, + revision: str | None = None, + source_root: Path | None = None, + clean: bool = False, + ) -> Repo: + if not source_root: + from poetry.factory import Factory + + source_root = Path(Factory.create_config().get("cache-dir")) / "src" + + source_root.mkdir(parents=True, exist_ok=True) + + name = name or cls.get_name_from_source_url(url=url) + target = source_root / name + refspec = GitRefSpec(branch=branch, revision=revision, tag=tag) + + if target.exists(): + if clean: + # force clean the local copy if it exists, do not reuse + safe_rmtree(target, ignore_errors=True) + else: + # check if the current local copy matches the requested ref spec + try: + current_repo = Repo(target) + + with current_repo: + current_sha = current_repo.head().decode("utf-8") + except (NotGitRepository, AssertionError, KeyError): + # something is wrong with the current checkout, clean it + safe_rmtree(target, ignore_errors=True) + else: + if not is_revision_sha(revision=current_sha): + # head is not a sha, this will cause issues later, lets reset + safe_rmtree(target, ignore_errors=True) + elif refspec.is_sha and current_sha.startswith(refspec.revision): + # if revision is used short-circuit remote fetch head matches + return current_repo + + try: + local = cls._clone(url=url, refspec=refspec, target=target) + cls._clone_submodules(repo=local) + except HTTPUnauthorized: + # we do this here to handle http authenticated repositories as dulwich + # does not currently support using credentials from git-credential helpers. + # upstream issue: https://github.com/jelmer/dulwich/issues/873 + # + # this is a little inefficient, however preferred as this is transparent + # without additional configuration or changes for existing projects that + # use http basic auth credentials. + logger.debug( + "Unable to fetch from private repository '{%s}', falling back to" + " system git", + url, + ) + local = cls._clone_legacy(url=url, refspec=refspec, target=target) + + return local diff --git a/src/poetry/vcs/git/system.py b/src/poetry/vcs/git/system.py new file mode 100644 index 00000000000..0b75d5582d9 --- /dev/null +++ b/src/poetry/vcs/git/system.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import subprocess + +from typing import TYPE_CHECKING + +from dulwich.client import find_git_command + + +if TYPE_CHECKING: + from pathlib import Path + from typing import Any + + +class SystemGit: + @classmethod + def clone(cls, repository: str, dest: Path) -> str: + cls._check_parameter(repository) + + return cls.run("clone", "--recurse-submodules", "--", repository, str(dest)) + + @classmethod + def checkout(cls, rev: str, target: Path | None = None) -> str: + args = [] + + if target: + args += [ + "--git-dir", + (target / ".git").as_posix(), + "--work-tree", + target.as_posix(), + ] + + cls._check_parameter(rev) + + args += ["checkout", rev] + + return cls.run(*args) + + @staticmethod + def run(*args: Any, **kwargs: Any) -> str: + folder = kwargs.pop("folder", None) + if folder: + args = ( + "--git-dir", + (folder / ".git").as_posix(), + "--work-tree", + folder.as_posix(), + ) + args + + return ( + subprocess.check_output( + find_git_command() + list(args), stderr=subprocess.STDOUT + ) + .decode() + .strip() + ) + + @staticmethod + def _check_parameter(parameter: str) -> None: + """ + Checks a git parameter to avoid unwanted code execution. + """ + if parameter.strip().startswith("-"): + raise RuntimeError(f"Invalid Git parameter: {parameter}") diff --git a/tests/conftest.py b/tests/conftest.py index 6253f50fbd2..b4664b340aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,8 @@ if TYPE_CHECKING: + from _pytest.config import Config as PyTestConfig + from _pytest.config.argparsing import Parser from pytest_mock import MockerFixture from poetry.poetry import Poetry @@ -45,6 +47,23 @@ from tests.types import ProjectFactory +def pytest_addoption(parser: Parser) -> None: + parser.addoption( + "--integration", + action="store_true", + dest="integration", + default=False, + help="enable integration tests", + ) + + +def pytest_configure(config: PyTestConfig) -> None: + config.addinivalue_line("markers", "integration: mark integration tests") + + if not config.option.integration: + config.option.markexpr = "not integration" + + class Config(BaseConfig): def get(self, setting_name: str, default: Any = None) -> Any: self.merge(self._config_source.config) @@ -252,9 +271,8 @@ def isolate_environ() -> Iterator[None]: @pytest.fixture(autouse=True) def git_mock(mocker: MockerFixture) -> None: # Patch git module to not actually clone projects - mocker.patch("poetry.core.vcs.git.Git.clone", new=mock_clone) - mocker.patch("poetry.core.vcs.git.Git.checkout", new=lambda *_: None) - p = mocker.patch("poetry.core.vcs.git.Git.rev_parse") + mocker.patch("poetry.vcs.git.Git.clone", new=mock_clone) + p = mocker.patch("poetry.vcs.git.Git.get_revision") p.return_value = "9cf87a285a2d3fbb0b9fa621997b3acc3631ed24" diff --git a/tests/console/conftest.py b/tests/console/conftest.py index 697f7badf81..022b14a2ebc 100644 --- a/tests/console/conftest.py +++ b/tests/console/conftest.py @@ -71,9 +71,8 @@ def setup( p.return_value = installed # Patch git module to not actually clone projects - mocker.patch("poetry.core.vcs.git.Git.clone", new=mock_clone) - mocker.patch("poetry.core.vcs.git.Git.checkout", new=lambda *_: None) - p = mocker.patch("poetry.core.vcs.git.Git.rev_parse") + mocker.patch("poetry.vcs.git.Git.clone", new=mock_clone) + p = mocker.patch("poetry.vcs.git.Git.get_revision") p.return_value = "9cf87a285a2d3fbb0b9fa621997b3acc3631ed24" # Patch the virtual environment creation do actually do nothing @@ -99,6 +98,7 @@ def project_directory() -> str: @pytest.fixture def poetry(repo: TestRepository, project_directory: str, config: Config) -> Poetry: + p = Factory().create_poetry( Path(__file__).parent.parent / "fixtures" / project_directory ) diff --git a/tests/helpers.py b/tests/helpers.py index 08eb045baba..1644d1aed27 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import re import shutil import urllib.parse @@ -90,19 +91,34 @@ def copy_or_symlink(source: Path, dest: Path) -> None: os.symlink(str(source), str(dest)) -def mock_clone(_: Any, source: str, dest: Path) -> None: +class MockDulwichRepo: + def __init__(self, root: Path | str, **__: Any) -> None: + self.path = str(root) + + def head(self) -> bytes: + return b"9cf87a285a2d3fbb0b9fa621997b3acc3631ed24" + + +def mock_clone( + url: str, + *_: Any, + source_root: Path | None = None, + **__: Any, +) -> MockDulwichRepo: # Checking source to determine which folder we need to copy - parsed = ParsedUrl.parse(source) + parsed = ParsedUrl.parse(url) + path = re.sub(r"(.git)?$", "", parsed.pathname.lstrip("/")) + + folder = Path(__file__).parent / "fixtures" / "git" / parsed.resource / path + + if not source_root: + source_root = Path(Factory.create_config().get("cache-dir")) / "src" - folder = ( - Path(__file__).parent - / "fixtures" - / "git" - / parsed.resource - / parsed.pathname.lstrip("/").rstrip(".git") - ) + dest = source_root / path + dest.parent.mkdir(parents=True, exist_ok=True) copy_or_symlink(folder, dest) + return MockDulwichRepo(dest) def mock_download(url: str, dest: str, **__: Any) -> None: diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py new file mode 100644 index 00000000000..a5bdd21c8fb --- /dev/null +++ b/tests/integration/test_utils_vcs_git.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import uuid + +from copy import deepcopy +from hashlib import sha1 +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from dulwich.client import HTTPUnauthorized +from dulwich.client import get_transport_and_path +from dulwich.repo import Repo +from poetry.core.pyproject.toml import PyProjectTOML + +from poetry.console.exceptions import PoetrySimpleConsoleException +from poetry.vcs.git import Git +from poetry.vcs.git.backend import GitRefSpec + + +if TYPE_CHECKING: + from _pytest.tmpdir import TempdirFactory + from dulwich.client import FetchPackResult + from dulwich.client import GitClient + from pytest_mock import MockerFixture + + from tests.conftest import Config + + +# these tests are integration as they rely on an external repository +# see `source_url` fixture +pytestmark = pytest.mark.integration + + +@pytest.fixture(autouse=True) +def git_mock() -> None: + pass + + +@pytest.fixture(autouse=True) +def setup(config: Config) -> None: + pass + + +REVISION_TO_VERSION_MAP = { + "b6204750a763268e941cec1f05f8986b6c66913e": "0.1.0", # Annotated Tag + "18d3ff247d288da701fc7f9ce2ec718388fca266": "0.1.1-alpha.0", + "dd07e8d4efb82690e7975b289917a7782fbef29b": "0.2.0-alpha.0", + "7263819922b4cd008afbb447f425a562432dad7d": "0.2.0-alpha.1", +} + +BRANCH_TO_REVISION_MAP = {"0.1": "18d3ff247d288da701fc7f9ce2ec718388fca266"} + +TAG_TO_REVISION_MAP = {"v0.1.0": "b6204750a763268e941cec1f05f8986b6c66913e"} + +REF_TO_REVISION_MAP = { + "branch": BRANCH_TO_REVISION_MAP, + "tag": TAG_TO_REVISION_MAP, +} + + +@pytest.fixture(scope="module") +def source_url() -> str: + return "https://github.com/python-poetry/test-fixture-vcs-repository.git" + + +@pytest.fixture(scope="module") +def source_directory_name(source_url: str) -> str: + return Git.get_name_from_source_url(url=source_url) + + +@pytest.fixture(scope="module") +def local_repo(tmpdir_factory: TempdirFactory, source_directory_name: str) -> Repo: + with Repo.init( + tmpdir_factory.mktemp("src") / source_directory_name, mkdir=True + ) as repo: + yield repo + + +@pytest.fixture(scope="module") +def _remote_refs(source_url: str, local_repo: Repo) -> FetchPackResult: + client: GitClient + path: str + client, path = get_transport_and_path(source_url) + return client.fetch( + path, local_repo, determine_wants=local_repo.object_store.determine_wants_all + ) + + +@pytest.fixture +def remote_refs(_remote_refs: FetchPackResult) -> FetchPackResult: + return deepcopy(_remote_refs) + + +@pytest.fixture(scope="module") +def remote_default_ref(_remote_refs: FetchPackResult) -> bytes: + return _remote_refs.symrefs[b"HEAD"] + + +@pytest.fixture(scope="module") +def remote_default_branch(remote_default_ref: bytes) -> str: + return remote_default_ref.decode("utf-8").replace("refs/heads/", "") + + +def test_git_clone_default_branch_head( + source_url: str, remote_refs: FetchPackResult, remote_default_ref: bytes +): + with Git.clone(url=source_url) as repo: + assert remote_refs.refs[remote_default_ref] == repo.head() + + +def test_git_clone_fails_for_non_existent_branch(source_url: str): + branch = uuid.uuid4().hex + + with pytest.raises(PoetrySimpleConsoleException) as e: + Git.clone(url=source_url, branch=branch) + + assert f"Failed to clone {source_url} at '{branch}'" in str(e.value) + + +def test_git_clone_fails_for_non_existent_revision(source_url: str): + revision = sha1(uuid.uuid4().bytes).hexdigest() + + with pytest.raises(PoetrySimpleConsoleException) as e: + Git.clone(url=source_url, revision=revision) + + assert f"Failed to clone {source_url} at '{revision}'" in str(e.value) + + +def assert_version(repo: Repo, expected_revision: str) -> None: + version = PyProjectTOML( + path=Path(repo.path).joinpath("pyproject.toml") + ).poetry_config["version"] + + revision = Git.get_revision(repo=repo) + + assert revision == expected_revision + assert revision in REVISION_TO_VERSION_MAP + assert version == REVISION_TO_VERSION_MAP[revision] + + +def test_git_clone_when_branch_is_ref(source_url: str) -> None: + with Git.clone(url=source_url, branch="refs/heads/0.1") as repo: + assert_version(repo, BRANCH_TO_REVISION_MAP["0.1"]) + + +@pytest.mark.parametrize("branch", [*BRANCH_TO_REVISION_MAP.keys()]) +def test_git_clone_branch( + source_url: str, remote_refs: FetchPackResult, branch: str +) -> None: + with Git.clone(url=source_url, branch=branch) as repo: + assert_version(repo, BRANCH_TO_REVISION_MAP[branch]) + + +@pytest.mark.parametrize("tag", [*TAG_TO_REVISION_MAP.keys()]) +def test_git_clone_tag(source_url: str, remote_refs: FetchPackResult, tag: str) -> None: + with Git.clone(url=source_url, tag=tag) as repo: + assert_version(repo, TAG_TO_REVISION_MAP[tag]) + + +def test_git_clone_multiple_times( + source_url: str, remote_refs: FetchPackResult +) -> None: + for revision in REVISION_TO_VERSION_MAP: + with Git.clone(url=source_url, revision=revision) as repo: + assert_version(repo, revision) + + +def test_git_clone_revision_is_branch( + source_url: str, remote_refs: FetchPackResult +) -> None: + with Git.clone(url=source_url, revision="0.1") as repo: + assert_version(repo, BRANCH_TO_REVISION_MAP["0.1"]) + + +def test_git_clone_revision_is_ref( + source_url: str, remote_refs: FetchPackResult +) -> None: + with Git.clone(url=source_url, revision="refs/heads/0.1") as repo: + assert_version(repo, BRANCH_TO_REVISION_MAP["0.1"]) + + +@pytest.mark.parametrize( + ("revision", "expected_revision"), + [ + ("0.1", BRANCH_TO_REVISION_MAP["0.1"]), + ("v0.1.0", TAG_TO_REVISION_MAP["v0.1.0"]), + *zip(REVISION_TO_VERSION_MAP, REVISION_TO_VERSION_MAP), + ], +) +def test_git_clone_revision_is_tag( + source_url: str, remote_refs: FetchPackResult, revision: str, expected_revision: str +) -> None: + with Git.clone(url=source_url, revision=revision) as repo: + assert_version(repo, expected_revision) + + +def test_git_clone_clones_submodules(source_url: str) -> None: + with Git.clone(url=source_url) as repo: + submodule_package_directory = ( + Path(repo.path) / "submodules" / "sample-namespace-packages" + ) + + assert submodule_package_directory.exists() + assert submodule_package_directory.joinpath("README.md").exists() + assert len(list(submodule_package_directory.glob("*"))) > 1 + + +def test_system_git_fallback_on_http_401( + mocker: MockerFixture, source_url: str +) -> None: + spy = mocker.spy(Git, "_clone_legacy") + mocker.patch.object(Git, "_clone", side_effect=HTTPUnauthorized(None, None)) + + with Git.clone(url=source_url, branch="0.1") as repo: + path = Path(repo.path) + assert_version(repo, BRANCH_TO_REVISION_MAP["0.1"]) + + spy.assert_called_with( + url="https://github.com/python-poetry/test-fixture-vcs-repository.git", + target=path, + refspec=GitRefSpec(branch="0.1", revision=None, tag=None, ref=b"HEAD"), + ) + spy.assert_called_once() diff --git a/tests/puzzle/conftest.py b/tests/puzzle/conftest.py index 23bb4635ca0..a4a93c2b4b6 100644 --- a/tests/puzzle/conftest.py +++ b/tests/puzzle/conftest.py @@ -1,43 +1,19 @@ from __future__ import annotations -import shutil - -from pathlib import Path from typing import TYPE_CHECKING import pytest +from tests.helpers import mock_clone -try: - import urllib.parse as urlparse -except ImportError: - import urlparse if TYPE_CHECKING: - from poetry.core.vcs import Git from pytest_mock import MockerFixture -def mock_clone(self: Git, source: str, dest: Path) -> None: - # Checking source to determine which folder we need to copy - parts = urlparse.urlparse(source) - - folder = ( - Path(__file__).parent.parent - / "fixtures" - / "git" - / parts.netloc - / parts.path.lstrip("/").rstrip(".git") - ) - - shutil.rmtree(str(dest)) - shutil.copytree(str(folder), str(dest)) - - @pytest.fixture(autouse=True) def setup(mocker: MockerFixture) -> None: # Patch git module to not actually clone projects - mocker.patch("poetry.core.vcs.git.Git.clone", new=mock_clone) - mocker.patch("poetry.core.vcs.git.Git.checkout", new=lambda *_: None) - p = mocker.patch("poetry.core.vcs.git.Git.rev_parse") + mocker.patch("poetry.vcs.git.Git.clone", new=mock_clone) + p = mocker.patch("poetry.vcs.git.Git.get_revision") p.return_value = "9cf87a285a2d3fbb0b9fa621997b3acc3631ed24" diff --git a/tests/repositories/test_installed_repository.py b/tests/repositories/test_installed_repository.py index c13be7d8e3e..f6cb2ceb8a6 100644 --- a/tests/repositories/test_installed_repository.py +++ b/tests/repositories/test_installed_repository.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections import namedtuple from pathlib import Path from typing import TYPE_CHECKING @@ -71,15 +72,11 @@ def repository(mocker: MockerFixture, env: MockEnv) -> InstalledRepository: return_value=INSTALLED_RESULTS, ) mocker.patch( - "poetry.core.vcs.git.Git.rev_parse", - return_value="bb058f6b78b2d28ef5d9a5e759cfa179a1a713d6", - ) - mocker.patch( - "poetry.core.vcs.git.Git.remote_urls", - side_effect=[ - {"remote.origin.url": "https://github.com/sdispater/pendulum.git"}, - {"remote.origin.url": "git@github.com:sdispater/pendulum.git"}, - ], + "poetry.vcs.git.Git.info", + return_value=namedtuple("GitRepoLocalInfo", "origin revision")( + origin="https://github.com/sdispater/pendulum.git", + revision="bb058f6b78b2d28ef5d9a5e759cfa179a1a713d6", + ), ) mocker.patch("poetry.repositories.installed_repository._VENDORS", str(VENDOR_DIR)) return InstalledRepository.load(env) From 398382d19062a59ee795c85aa3685a82491ae5c3 Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Fri, 8 Apr 2022 16:36:24 +0200 Subject: [PATCH 2/4] tests/helpers: remove python 2 compatibility code --- tests/helpers.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 1644d1aed27..e9a02fea936 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -22,7 +22,6 @@ from poetry.packages import Locker from poetry.repositories import Repository from poetry.repositories.exceptions import PackageNotFound -from poetry.utils._compat import WINDOWS if TYPE_CHECKING: @@ -68,27 +67,12 @@ def fixture(path: str | None = None) -> Path: def copy_or_symlink(source: Path, dest: Path) -> None: - if dest.exists(): - if dest.is_symlink(): - os.unlink(str(dest)) - elif dest.is_dir(): - shutil.rmtree(str(dest)) - else: - os.unlink(str(dest)) - - # Python2 does not support os.symlink on Windows whereas Python3 does. - # os.symlink requires either administrative privileges or developer mode on Win10, - # throwing an OSError if neither is active. - if WINDOWS: - try: - os.symlink(str(source), str(dest), target_is_directory=source.is_dir()) - except OSError: - if source.is_dir(): - shutil.copytree(str(source), str(dest)) - else: - shutil.copyfile(str(source), str(dest)) - else: - os.symlink(str(source), str(dest)) + if dest.is_symlink() or dest.is_file(): + dest.unlink() # missing_ok is only available in Python >= 3.8 + elif dest.is_dir(): + shutil.rmtree(dest) + + os.symlink(str(source), str(dest), target_is_directory=source.is_dir()) class MockDulwichRepo: From 7b31df8813971d4dd2e4ced365df8d4dc69d53dd Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Tue, 19 Apr 2022 00:37:12 +0200 Subject: [PATCH 3/4] test: ensure read-only temp files are removed --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index b4664b340aa..093289522b2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,6 +30,7 @@ from poetry.utils.env import EnvManager from poetry.utils.env import SystemEnv from poetry.utils.env import VirtualEnv +from poetry.utils.helpers import safe_rmtree from tests.helpers import TestLocker from tests.helpers import TestRepository from tests.helpers import get_package @@ -306,7 +307,7 @@ def tmp_dir() -> Iterator[str]: yield dir_ - shutil.rmtree(dir_) + safe_rmtree(dir_) @pytest.fixture From 74ad52b136aba48d6d78bd072687b4b4d734cb90 Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Wed, 27 Apr 2022 00:30:01 +0200 Subject: [PATCH 4/4] git: allow users to fall back to system git This change introduces the config option `experimental.system-git-client` defaulting to `false`. When set to `true`, the subprocess git client implementation is used when cloning a remote repository. This option will be removed in a future release. --- docs/dependency-specification.md | 18 ++++++++++ src/poetry/config/config.py | 3 +- src/poetry/vcs/git/backend.py | 48 ++++++++++++++++--------- tests/conftest.py | 4 +-- tests/console/commands/test_config.py | 3 ++ tests/integration/test_utils_vcs_git.py | 39 ++++++++++++++++++-- 6 files changed, 94 insertions(+), 21 deletions(-) diff --git a/docs/dependency-specification.md b/docs/dependency-specification.md index c5efbb2a32d..14bf57b8ce0 100644 --- a/docs/dependency-specification.md +++ b/docs/dependency-specification.md @@ -116,6 +116,24 @@ To use an SSH connection, for example in the case of private repositories, use t requests = { git = "git@github.com:requests/requests.git" } ``` +{{% note %}} +With Poetry 1.2 releases, the default git client used is [Dulwich](https://www.dulwich.io/). We +fall back to legacy system git client implementation in cases where [gitcredentials](https://git-scm.com/docs/gitcredentials) +are used. This fallback will be removed in a future release where username/password authentication +can be better supported natively. + +In cases where you encounter issues with the default implementation that used to work prior to +Poetry 1.2, you may wish to explicitly configure the use of the system git client via a shell +subprocess call. + +```bash +poetry config experimental.system-git-client true +``` + +Keep in mind however, that doing so will surface bugs that existed in versions prior to 1.2 which +were caused due to the use of the system git client. +{{% /note %}} + ## `path` dependencies To depend on a library located in a local directory or file, diff --git a/src/poetry/config/config.py b/src/poetry/config/config.py index ec6f34bb338..b7c08833f03 100644 --- a/src/poetry/config/config.py +++ b/src/poetry/config/config.py @@ -40,7 +40,7 @@ class Config: "options": {"always-copy": False, "system-site-packages": False}, "prefer-active-python": False, }, - "experimental": {"new-installer": True}, + "experimental": {"new-installer": True, "system-git-client": False}, "installer": {"parallel": True, "max-workers": None}, } @@ -141,6 +141,7 @@ def _get_normalizer(name: str) -> Callable: "virtualenvs.options.system-site-packages", "virtualenvs.options.prefer-active-python", "experimental.new-installer", + "experimental.system-git-client", "installer.parallel", }: return boolean_normalizer diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index 716e2e0aac9..c534dc48130 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -18,7 +18,7 @@ from dulwich.repo import Repo from poetry.console.exceptions import PoetrySimpleConsoleException -from poetry.utils.helpers import safe_rmtree +from poetry.utils.helpers import remove_directory if TYPE_CHECKING: @@ -196,8 +196,10 @@ def _clone_legacy(url: str, refspec: GitRefSpec, target: Path) -> Repo: """ from poetry.vcs.git.system import SystemGit + logger.debug("Cloning '%s' using system git client", url) + if target.exists(): - safe_rmtree(path=target, ignore_errors=True) + remove_directory(path=target, force=True) revision = refspec.tag or refspec.branch or refspec.revision or "HEAD" @@ -270,7 +272,7 @@ def _clone(cls, url: str, refspec: GitRefSpec, target: Path) -> Repo: # this implies the ref we need does not exist or is invalid if isinstance(e, KeyError): # the local copy is at a bad state, lets remove it - safe_rmtree(local.path, ignore_errors=True) + remove_directory(local.path, force=True) if isinstance(e, AssertionError) and "Invalid object name" not in str(e): raise @@ -313,6 +315,22 @@ def _clone_submodules(cls, repo: Repo) -> None: and not path_absolute.joinpath(".git").is_dir(), ) + @staticmethod + def is_using_legacy_client() -> bool: + from poetry.factory import Factory + + return ( + Factory.create_config() + .get("experimental", {}) + .get("system-git-client", False) + ) + + @staticmethod + def get_default_source_root() -> Path: + from poetry.factory import Factory + + return Path(Factory.create_config().get("cache-dir")) / "src" + @classmethod def clone( cls, @@ -324,11 +342,7 @@ def clone( source_root: Path | None = None, clean: bool = False, ) -> Repo: - if not source_root: - from poetry.factory import Factory - - source_root = Path(Factory.create_config().get("cache-dir")) / "src" - + source_root = source_root or cls.get_default_source_root() source_root.mkdir(parents=True, exist_ok=True) name = name or cls.get_name_from_source_url(url=url) @@ -338,7 +352,7 @@ def clone( if target.exists(): if clean: # force clean the local copy if it exists, do not reuse - safe_rmtree(target, ignore_errors=True) + remove_directory(target, force=True) else: # check if the current local copy matches the requested ref spec try: @@ -348,18 +362,20 @@ def clone( current_sha = current_repo.head().decode("utf-8") except (NotGitRepository, AssertionError, KeyError): # something is wrong with the current checkout, clean it - safe_rmtree(target, ignore_errors=True) + remove_directory(target, force=True) else: if not is_revision_sha(revision=current_sha): # head is not a sha, this will cause issues later, lets reset - safe_rmtree(target, ignore_errors=True) + remove_directory(target, force=True) elif refspec.is_sha and current_sha.startswith(refspec.revision): # if revision is used short-circuit remote fetch head matches return current_repo try: - local = cls._clone(url=url, refspec=refspec, target=target) - cls._clone_submodules(repo=local) + if not cls.is_using_legacy_client(): + local = cls._clone(url=url, refspec=refspec, target=target) + cls._clone_submodules(repo=local) + return local except HTTPUnauthorized: # we do this here to handle http authenticated repositories as dulwich # does not currently support using credentials from git-credential helpers. @@ -369,10 +385,10 @@ def clone( # without additional configuration or changes for existing projects that # use http basic auth credentials. logger.debug( - "Unable to fetch from private repository '{%s}', falling back to" + "Unable to fetch from private repository '%s', falling back to" " system git", url, ) - local = cls._clone_legacy(url=url, refspec=refspec, target=target) - return local + # fallback to legacy git client + return cls._clone_legacy(url=url, refspec=refspec, target=target) diff --git a/tests/conftest.py b/tests/conftest.py index 093289522b2..7d7bf37f485 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,7 +30,7 @@ from poetry.utils.env import EnvManager from poetry.utils.env import SystemEnv from poetry.utils.env import VirtualEnv -from poetry.utils.helpers import safe_rmtree +from poetry.utils.helpers import remove_directory from tests.helpers import TestLocker from tests.helpers import TestRepository from tests.helpers import get_package @@ -307,7 +307,7 @@ def tmp_dir() -> Iterator[str]: yield dir_ - safe_rmtree(dir_) + remove_directory(dir_, force=True) @pytest.fixture diff --git a/tests/console/commands/test_config.py b/tests/console/commands/test_config.py index 4d340ba8b33..6e7bb3ebcfd 100644 --- a/tests/console/commands/test_config.py +++ b/tests/console/commands/test_config.py @@ -51,6 +51,7 @@ def test_list_displays_default_value_if_not_set( venv_path = json.dumps(os.path.join("{cache-dir}", "virtualenvs")) expected = f"""cache-dir = {cache_dir} experimental.new-installer = true +experimental.system-git-client = false installer.max-workers = null installer.parallel = true virtualenvs.create = true @@ -75,6 +76,7 @@ def test_list_displays_set_get_setting( venv_path = json.dumps(os.path.join("{cache-dir}", "virtualenvs")) expected = f"""cache-dir = {cache_dir} experimental.new-installer = true +experimental.system-git-client = false installer.max-workers = null installer.parallel = true virtualenvs.create = false @@ -123,6 +125,7 @@ def test_list_displays_set_get_local_setting( venv_path = json.dumps(os.path.join("{cache-dir}", "virtualenvs")) expected = f"""cache-dir = {cache_dir} experimental.new-installer = true +experimental.system-git-client = false installer.max-workers = null installer.parallel = true virtualenvs.create = false diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py index a5bdd21c8fb..3bdac3f99aa 100644 --- a/tests/integration/test_utils_vcs_git.py +++ b/tests/integration/test_utils_vcs_git.py @@ -60,6 +60,11 @@ def setup(config: Config) -> None: } +@pytest.fixture +def use_system_git_client(config: Config) -> None: + config.merge({"experimental": {"system-git-client": True}}) + + @pytest.fixture(scope="module") def source_url() -> str: return "https://github.com/python-poetry/test-fixture-vcs-repository.git" @@ -104,11 +109,20 @@ def remote_default_branch(remote_default_ref: bytes) -> str: def test_git_clone_default_branch_head( - source_url: str, remote_refs: FetchPackResult, remote_default_ref: bytes + source_url: str, + remote_refs: FetchPackResult, + remote_default_ref: bytes, + mocker: MockerFixture, ): + spy = mocker.spy(Git, "_clone") + spy_legacy = mocker.spy(Git, "_clone_legacy") + with Git.clone(url=source_url) as repo: assert remote_refs.refs[remote_default_ref] == repo.head() + spy_legacy.assert_not_called() + spy.assert_called() + def test_git_clone_fails_for_non_existent_branch(source_url: str): branch = uuid.uuid4().hex @@ -208,7 +222,8 @@ def test_git_clone_clones_submodules(source_url: str) -> None: def test_system_git_fallback_on_http_401( - mocker: MockerFixture, source_url: str + mocker: MockerFixture, + source_url: str, ) -> None: spy = mocker.spy(Git, "_clone_legacy") mocker.patch.object(Git, "_clone", side_effect=HTTPUnauthorized(None, None)) @@ -223,3 +238,23 @@ def test_system_git_fallback_on_http_401( refspec=GitRefSpec(branch="0.1", revision=None, tag=None, ref=b"HEAD"), ) spy.assert_called_once() + + +def test_system_git_called_when_configured( + mocker: MockerFixture, source_url: str, use_system_git_client: None +) -> None: + spy_legacy = mocker.spy(Git, "_clone_legacy") + spy = mocker.spy(Git, "_clone") + + with Git.clone(url=source_url, branch="0.1") as repo: + path = Path(repo.path) + assert_version(repo, BRANCH_TO_REVISION_MAP["0.1"]) + + spy.assert_not_called() + + spy_legacy.assert_called_once() + spy_legacy.assert_called_with( + url=source_url, + target=path, + refspec=GitRefSpec(branch="0.1", revision=None, tag=None, ref=b"HEAD"), + )