From e2d2088d4556eb5d397f9561cc8175acc615d7f8 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 03:36:06 -0400 Subject: [PATCH 1/7] Adjust the Fallback logic for obtaining the hashes from private indexes --- pipenv/project.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pipenv/project.py b/pipenv/project.py index 3f71e374f5..88eee3e997 100644 --- a/pipenv/project.py +++ b/pipenv/project.py @@ -12,7 +12,7 @@ from json.decoder import JSONDecodeError from pathlib import Path from urllib import parse -from urllib.parse import unquote +from urllib.parse import unquote, urljoin from pipenv.utils.constants import VCS_LIST @@ -315,8 +315,7 @@ def get_hashes_from_remote_index_urls(self, ireq, source): if params_dict.get(FAVORITE_HASH): collected_hashes.add(params_dict[FAVORITE_HASH][0]) else: # Fallback to downloading the file to obtain hash - if source["url"] not in package_url: - package_url = f"{source['url']}{package_url}" + package_url = urljoin(source["url"], package_url) link = Link(package_url) collected_hashes.add(self.get_file_hash(session, link)) return self.prepend_hash_types(collected_hashes, FAVORITE_HASH) From 9443d4f71f16a1f075ea077a263527b62513f483 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 03:51:36 -0400 Subject: [PATCH 2/7] Add news fragment --- news/5866.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/5866.bugfix.rst diff --git a/news/5866.bugfix.rst b/news/5866.bugfix.rst new file mode 100644 index 0000000000..46f0a9e741 --- /dev/null +++ b/news/5866.bugfix.rst @@ -0,0 +1 @@ +Fix regression of hash collection when downloading package from private indexes when the hash is not found in the index href url fragment. From 8374c9659807f9740e7bfb3accf0f92162df2201 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 04:08:48 -0400 Subject: [PATCH 3/7] Raise an error if fetching the file is not actually succesful. --- pipenv/utils/fileutils.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/pipenv/utils/fileutils.py b/pipenv/utils/fileutils.py index e5bb1c615c..1d1f0f94e6 100644 --- a/pipenv/utils/fileutils.py +++ b/pipenv/utils/fileutils.py @@ -3,8 +3,7 @@ import os import sys import warnings -from contextlib import closing, contextmanager -from http.client import HTTPResponse as Urllib_HTTPResponse +from contextlib import contextmanager from pathlib import Path from tempfile import TemporaryDirectory from typing import IO, Any, ContextManager, Optional, TypeVar, Union @@ -12,10 +11,8 @@ from urllib import request as urllib_request from urllib.parse import quote, urlparse +from pipenv.patched.pip._internal.vcs import RemoteNotFoundError from pipenv.patched.pip._vendor.requests import Session -from pipenv.patched.pip._vendor.urllib3.response import ( - HTTPResponse as Urllib3_HTTPResponse, -) _T = TypeVar("_T") @@ -120,7 +117,7 @@ def path_to_url(path): @contextmanager def open_file( link: Union[_T, str], session: Optional[Session] = None, stream: bool = True -) -> ContextManager[Union[IO[bytes], Urllib3_HTTPResponse, Urllib_HTTPResponse]]: +) -> ContextManager[IO[bytes]]: """Open local or remote file for reading. :param pipenv.patched.pip._internal.index.Link link: A link object from resolving dependencies with @@ -151,17 +148,12 @@ def open_file( # Remote URL headers = {"Accept-Encoding": "identity"} if not session: - try: - from pipenv.patched.pip._vendor.requests import Session # noqa - except ImportError: - session = None - else: - session = Session() - if session is None: - with closing(urllib_request.urlopen(link)) as f: - yield f - else: + session = Session() with session.get(link, headers=headers, stream=stream) as resp: + if resp.status_code != 200: + raise RemoteNotFoundError( + f"HTTP error {resp.status_code} while getting {link}" + ) try: raw = getattr(resp, "raw", None) result = raw if raw else resp @@ -216,7 +208,7 @@ def create_tracked_tempdir(*args: Any, **kwargs: Any) -> str: This uses `TemporaryDirectory`, but does not remove the directory when the return value goes out of scope, instead registers a handler - to cleanup on program exit. The return value is the path to the + to clean up on program exit. The return value is the path to the created directory. """ tempdir = TemporaryDirectory(*args, **kwargs) From a207c67fc395a0a1fbe7440d95568b7928e0415d Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 04:15:15 -0400 Subject: [PATCH 4/7] Dont raise exception since subpocess swallows it, skip file and alert with log message instead. --- pipenv/utils/fileutils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pipenv/utils/fileutils.py b/pipenv/utils/fileutils.py index 1d1f0f94e6..2844882d28 100644 --- a/pipenv/utils/fileutils.py +++ b/pipenv/utils/fileutils.py @@ -13,6 +13,7 @@ from pipenv.patched.pip._internal.vcs import RemoteNotFoundError from pipenv.patched.pip._vendor.requests import Session +from pipenv.utils import err _T = TypeVar("_T") @@ -131,7 +132,11 @@ def open_file( try: link = link.url_without_fragment except AttributeError: - raise ValueError(f"Cannot parse url from unknown type: {link!r}") + err.print( + f"Cannot parse url from unknown type: {link!r}; Skipping ...", + style="bold red", + ) + return None if not is_valid_url(link) and os.path.exists(link): link = path_to_url(link) From 8dad455e163c6f35246a71f5d7b23590472e5ad0 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 04:43:58 -0400 Subject: [PATCH 5/7] Safer open_file for remote files and don't collect bad hashes --- pipenv/project.py | 6 ++++- pipenv/utils/fileutils.py | 46 +++++++++++-------------------------- tasks/vendoring/__init__.py | 3 +++ 3 files changed, 21 insertions(+), 34 deletions(-) diff --git a/pipenv/project.py b/pipenv/project.py index 88eee3e997..e5bf9554be 100644 --- a/pipenv/project.py +++ b/pipenv/project.py @@ -317,7 +317,9 @@ def get_hashes_from_remote_index_urls(self, ireq, source): else: # Fallback to downloading the file to obtain hash package_url = urljoin(source["url"], package_url) link = Link(package_url) - collected_hashes.add(self.get_file_hash(session, link)) + file_hash = self.get_file_hash(session, link) + if file_hash: + collected_hashes.add(file_hash) return self.prepend_hash_types(collected_hashes, FAVORITE_HASH) except (ValueError, KeyError, ConnectionError): if self.s.is_verbose(): @@ -333,6 +335,8 @@ def get_file_hash(self, session, link): h = hashlib.new(FAVORITE_HASH) err.print(f"Downloading file {link.filename} to obtain hash...") with open_file(link.url, session) as fp: + if fp is None: + return None for chunk in iter(lambda: fp.read(8096), b""): h.update(chunk) return f"{h.name}:{h.hexdigest()}" diff --git a/pipenv/utils/fileutils.py b/pipenv/utils/fileutils.py index 2844882d28..8887c81bd5 100644 --- a/pipenv/utils/fileutils.py +++ b/pipenv/utils/fileutils.py @@ -1,22 +1,20 @@ """A collection for utilities for working with files and paths.""" import atexit +import io import os import sys import warnings from contextlib import contextmanager from pathlib import Path from tempfile import TemporaryDirectory -from typing import IO, Any, ContextManager, Optional, TypeVar, Union +from typing import Any, Optional from urllib import parse as urllib_parse from urllib import request as urllib_request from urllib.parse import quote, urlparse -from pipenv.patched.pip._internal.vcs import RemoteNotFoundError from pipenv.patched.pip._vendor.requests import Session from pipenv.utils import err -_T = TypeVar("_T") - def is_file_url(url: Any) -> bool: """Returns true if the given url is a file url.""" @@ -116,27 +114,16 @@ def path_to_url(path): @contextmanager -def open_file( - link: Union[_T, str], session: Optional[Session] = None, stream: bool = True -) -> ContextManager[IO[bytes]]: +def open_file(link, session: Optional[Session] = None, stream: bool = True): """Open local or remote file for reading. - :param pipenv.patched.pip._internal.index.Link link: A link object from resolving dependencies with - pip, or else a URL. - :param Optional[Session] session: A :class:`~requests.Session` instance - :param bool stream: Whether to stream the content if remote, default True - :raises ValueError: If link points to a local directory. - :return: a context manager to the opened file-like object + Other details... """ if not isinstance(link, str): try: link = link.url_without_fragment except AttributeError: - err.print( - f"Cannot parse url from unknown type: {link!r}; Skipping ...", - style="bold red", - ) - return None + raise ValueError(f"Cannot parse url from unknown type: {link!r}") if not is_valid_url(link) and os.path.exists(link): link = path_to_url(link) @@ -154,21 +141,14 @@ def open_file( headers = {"Accept-Encoding": "identity"} if not session: session = Session() - with session.get(link, headers=headers, stream=stream) as resp: - if resp.status_code != 200: - raise RemoteNotFoundError( - f"HTTP error {resp.status_code} while getting {link}" - ) - try: - raw = getattr(resp, "raw", None) - result = raw if raw else resp - yield result - finally: - if raw: - conn = raw._connection - if conn is not None: - conn.close() - result.close() + resp = session.get(link, headers=headers, stream=stream) + if resp.status_code != 200: + err.print(f"HTTP error {resp.status_code} while getting {link}") + yield None + else: + # Creating a buffer-like object + buffer = io.BytesIO(resp.content) + yield buffer @contextmanager diff --git a/tasks/vendoring/__init__.py b/tasks/vendoring/__init__.py index 4fb926ead8..886ddea4da 100644 --- a/tasks/vendoring/__init__.py +++ b/tasks/vendoring/__init__.py @@ -795,4 +795,7 @@ def vendor_artifact(ctx, package, version=None): dest_file = dest_dir / dest_path with open(dest_file.as_posix(), "wb") as target_handle: with open_file(link) as fp: + if fp is None: + print(f"Error downloading {link}") + continue shutil.copyfileobj(fp, target_handle) From ca4d65bff982d4aa7f5f30828806bc5c6f5a0d7a Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 05:03:50 -0400 Subject: [PATCH 6/7] restore docstring --- pipenv/utils/fileutils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pipenv/utils/fileutils.py b/pipenv/utils/fileutils.py index 8887c81bd5..728e174ad9 100644 --- a/pipenv/utils/fileutils.py +++ b/pipenv/utils/fileutils.py @@ -117,7 +117,12 @@ def path_to_url(path): def open_file(link, session: Optional[Session] = None, stream: bool = True): """Open local or remote file for reading. - Other details... + :param pipenv.patched.pip._internal.index.Link link: A link object from resolving dependencies with + pip, or else a URL. + :param Optional[Session] session: A :class:`~requests.Session` instance + :param bool stream: Whether to stream the content if remote, default True + :raises ValueError: If link points to a local directory. + :return: a context manager to the opened file-like object """ if not isinstance(link, str): try: From bbf528291c7ea708066fbde37adecb4ad2c013f4 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 25 Aug 2023 10:44:21 -0400 Subject: [PATCH 7/7] Get the cache working for obtaining file hashes --- pipenv/project.py | 4 +++- pipenv/utils/fileutils.py | 9 +++++---- pipenv/utils/internet.py | 10 ++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pipenv/project.py b/pipenv/project.py index e5bf9554be..7ef349f634 100644 --- a/pipenv/project.py +++ b/pipenv/project.py @@ -240,7 +240,9 @@ def get_requests_session_for_source(self, source): session = self.sessions[source["name"]] else: session = get_requests_session( - self.s.PIPENV_MAX_RETRIES, source.get("verify_ssl", True) + self.s.PIPENV_MAX_RETRIES, + source.get("verify_ssl", True), + cache_dir=self.s.PIPENV_CACHE_DIR, ) self.sessions[source["name"]] = session return session diff --git a/pipenv/utils/fileutils.py b/pipenv/utils/fileutils.py index 728e174ad9..afb92ffddc 100644 --- a/pipenv/utils/fileutils.py +++ b/pipenv/utils/fileutils.py @@ -12,7 +12,8 @@ from urllib import request as urllib_request from urllib.parse import quote, urlparse -from pipenv.patched.pip._vendor.requests import Session +from pipenv.patched.pip._internal.locations import USER_CACHE_DIR +from pipenv.patched.pip._internal.network.download import PipSession from pipenv.utils import err @@ -114,12 +115,12 @@ def path_to_url(path): @contextmanager -def open_file(link, session: Optional[Session] = None, stream: bool = True): +def open_file(link, session: Optional[PipSession] = None, stream: bool = False): """Open local or remote file for reading. :param pipenv.patched.pip._internal.index.Link link: A link object from resolving dependencies with pip, or else a URL. - :param Optional[Session] session: A :class:`~requests.Session` instance + :param Optional[PipSession] session: A :class:`~PipSession` instance :param bool stream: Whether to stream the content if remote, default True :raises ValueError: If link points to a local directory. :return: a context manager to the opened file-like object @@ -145,7 +146,7 @@ def open_file(link, session: Optional[Session] = None, stream: bool = True): # Remote URL headers = {"Accept-Encoding": "identity"} if not session: - session = Session() + session = PipSession(cache=USER_CACHE_DIR) resp = session.get(link, headers=headers, stream=stream) if resp.status_code != 200: err.print(f"HTTP error {resp.status_code} while getting {link}") diff --git a/pipenv/utils/internet.py b/pipenv/utils/internet.py index f4d57b385f..ec0c6d39e4 100644 --- a/pipenv/utils/internet.py +++ b/pipenv/utils/internet.py @@ -3,19 +3,17 @@ from html.parser import HTMLParser from urllib.parse import urlparse -from pipenv.patched.pip._vendor import requests -from pipenv.patched.pip._vendor.requests.adapters import HTTPAdapter +from pipenv.patched.pip._internal.locations import USER_CACHE_DIR +from pipenv.patched.pip._internal.network.download import PipSession from pipenv.patched.pip._vendor.urllib3 import util as urllib3_util -def get_requests_session(max_retries=1, verify_ssl=True): +def get_requests_session(max_retries=1, verify_ssl=True, cache_dir=USER_CACHE_DIR): """Load requests lazily.""" pip_client_cert = os.environ.get("PIP_CLIENT_CERT") - requests_session = requests.Session() + requests_session = PipSession(cache=cache_dir, retries=max_retries) if pip_client_cert: requests_session.cert = pip_client_cert - adapter = HTTPAdapter(max_retries=max_retries) - requests_session.mount("https://", adapter) if verify_ssl is False: requests_session.verify = False return requests_session