From fc701067c3645276a10a4b6fc835a5dcdfaa2c40 Mon Sep 17 00:00:00 2001 From: Yi Cheng <74173148+iycheng@users.noreply.github.com> Date: Mon, 26 Apr 2021 14:06:03 -0700 Subject: [PATCH] [runtime env] Support `.gitignore` exclusion in working dir (#15392) --- .flake8 | 3 +- ci/travis/format.sh | 5 +- python/ray/_private/runtime_env.py | 79 ++- python/ray/_private/thirdparty/__init__.py | 0 .../ray/_private/thirdparty/pathspec/LICENSE | 373 +++++++++++ .../_private/thirdparty/pathspec/__init__.py | 69 ++ .../_private/thirdparty/pathspec/compat.py | 38 ++ .../_private/thirdparty/pathspec/pathspec.py | 206 ++++++ .../_private/thirdparty/pathspec/pattern.py | 146 +++++ .../thirdparty/pathspec/patterns/__init__.py | 8 + .../pathspec/patterns/gitwildmatch.py | 330 ++++++++++ .../ray/_private/thirdparty/pathspec/util.py | 600 ++++++++++++++++++ python/ray/tests/test_runtime_env.py | 153 ++++- 13 files changed, 1973 insertions(+), 37 deletions(-) create mode 100644 python/ray/_private/thirdparty/__init__.py create mode 100644 python/ray/_private/thirdparty/pathspec/LICENSE create mode 100644 python/ray/_private/thirdparty/pathspec/__init__.py create mode 100644 python/ray/_private/thirdparty/pathspec/compat.py create mode 100644 python/ray/_private/thirdparty/pathspec/pathspec.py create mode 100644 python/ray/_private/thirdparty/pathspec/pattern.py create mode 100644 python/ray/_private/thirdparty/pathspec/patterns/__init__.py create mode 100644 python/ray/_private/thirdparty/pathspec/patterns/gitwildmatch.py create mode 100644 python/ray/_private/thirdparty/pathspec/util.py diff --git a/.flake8 b/.flake8 index 782615692804b..2e8336a35a775 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,5 @@ [flake8] -exclude = +exclude = python/ray/core/generated/ streaming/python/generated doc/source/conf.py @@ -7,6 +7,7 @@ exclude = python/ray/thirdparty_files/ python/build/ python/.eggs/ + python/ray/_private/thirdparty/* max-line-length = 79 inline-quotes = " ignore = diff --git 
a/ci/travis/format.sh b/ci/travis/format.sh index bb916869cca2d..354c80b3971e9 100755 --- a/ci/travis/format.sh +++ b/ci/travis/format.sh @@ -96,7 +96,7 @@ YAPF_FLAGS=( ) # TODO(dmitri): When more of the codebase is typed properly, the mypy flags -# should be set to do a more stringent check. +# should be set to do a more stringent check. MYPY_FLAGS=( '--follow-imports=skip' '--ignore-missing-imports' @@ -116,6 +116,7 @@ YAPF_EXCLUDES=( '--exclude' 'python/build/*' '--exclude' 'python/ray/core/src/ray/gcs/*' '--exclude' 'python/ray/thirdparty_files/*' + '--exclude' 'python/ray/_private/thirdparty/*' ) GIT_LS_EXCLUDES=( @@ -143,7 +144,7 @@ shellcheck_scripts() { shellcheck "${SHELLCHECK_FLAGS[@]}" "$@" } -# Runs mypy on each argument in sequence. This is different than running mypy +# Runs mypy on each argument in sequence. This is different than running mypy # once on the list of arguments. mypy_on_each() { pushd python/ray diff --git a/python/ray/_private/runtime_env.py b/python/ray/_private/runtime_env.py index f8860d06bf4d8..725338fa8d672 100644 --- a/python/ray/_private/runtime_env.py +++ b/python/ray/_private/runtime_env.py @@ -4,6 +4,7 @@ from filelock import FileLock from pathlib import Path from zipfile import ZipFile +from ray._private.thirdparty.pathspec import PathSpec from ray.job_config import JobConfig from enum import Enum @@ -11,7 +12,7 @@ _internal_kv_exists, _internal_kv_initialized) -from typing import List, Tuple, Optional, Set, Callable +from typing import List, Tuple, Optional, Callable from urllib.parse import urlparse import os import sys @@ -114,18 +115,23 @@ def _xor_bytes(left: bytes, right: bytes) -> bytes: def _dir_travel( path: Path, - excludes: Set[Path], + excludes: List[Callable], handler: Callable, ): - if path in excludes: - return - handler(path) - if path.is_dir(): - for sub_path in path.iterdir(): - _dir_travel(sub_path, excludes, handler) - - -def _zip_module(root: Path, relative_path: Path, excludes: Set[Path], + e = 
_get_gitignore(path) + if e is not None: + excludes.append(e) + skip = any([e(path) for e in excludes]) + if not skip: + handler(path) + if path.is_dir(): + for sub_path in path.iterdir(): + _dir_travel(sub_path, excludes, handler) + if e is not None: + excludes.pop() + + +def _zip_module(root: Path, relative_path: Path, excludes: Optional[Callable], zip_handler: ZipFile) -> None: """Go through all files and zip them into a zip file""" @@ -141,13 +147,14 @@ def handler(path: Path): to_path = path.relative_to(relative_path) zip_handler.write(path, to_path) + excludes = [] if excludes is None else [excludes] _dir_travel(root, excludes, handler) def _hash_modules( root: Path, relative_path: Path, - excludes: Set[Path], + excludes: Optional[Callable], ) -> bytes: """Helper function to create hash of a directory. @@ -169,6 +176,7 @@ def handler(path: Path): nonlocal hash_val hash_val = _xor_bytes(hash_val, md5.digest()) + excludes = [] if excludes is None else [excludes] _dir_travel(root, excludes, handler) return hash_val @@ -185,6 +193,36 @@ def _parse_uri(pkg_uri: str) -> Tuple[Protocol, str]: return (protocol, uri.netloc) +def _get_excludes(path: Path, excludes: List[str]) -> Callable: + path = path.absolute() + pathspec = PathSpec.from_lines("gitwildmatch", excludes) + + def match(p: Path): + path_str = str(p.absolute().relative_to(path)) + path_str += "/" + return pathspec.match_file(path_str) + + return match + + +def _get_gitignore(path: Path) -> Optional[Callable]: + path = path.absolute() + ignore_file = path / ".gitignore" + if ignore_file.is_file(): + with ignore_file.open("r") as f: + pathspec = PathSpec.from_lines("gitwildmatch", f.readlines()) + + def match(p: Path): + path_str = str(p.absolute().relative_to(path)) + if p.is_dir(): + path_str += "/" + return pathspec.match_file(path_str) + + return match + else: + return None + + # TODO(yic): Fix this later to handle big directories in better way def get_project_package_name(working_dir: str, py_modules: 
List[str], excludes: List[str]) -> str: @@ -208,14 +246,13 @@ def get_project_package_name(working_dir: str, py_modules: List[str], Args: working_dir (str): The working directory. py_modules (list[str]): The python module. - excludes (set[str]): The dir or files that should be excluded + excludes (list[str]): The dir or files that should be excluded Returns: Package name as a string. """ RAY_PKG_PREFIX = "_ray_pkg_" hash_val = None - excludes = {Path(p).absolute() for p in excludes} if working_dir: if not isinstance(working_dir, str): raise TypeError("`working_dir` must be a string.") @@ -224,7 +261,9 @@ def get_project_package_name(working_dir: str, py_modules: List[str], raise ValueError(f"working_dir {working_dir} must be an existing" " directory") hash_val = _xor_bytes( - hash_val, _hash_modules(working_dir, working_dir, excludes)) + hash_val, + _hash_modules(working_dir, working_dir, + _get_excludes(working_dir, excludes))) for py_module in py_modules or []: if not isinstance(py_module, str): raise TypeError("`py_module` must be a string.") @@ -233,7 +272,7 @@ def get_project_package_name(working_dir: str, py_modules: List[str], raise ValueError(f"py_module {py_module} must be an existing" " directory") hash_val = _xor_bytes( - hash_val, _hash_modules(module_dir, module_dir.parent, excludes)) + hash_val, _hash_modules(module_dir, module_dir.parent, None)) return RAY_PKG_PREFIX + hash_val.hex() + ".zip" if hash_val else None @@ -252,15 +291,15 @@ def create_project_package(working_dir: str, py_modules: List[str], output_path (str): The path of file to be created. 
""" pkg_file = Path(output_path).absolute() - excludes = [Path(e).absolute() for e in excludes] with ZipFile(pkg_file, "w") as zip_handler: if working_dir: # put all files in /path/working_dir into zip working_path = Path(working_dir).absolute() - _zip_module(working_path, working_path, excludes, zip_handler) + _zip_module(working_path, working_path, + _get_excludes(working_path, excludes), zip_handler) for py_module in py_modules or []: module_path = Path(py_module).absolute() - _zip_module(module_path, module_path.parent, excludes, zip_handler) + _zip_module(module_path, module_path.parent, None, zip_handler) def fetch_package(pkg_uri: str) -> int: @@ -359,7 +398,7 @@ def rewrite_working_dir_uri(job_config: JobConfig) -> None: if (not job_config.runtime_env.get("working_dir_uri")) and (working_dir or py_modules): if excludes is None: - excludes = set() + excludes = [] pkg_name = get_project_package_name(working_dir, py_modules, excludes) job_config.runtime_env[ "working_dir_uri"] = Protocol.GCS.value + "://" + pkg_name diff --git a/python/ray/_private/thirdparty/__init__.py b/python/ray/_private/thirdparty/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/python/ray/_private/thirdparty/pathspec/LICENSE b/python/ray/_private/thirdparty/pathspec/LICENSE new file mode 100644 index 0000000000000..14e2f777f6c39 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. 
"Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. 
"Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/python/ray/_private/thirdparty/pathspec/__init__.py b/python/ray/_private/thirdparty/pathspec/__init__.py new file mode 100644 index 0000000000000..423759fd5bcc5 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/__init__.py @@ -0,0 +1,69 @@ +# encoding: utf-8 +""" +The *pathspec* package provides pattern matching for file paths. So far +this only includes Git's wildmatch pattern matching (the style used for +".gitignore" files). + +The following classes are imported and made available from the root of +the `pathspec` package: + +- :class:`pathspec.pathspec.PathSpec` + +- :class:`pathspec.pattern.Pattern` + +- :class:`pathspec.pattern.RegexPattern` + +- :class:`pathspec.util.RecursionError` + +The following functions are also imported: + +- :func:`pathspec.util.iter_tree` +- :func:`pathspec.util.lookup_pattern` +- :func:`pathspec.util.match_files` +""" +from __future__ import unicode_literals + +__author__ = "Caleb P. Burns" +__copyright__ = "Copyright © 2013-2020 Caleb P. 
Burns" +__created__ = "2013-10-12" +__credits__ = [ + "dahlia ", + "highb ", + "029xue ", + "mikexstudios ", + "nhumrich ", + "davidfraser ", + "demurgos ", + "ghickman ", + "nvie ", + "adrienverge ", + "AndersBlomdell ", + "highb ", + "thmxv ", + "wimglenn ", + "hugovk ", + "dcecile ", + "mroutis ", + "jdufresne ", + "groodt ", + "ftrofin ", + "pykong ", + "nhhollander ", +] +__email__ = "cpburnz@gmail.com" +__license__ = "MPL 2.0" +__project__ = "pathspec" +__status__ = "Development" +__updated__ = "2020-11-07" +__version__ = "0.8.1" + +from .pathspec import PathSpec +from .pattern import Pattern, RegexPattern +from .util import iter_tree, lookup_pattern, match_files, RecursionError + +# Load pattern implementations. +from . import patterns + +# Expose `GitIgnorePattern` class in the root module for backward +# compatibility with v0.4. +from .patterns.gitwildmatch import GitIgnorePattern diff --git a/python/ray/_private/thirdparty/pathspec/compat.py b/python/ray/_private/thirdparty/pathspec/compat.py new file mode 100644 index 0000000000000..37c6480510f49 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/compat.py @@ -0,0 +1,38 @@ +# encoding: utf-8 +""" +This module provides compatibility between Python 2 and 3. Hardly +anything is used by this project to constitute including `six`_. + +.. _`six`: http://pythonhosted.org/six +""" + +import sys + +if sys.version_info[0] < 3: + # Python 2. + unicode = unicode + string_types = (basestring,) + + from collections import Iterable + from itertools import izip_longest + + def iterkeys(mapping): + return mapping.iterkeys() + +else: + # Python 3. + unicode = str + string_types = (unicode,) + + from collections.abc import Iterable + from itertools import zip_longest as izip_longest + + def iterkeys(mapping): + return mapping.keys() + +try: + # Python 3.6+. + from collections.abc import Collection +except ImportError: + # Python 2.7 - 3.5. 
+ from collections import Container as Collection diff --git a/python/ray/_private/thirdparty/pathspec/pathspec.py b/python/ray/_private/thirdparty/pathspec/pathspec.py new file mode 100644 index 0000000000000..73250efeb4da2 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/pathspec.py @@ -0,0 +1,206 @@ +# encoding: utf-8 +""" +This module provides an object oriented interface for pattern matching +of files. +""" + +from . import util +from .compat import Collection, iterkeys, izip_longest, string_types, unicode + + +class PathSpec(object): + """ + The :class:`PathSpec` class is a wrapper around a list of compiled + :class:`.Pattern` instances. + """ + + def __init__(self, patterns): + """ + Initializes the :class:`PathSpec` instance. + + *patterns* (:class:`~collections.abc.Collection` or :class:`~collections.abc.Iterable`) + yields each compiled pattern (:class:`.Pattern`). + """ + + self.patterns = patterns if isinstance(patterns, Collection) else list(patterns) + """ + *patterns* (:class:`~collections.abc.Collection` of :class:`.Pattern`) + contains the compiled patterns. + """ + + def __eq__(self, other): + """ + Tests the equality of this path-spec with *other* (:class:`PathSpec`) + by comparing their :attr:`~PathSpec.patterns` attributes. + """ + if isinstance(other, PathSpec): + paired_patterns = izip_longest(self.patterns, other.patterns) + return all(a == b for a, b in paired_patterns) + else: + return NotImplemented + + def __len__(self): + """ + Returns the number of compiled patterns this path-spec contains + (:class:`int`). + """ + return len(self.patterns) + + def __add__(self, other): + """ + Combines the :attr:`Pathspec.patterns` patterns from two + :class:`PathSpec` instances. + """ + if isinstance(other, PathSpec): + return PathSpec(self.patterns + other.patterns) + else: + return NotImplemented + + def __iadd__(self, other): + """ + Adds the :attr:`Pathspec.patterns` patterns from one :class:`PathSpec` + instance to this instance. 
+ """ + if isinstance(other, PathSpec): + self.patterns += other.patterns + return self + else: + return NotImplemented + + @classmethod + def from_lines(cls, pattern_factory, lines): + """ + Compiles the pattern lines. + + *pattern_factory* can be either the name of a registered pattern + factory (:class:`str`), or a :class:`~collections.abc.Callable` used + to compile patterns. It must accept an uncompiled pattern (:class:`str`) + and return the compiled pattern (:class:`.Pattern`). + + *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled + pattern (:class:`str`). This simply has to yield each line so it can + be a :class:`file` (e.g., from :func:`open` or :class:`io.StringIO`) + or the result from :meth:`str.splitlines`. + + Returns the :class:`PathSpec` instance. + """ + if isinstance(pattern_factory, string_types): + pattern_factory = util.lookup_pattern(pattern_factory) + if not callable(pattern_factory): + raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory)) + + if not util._is_iterable(lines): + raise TypeError("lines:{!r} is not an iterable.".format(lines)) + + lines = [pattern_factory(line) for line in lines if line] + return cls(lines) + + def match_file(self, file, separators=None): + """ + Matches the file to this path-spec. + + *file* (:class:`str` or :class:`~pathlib.PurePath`) is the file path + to be matched against :attr:`self.patterns <PathSpec.patterns>`. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`) + optionally contains the path separators to normalize. See + :func:`~pathspec.util.normalize_file` for more information. + + Returns :data:`True` if *file* matched; otherwise, :data:`False`. + """ + norm_file = util.normalize_file(file, separators=separators) + return util.match_file(self.patterns, norm_file) + + def match_entries(self, entries, separators=None): + """ + Matches the entries to this path-spec.
+ + *entries* (:class:`~collections.abc.Iterable` of :class:`~util.TreeEntry`) + contains the entries to be matched against :attr:`self.patterns <PathSpec.patterns>`. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; + or :data:`None`) optionally contains the path separators to + normalize. See :func:`~pathspec.util.normalize_file` for more + information. + + Returns the matched entries (:class:`~collections.abc.Iterable` of + :class:`~util.TreeEntry`). + """ + if not util._is_iterable(entries): + raise TypeError("entries:{!r} is not an iterable.".format(entries)) + + entry_map = util._normalize_entries(entries, separators=separators) + match_paths = util.match_files(self.patterns, iterkeys(entry_map)) + for path in match_paths: + yield entry_map[path] + + def match_files(self, files, separators=None): + """ + Matches the files to this path-spec. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`; or + :class:`pathlib.PurePath`) contains the file paths to be matched + against :attr:`self.patterns <PathSpec.patterns>`. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; + or :data:`None`) optionally contains the path separators to + normalize. See :func:`~pathspec.util.normalize_file` for more + information. + + Returns the matched files (:class:`~collections.abc.Iterable` of + :class:`str`). + """ + if not util._is_iterable(files): + raise TypeError("files:{!r} is not an iterable.".format(files)) + + file_map = util.normalize_files(files, separators=separators) + matched_files = util.match_files(self.patterns, iterkeys(file_map)) + for path in matched_files: + yield file_map[path] + + def match_tree_entries(self, root, on_error=None, follow_links=None): + """ + Walks the specified root path for all files and matches them to this + path-spec. + + *root* (:class:`str`; or :class:`pathlib.PurePath`) is the root + directory to search.
+ + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) + optionally is the error handler for file-system exceptions. See + :func:`~pathspec.util.iter_tree_entries` for more information. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether + to walk symbolic links that resolve to directories. See + :func:`~pathspec.util.iter_tree_files` for more information. + + Returns the matched files (:class:`~collections.abc.Iterable` of + :class:`str`). + """ + entries = util.iter_tree_entries(root, on_error=on_error, follow_links=follow_links) + return self.match_entries(entries) + + def match_tree_files(self, root, on_error=None, follow_links=None): + """ + Walks the specified root path for all files and matches them to this + path-spec. + + *root* (:class:`str`; or :class:`pathlib.PurePath`) is the root + directory to search for files. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) + optionally is the error handler for file-system exceptions. See + :func:`~pathspec.util.iter_tree_files` for more information. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether + to walk symbolic links that resolve to directories. See + :func:`~pathspec.util.iter_tree_files` for more information. + + Returns the matched files (:class:`~collections.abc.Iterable` of + :class:`str`). + """ + files = util.iter_tree_files(root, on_error=on_error, follow_links=follow_links) + return self.match_files(files) + + # Alias `match_tree_files()` as `match_tree()`. + match_tree = match_tree_files diff --git a/python/ray/_private/thirdparty/pathspec/pattern.py b/python/ray/_private/thirdparty/pathspec/pattern.py new file mode 100644 index 0000000000000..4ba4edf790c84 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/pattern.py @@ -0,0 +1,146 @@ +# encoding: utf-8 +""" +This module provides the base definition for patterns. 
+""" + +import re + +from .compat import unicode + + +class Pattern(object): + """ + The :class:`Pattern` class is the abstract definition of a pattern. + """ + + # Make the class dict-less. + __slots__ = ('include',) + + def __init__(self, include): + """ + Initializes the :class:`Pattern` instance. + + *include* (:class:`bool` or :data:`None`) is whether the matched + files should be included (:data:`True`), excluded (:data:`False`), + or is a null-operation (:data:`None`). + """ + + self.include = include + """ + *include* (:class:`bool` or :data:`None`) is whether the matched + files should be included (:data:`True`), excluded (:data:`False`), + or is a null-operation (:data:`None`). + """ + + def match(self, files): + """ + Matches this pattern against the specified files. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains + each file relative to the root directory (e.g., ``"relative/path/to/file"``). + + Returns an :class:`~collections.abc.Iterable` yielding each matched + file path (:class:`str`). + """ + raise NotImplementedError("{}.{} must override match().".format(self.__class__.__module__, self.__class__.__name__)) + + +class RegexPattern(Pattern): + """ + The :class:`RegexPattern` class is an implementation of a pattern + using regular expressions. + """ + + # Make the class dict-less. + __slots__ = ('regex',) + + def __init__(self, pattern, include=None): + """ + Initializes the :class:`RegexPattern` instance. + + *pattern* (:class:`unicode`, :class:`bytes`, :class:`re.RegexObject`, + or :data:`None`) is the pattern to compile into a regular + expression. + + *include* (:class:`bool` or :data:`None`) must be :data:`None` + unless *pattern* is a precompiled regular expression (:class:`re.RegexObject`) + in which case it is whether matched files should be included + (:data:`True`), excluded (:data:`False`), or is a null operation + (:data:`None`). + + .. NOTE:: Subclasses do not need to support the *include* + parameter. 
+ """ + + self.regex = None + """ + *regex* (:class:`re.RegexObject`) is the regular expression for the + pattern. + """ + + if isinstance(pattern, (unicode, bytes)): + assert include is None, "include:{!r} must be null when pattern:{!r} is a string.".format(include, pattern) + regex, include = self.pattern_to_regex(pattern) + # NOTE: Make sure to allow a null regular expression to be + # returned for a null-operation. + if include is not None: + regex = re.compile(regex) + + elif pattern is not None and hasattr(pattern, 'match'): + # Assume pattern is a precompiled regular expression. + # - NOTE: Used specified *include*. + regex = pattern + + elif pattern is None: + # NOTE: Make sure to allow a null pattern to be passed for a + # null-operation. + assert include is None, "include:{!r} must be null when pattern:{!r} is null.".format(include, pattern) + + else: + raise TypeError("pattern:{!r} is not a string, RegexObject, or None.".format(pattern)) + + super(RegexPattern, self).__init__(include) + self.regex = regex + + def __eq__(self, other): + """ + Tests the equality of this regex pattern with *other* (:class:`RegexPattern`) + by comparing their :attr:`~Pattern.include` and :attr:`~RegexPattern.regex` + attributes. + """ + if isinstance(other, RegexPattern): + return self.include == other.include and self.regex == other.regex + else: + return NotImplemented + + def match(self, files): + """ + Matches this pattern against the specified files. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) + contains each file relative to the root directory (e.g., "relative/path/to/file"). + + Returns an :class:`~collections.abc.Iterable` yielding each matched + file path (:class:`str`). + """ + if self.include is not None: + for path in files: + if self.regex.match(path) is not None: + yield path + + @classmethod + def pattern_to_regex(cls, pattern): + """ + Convert the pattern into an uncompiled regular expression. 
+ + *pattern* (:class:`str`) is the pattern to convert into a regular + expression. + + Returns the uncompiled regular expression (:class:`str` or :data:`None`), + and whether matched files should be included (:data:`True`), + excluded (:data:`False`), or is a null-operation (:data:`None`). + + .. NOTE:: The default implementation simply returns *pattern* and + :data:`True`. + """ + return pattern, True diff --git a/python/ray/_private/thirdparty/pathspec/patterns/__init__.py b/python/ray/_private/thirdparty/pathspec/patterns/__init__.py new file mode 100644 index 0000000000000..1a0d55ec74d90 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/patterns/__init__.py @@ -0,0 +1,8 @@ +# encoding: utf-8 +""" +The *pathspec.patterns* package contains the pattern matching +implementations. +""" + +# Load pattern implementations. +from .gitwildmatch import GitWildMatchPattern diff --git a/python/ray/_private/thirdparty/pathspec/patterns/gitwildmatch.py b/python/ray/_private/thirdparty/pathspec/patterns/gitwildmatch.py new file mode 100644 index 0000000000000..07fd03880a9ad --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/patterns/gitwildmatch.py @@ -0,0 +1,330 @@ +# encoding: utf-8 +""" +This module implements Git's wildmatch pattern matching which itself is +derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore" +files. +""" +from __future__ import unicode_literals + +import re +import warnings + +from .. import util +from ..compat import unicode +from ..pattern import RegexPattern + +#: The encoding to use when parsing a byte string pattern. +_BYTES_ENCODING = 'latin1' + + +class GitWildMatchPattern(RegexPattern): + """ + The :class:`GitWildMatchPattern` class represents a compiled Git + wildmatch pattern. + """ + + # Keep the dict-less class hierarchy. + __slots__ = () + + @classmethod + def pattern_to_regex(cls, pattern): + """ + Convert the pattern into a regular expression. 
+ + *pattern* (:class:`unicode` or :class:`bytes`) is the pattern to + convert into a regular expression. + + Returns the uncompiled regular expression (:class:`unicode`, :class:`bytes`, + or :data:`None`), and whether matched files should be included + (:data:`True`), excluded (:data:`False`), or if it is a + null-operation (:data:`None`). + """ + if isinstance(pattern, unicode): + return_type = unicode + elif isinstance(pattern, bytes): + return_type = bytes + pattern = pattern.decode(_BYTES_ENCODING) + else: + raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern)) + + pattern = pattern.strip() + + if pattern.startswith('#'): + # A pattern starting with a hash ('#') serves as a comment + # (neither includes nor excludes files). Escape the hash with a + # back-slash to match a literal hash (i.e., '\#'). + regex = None + include = None + + elif pattern == '/': + # EDGE CASE: According to `git check-ignore` (v2.4.1), a single + # '/' does not match any file. + regex = None + include = None + + elif pattern: + + if pattern.startswith('!'): + # A pattern starting with an exclamation mark ('!') negates the + # pattern (exclude instead of include). Escape the exclamation + # mark with a back-slash to match a literal exclamation mark + # (i.e., '\!'). + include = False + # Remove leading exclamation mark. + pattern = pattern[1:] + else: + include = True + + if pattern.startswith('\\'): + # Remove leading back-slash escape for escaped hash ('#') or + # exclamation mark ('!'). + pattern = pattern[1:] + + # Split pattern into segments. + pattern_segs = pattern.split('/') + + # Normalize pattern to make processing easier. + + if not pattern_segs[0]: + # A pattern beginning with a slash ('/') will only match paths + # directly on the root directory instead of any descendant + # paths. So, remove empty first segment to make pattern relative + # to root. 
+ del pattern_segs[0] + + elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]): + # A single pattern without a beginning slash ('/') will match + # any descendant path. This is equivalent to "**/{pattern}". So, + # prepend with double-asterisks to make pattern relative to + # root. + # EDGE CASE: This also holds for a single pattern with a + # trailing slash (e.g. dir/). + if pattern_segs[0] != '**': + pattern_segs.insert(0, '**') + + else: + # EDGE CASE: A pattern without a beginning slash ('/') but + # contains at least one prepended directory (e.g. + # "dir/{pattern}") should not match "**/dir/{pattern}", + # according to `git check-ignore` (v2.4.1). + pass + + if not pattern_segs[-1] and len(pattern_segs) > 1: + # A pattern ending with a slash ('/') will match all descendant + # paths if it is a directory but not if it is a regular file. + # This is equivilent to "{pattern}/**". So, set last segment to + # double asterisks to include all descendants. + pattern_segs[-1] = '**' + + # Build regular expression from pattern. + output = ['^'] + need_slash = False + end = len(pattern_segs) - 1 + for i, seg in enumerate(pattern_segs): + if seg == '**': + if i == 0 and i == end: + # A pattern consisting solely of double-asterisks ('**') + # will match every path. + output.append('.+') + elif i == 0: + # A normalized pattern beginning with double-asterisks + # ('**') will match any leading path segments. + output.append('(?:.+/)?') + need_slash = False + elif i == end: + # A normalized pattern ending with double-asterisks ('**') + # will match any trailing path segments. + output.append('/.*') + else: + # A pattern with inner double-asterisks ('**') will match + # multiple (or zero) inner path segments. + output.append('(?:/.+)?') + need_slash = True + elif seg == '*': + # Match single path segment. + if need_slash: + output.append('/') + output.append('[^/]+') + need_slash = True + else: + # Match segment glob pattern. 
+ if need_slash: + output.append('/') + output.append(cls._translate_segment_glob(seg)) + if i == end and include is True: + # A pattern ending without a slash ('/') will match a file + # or a directory (with paths underneath it). E.g., "foo" + # matches "foo", "foo/bar", "foo/bar/baz", etc. + # EDGE CASE: However, this does not hold for exclusion cases + # according to `git check-ignore` (v2.4.1). + output.append('(?:/.*)?') + need_slash = True + output.append('$') + regex = ''.join(output) + + else: + # A blank pattern is a null-operation (neither includes nor + # excludes files). + regex = None + include = None + + if regex is not None and return_type is bytes: + regex = regex.encode(_BYTES_ENCODING) + + return regex, include + + @staticmethod + def _translate_segment_glob(pattern): + """ + Translates the glob pattern to a regular expression. This is used in + the constructor to translate a path segment glob pattern to its + corresponding regular expression. + + *pattern* (:class:`str`) is the glob pattern. + + Returns the regular expression (:class:`str`). + """ + # NOTE: This is derived from `fnmatch.translate()` and is similar to + # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set. + + escape = False + regex = '' + i, end = 0, len(pattern) + while i < end: + # Get next character. + char = pattern[i] + i += 1 + + if escape: + # Escape the character. + escape = False + regex += re.escape(char) + + elif char == '\\': + # Escape character, escape next character. + escape = True + + elif char == '*': + # Multi-character wildcard. Match any string (except slashes), + # including an empty string. + regex += '[^/]*' + + elif char == '?': + # Single-character wildcard. Match any single character (except + # a slash). + regex += '[^/]' + + elif char == '[': + # Braket expression wildcard. Except for the beginning + # exclamation mark, the whole braket expression can be used + # directly as regex but we have to find where the expression + # ends. 
+ # - "[][!]" matchs ']', '[' and '!'. + # - "[]-]" matchs ']' and '-'. + # - "[!]a-]" matchs any character except ']', 'a' and '-'. + j = i + # Pass brack expression negation. + if j < end and pattern[j] == '!': + j += 1 + # Pass first closing braket if it is at the beginning of the + # expression. + if j < end and pattern[j] == ']': + j += 1 + # Find closing braket. Stop once we reach the end or find it. + while j < end and pattern[j] != ']': + j += 1 + + if j < end: + # Found end of braket expression. Increment j to be one past + # the closing braket: + # + # [...] + # ^ ^ + # i j + # + j += 1 + expr = '[' + + if pattern[i] == '!': + # Braket expression needs to be negated. + expr += '^' + i += 1 + elif pattern[i] == '^': + # POSIX declares that the regex braket expression negation + # "[^...]" is undefined in a glob pattern. Python's + # `fnmatch.translate()` escapes the caret ('^') as a + # literal. To maintain consistency with undefined behavior, + # I am escaping the '^' as well. + expr += '\\^' + i += 1 + + # Build regex braket expression. Escape slashes so they are + # treated as literal slashes by regex as defined by POSIX. + expr += pattern[i:j].replace('\\', '\\\\') + + # Add regex braket expression to regex result. + regex += expr + + # Set i to one past the closing braket. + i = j + + else: + # Failed to find closing braket, treat opening braket as a + # braket literal instead of as an expression. + regex += '\\[' + + else: + # Regular character, escape it for regex. + regex += re.escape(char) + + return regex + + @staticmethod + def escape(s): + """ + Escape special characters in the given string. + + *s* (:class:`unicode` or :class:`bytes`) a filename or a string + that you want to escape, usually before adding it to a `.gitignore` + + Returns the escaped string (:class:`unicode`, :class:`bytes`) + """ + # Reference: https://git-scm.com/docs/gitignore#_pattern_format + meta_characters = r"[]!*#?" 
+ + return "".join("\\" + x if x in meta_characters else x for x in s) + +util.register_pattern('gitwildmatch', GitWildMatchPattern) + + +class GitIgnorePattern(GitWildMatchPattern): + """ + The :class:`GitIgnorePattern` class is deprecated by :class:`GitWildMatchPattern`. + This class only exists to maintain compatibility with v0.4. + """ + + def __init__(self, *args, **kw): + """ + Warn about deprecation. + """ + self._deprecated() + return super(GitIgnorePattern, self).__init__(*args, **kw) + + @staticmethod + def _deprecated(): + """ + Warn about deprecation. + """ + warnings.warn("GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead.", DeprecationWarning, stacklevel=3) + + @classmethod + def pattern_to_regex(cls, *args, **kw): + """ + Warn about deprecation. + """ + cls._deprecated() + return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw) + +# Register `GitIgnorePattern` as "gitignore" for backward compatibility +# with v0.4. +util.register_pattern('gitignore', GitIgnorePattern) diff --git a/python/ray/_private/thirdparty/pathspec/util.py b/python/ray/_private/thirdparty/pathspec/util.py new file mode 100644 index 0000000000000..bcba8783b61c9 --- /dev/null +++ b/python/ray/_private/thirdparty/pathspec/util.py @@ -0,0 +1,600 @@ +# encoding: utf-8 +""" +This module provides utility methods for dealing with path-specs. +""" + +import os +import os.path +import posixpath +import stat + +from .compat import Collection, Iterable, string_types, unicode + +NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep] +""" +*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path +separators that need to be normalized to the POSIX separator for the +current operating system. The separators are determined by examining +:data:`os.sep` and :data:`os.altsep`. 
+""" + +_registered_patterns = {} +""" +*_registered_patterns* (:class:`dict`) maps a name (:class:`str`) to the +registered pattern factory (:class:`~collections.abc.Callable`). +""" + + +def detailed_match_files(patterns, files, all_matches=None): + """ + Matches the files to the patterns, and returns which patterns matched + the files. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`) + contains the patterns to use. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains + the normalized file paths to be matched against *patterns*. + + *all_matches* (:class:`boot` or :data:`None`) is whether to return all + matches patterns (:data:`True`), or only the last matched pattern + (:data:`False`). Default is :data:`None` for :data:`False`. + + Returns the matched files (:class:`dict`) which maps each matched file + (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`). + """ + all_files = files if isinstance(files, Collection) else list(files) + return_files = {} + for pattern in patterns: + if pattern.include is not None: + result_files = pattern.match(all_files) + if pattern.include: + # Add files and record pattern. + for result_file in result_files: + if result_file in return_files: + if all_matches: + return_files[result_file].patterns.append(pattern) + else: + return_files[result_file].patterns[0] = pattern + else: + return_files[result_file] = MatchDetail([pattern]) + + else: + # Remove files. + for file in result_files: + del return_files[file] + + return return_files + + +def _is_iterable(value): + """ + Check whether the value is an iterable (excludes strings). + + *value* is the value to check, + + Returns whether *value* is a iterable (:class:`bool`). + """ + return isinstance(value, Iterable) and not isinstance(value, (unicode, bytes)) + + +def iter_tree_entries(root, on_error=None, follow_links=None): + """ + Walks the specified directory for all files and directories. 
+ + *root* (:class:`str`) is the root directory to search. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) + optionally is the error handler for file-system exceptions. It will be + called with the exception (:exc:`OSError`). Reraise the exception to + abort the walk. Default is :data:`None` to ignore file-system + exceptions. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether + to walk symbolic links that resolve to directories. Default is + :data:`None` for :data:`True`. + + Raises :exc:`RecursionError` if recursion is detected. + + Returns an :class:`~collections.abc.Iterable` yielding each file or + directory entry (:class:`.TreeEntry`) relative to *root*. + """ + if on_error is not None and not callable(on_error): + raise TypeError("on_error:{!r} is not callable.".format(on_error)) + + if follow_links is None: + follow_links = True + + for entry in _iter_tree_entries_next(os.path.abspath(root), '', {}, on_error, follow_links): + yield entry + + +def iter_tree_files(root, on_error=None, follow_links=None): + """ + Walks the specified directory for all files. + + *root* (:class:`str`) is the root directory to search for files. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) + optionally is the error handler for file-system exceptions. It will be + called with the exception (:exc:`OSError`). Reraise the exception to + abort the walk. Default is :data:`None` to ignore file-system + exceptions. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether + to walk symbolic links that resolve to directories. Default is + :data:`None` for :data:`True`. + + Raises :exc:`RecursionError` if recursion is detected. + + Returns an :class:`~collections.abc.Iterable` yielding the path to + each file (:class:`str`) relative to *root*. 
+ """ + if on_error is not None and not callable(on_error): + raise TypeError("on_error:{!r} is not callable.".format(on_error)) + + if follow_links is None: + follow_links = True + + for entry in _iter_tree_entries_next(os.path.abspath(root), '', {}, on_error, follow_links): + if not entry.is_dir(follow_links): + yield entry.path + + +# Alias `iter_tree_files()` as `iter_tree()`. +iter_tree = iter_tree_files + + +def _iter_tree_entries_next(root_full, dir_rel, memo, on_error, follow_links): + """ + Scan the directory for all descendant files. + + *root_full* (:class:`str`) the absolute path to the root directory. + + *dir_rel* (:class:`str`) the path to the directory to scan relative to + *root_full*. + + *memo* (:class:`dict`) keeps track of ancestor directories + encountered. Maps each ancestor real path (:class:`str`) to relative + path (:class:`str`). + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) + optionally is the error handler for file-system exceptions. + + *follow_links* (:class:`bool`) is whether to walk symbolic links that + resolve to directories. + + Yields each entry (:class:`.TreeEntry`). + """ + dir_full = os.path.join(root_full, dir_rel) + dir_real = os.path.realpath(dir_full) + + # Remember each encountered ancestor directory and its canonical + # (real) path. If a canonical path is encountered more than once, + # recursion has occurred. + if dir_real not in memo: + memo[dir_real] = dir_rel + else: + raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel) + + for node_name in os.listdir(dir_full): + node_rel = os.path.join(dir_rel, node_name) + node_full = os.path.join(root_full, node_rel) + + # Inspect child node. + try: + node_lstat = os.lstat(node_full) + except OSError as e: + if on_error is not None: + on_error(e) + continue + + if stat.S_ISLNK(node_lstat.st_mode): + # Child node is a link, inspect the target node. 
+ is_link = True + try: + node_stat = os.stat(node_full) + except OSError as e: + if on_error is not None: + on_error(e) + continue + else: + is_link = False + node_stat = node_lstat + + if stat.S_ISDIR(node_stat.st_mode) and (follow_links or not is_link): + # Child node is a directory, recurse into it and yield its + # descendant files. + yield TreeEntry(node_name, node_rel, node_lstat, node_stat) + + for entry in _iter_tree_entries_next(root_full, node_rel, memo, on_error, follow_links): + yield entry + + elif stat.S_ISREG(node_stat.st_mode) or is_link: + # Child node is either a file or an unfollowed link, yield it. + yield TreeEntry(node_name, node_rel, node_lstat, node_stat) + + # NOTE: Make sure to remove the canonical (real) path of the directory + # from the ancestors memo once we are done with it. This allows the + # same directory to appear multiple times. If this is not done, the + # second occurrence of the directory will be incorrectly interpreted + # as a recursion. See . + del memo[dir_real] + + +def lookup_pattern(name): + """ + Lookups a registered pattern factory by name. + + *name* (:class:`str`) is the name of the pattern factory. + + Returns the registered pattern factory (:class:`~collections.abc.Callable`). + If no pattern factory is registered, raises :exc:`KeyError`. + """ + return _registered_patterns[name] + + +def match_file(patterns, file): + """ + Matches the file to the patterns. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`) + contains the patterns to use. + + *file* (:class:`str`) is the normalized file path to be matched + against *patterns*. + + Returns :data:`True` if *file* matched; otherwise, :data:`False`. + """ + matched = False + for pattern in patterns: + if pattern.include is not None: + if file in pattern.match((file,)): + matched = pattern.include + return matched + + +def match_files(patterns, files): + """ + Matches the files to the patterns. 
+ + *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`) + contains the patterns to use. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains + the normalized file paths to be matched against *patterns*. + + Returns the matched files (:class:`set` of :class:`str`). + """ + all_files = files if isinstance(files, Collection) else list(files) + return_files = set() + for pattern in patterns: + if pattern.include is not None: + result_files = pattern.match(all_files) + if pattern.include: + return_files.update(result_files) + else: + return_files.difference_update(result_files) + return return_files + + +def _normalize_entries(entries, separators=None): + """ + Normalizes the entry paths to use the POSIX path separator. + + *entries* (:class:`~collections.abc.Iterable` of :class:`.TreeEntry`) + contains the entries to be normalized. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. + See :func:`normalize_file` for more information. + + Returns a :class:`dict` mapping the each normalized file path (:class:`str`) + to the entry (:class:`.TreeEntry`) + """ + norm_files = {} + for entry in entries: + norm_files[normalize_file(entry.path, separators=separators)] = entry + return norm_files + + +def normalize_file(file, separators=None): + """ + Normalizes the file path to use the POSIX path separator (i.e., ``'/'``). + + *file* (:class:`str` or :class:`pathlib.PurePath`) is the file path. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. + This does not need to include the POSIX path separator (``'/'``), but + including it will not affect the results. Default is :data:`None` for + :data:`NORMALIZE_PATH_SEPS`. To prevent normalization, pass an empty + container (e.g., an empty tuple ``()``). 
+ + Returns the normalized file path (:class:`str`). + """ + # Normalize path separators. + if separators is None: + separators = NORMALIZE_PATH_SEPS + + # Convert path object to string. + norm_file = str(file) + + for sep in separators: + norm_file = norm_file.replace(sep, posixpath.sep) + + # Remove current directory prefix. + if norm_file.startswith('./'): + norm_file = norm_file[2:] + + return norm_file + + +def normalize_files(files, separators=None): + """ + Normalizes the file paths to use the POSIX path separator. + + *files* (:class:`~collections.abc.Iterable` of :class:`str` or + :class:`pathlib.PurePath`) contains the file paths to be normalized. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. + See :func:`normalize_file` for more information. + + Returns a :class:`dict` mapping the each normalized file path (:class:`str`) + to the original file path (:class:`str`) + """ + norm_files = {} + for path in files: + norm_files[normalize_file(path, separators=separators)] = path + return norm_files + + +def register_pattern(name, pattern_factory, override=None): + """ + Registers the specified pattern factory. + + *name* (:class:`str`) is the name to register the pattern factory + under. + + *pattern_factory* (:class:`~collections.abc.Callable`) is used to + compile patterns. It must accept an uncompiled pattern (:class:`str`) + and return the compiled pattern (:class:`.Pattern`). + + *override* (:class:`bool` or :data:`None`) optionally is whether to + allow overriding an already registered pattern under the same name + (:data:`True`), instead of raising an :exc:`AlreadyRegisteredError` + (:data:`False`). Default is :data:`None` for :data:`False`. 
+ """ + if not isinstance(name, string_types): + raise TypeError("name:{!r} is not a string.".format(name)) + if not callable(pattern_factory): + raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory)) + if name in _registered_patterns and not override: + raise AlreadyRegisteredError(name, _registered_patterns[name]) + _registered_patterns[name] = pattern_factory + + +class AlreadyRegisteredError(Exception): + """ + The :exc:`AlreadyRegisteredError` exception is raised when a pattern + factory is registered under a name already in use. + """ + + def __init__(self, name, pattern_factory): + """ + Initializes the :exc:`AlreadyRegisteredError` instance. + + *name* (:class:`str`) is the name of the registered pattern. + + *pattern_factory* (:class:`~collections.abc.Callable`) is the + registered pattern factory. + """ + super(AlreadyRegisteredError, self).__init__(name, pattern_factory) + + @property + def message(self): + """ + *message* (:class:`str`) is the error message. + """ + return "{name!r} is already registered for pattern factory:{pattern_factory!r}.".format( + name=self.name, + pattern_factory=self.pattern_factory, + ) + + @property + def name(self): + """ + *name* (:class:`str`) is the name of the registered pattern. + """ + return self.args[0] + + @property + def pattern_factory(self): + """ + *pattern_factory* (:class:`~collections.abc.Callable`) is the + registered pattern factory. + """ + return self.args[1] + + +class RecursionError(Exception): + """ + The :exc:`RecursionError` exception is raised when recursion is + detected. + """ + + def __init__(self, real_path, first_path, second_path): + """ + Initializes the :exc:`RecursionError` instance. + + *real_path* (:class:`str`) is the real path that recursion was + encountered on. + + *first_path* (:class:`str`) is the first path encountered for + *real_path*. + + *second_path* (:class:`str`) is the second path encountered for + *real_path*. 
+ """ + super(RecursionError, self).__init__(real_path, first_path, second_path) + + @property + def first_path(self): + """ + *first_path* (:class:`str`) is the first path encountered for + :attr:`self.real_path `. + """ + return self.args[1] + + @property + def message(self): + """ + *message* (:class:`str`) is the error message. + """ + return "Real path {real!r} was encountered at {first!r} and then {second!r}.".format( + real=self.real_path, + first=self.first_path, + second=self.second_path, + ) + + @property + def real_path(self): + """ + *real_path* (:class:`str`) is the real path that recursion was + encountered on. + """ + return self.args[0] + + @property + def second_path(self): + """ + *second_path* (:class:`str`) is the second path encountered for + :attr:`self.real_path `. + """ + return self.args[2] + + +class MatchDetail(object): + """ + The :class:`.MatchDetail` class contains information about + """ + + #: Make the class dict-less. + __slots__ = ('patterns',) + + def __init__(self, patterns): + """ + Initialize the :class:`.MatchDetail` instance. + + *patterns* (:class:`~collections.abc.Sequence` of :class:`~pathspec.pattern.Pattern`) + contains the patterns that matched the file in the order they were + encountered. + """ + + self.patterns = patterns + """ + *patterns* (:class:`~collections.abc.Sequence` of :class:`~pathspec.pattern.Pattern`) + contains the patterns that matched the file in the order they were + encountered. + """ + + +class TreeEntry(object): + """ + The :class:`.TreeEntry` class contains information about a file-system + entry. + """ + + #: Make the class dict-less. + __slots__ = ('_lstat', 'name', 'path', '_stat') + + def __init__(self, name, path, lstat, stat): + """ + Initialize the :class:`.TreeEntry` instance. + + *name* (:class:`str`) is the base name of the entry. + + *path* (:class:`str`) is the relative path of the entry. + + *lstat* (:class:`~os.stat_result`) is the stat result of the direct + entry. 
+ + *stat* (:class:`~os.stat_result`) is the stat result of the entry, + potentially linked. + """ + + self._lstat = lstat + """ + *_lstat* (:class:`~os.stat_result`) is the stat result of the direct + entry. + """ + + self.name = name + """ + *name* (:class:`str`) is the base name of the entry. + """ + + self.path = path + """ + *path* (:class:`str`) is the relative path of the entry. + """ + + self._stat = stat + """ + *_stat* (:class:`~os.stat_result`) is the stat result of the linked + entry. + """ + + def is_dir(self, follow_links=None): + """ + Get whether the entry is a directory. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow + symbolic links. If this is :data:`True`, a symlink to a directory + will result in :data:`True`. Default is :data:`None` for :data:`True`. + + Returns whether the entry is a directory (:class:`bool`). + """ + if follow_links is None: + follow_links = True + + node_stat = self._stat if follow_links else self._lstat + return stat.S_ISDIR(node_stat.st_mode) + + def is_file(self, follow_links=None): + """ + Get whether the entry is a regular file. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow + symbolic links. If this is :data:`True`, a symlink to a regular file + will result in :data:`True`. Default is :data:`None` for :data:`True`. + + Returns whether the entry is a regular file (:class:`bool`). + """ + if follow_links is None: + follow_links = True + + node_stat = self._stat if follow_links else self._lstat + return stat.S_ISREG(node_stat.st_mode) + + def is_symlink(self): + """ + Returns whether the entry is a symbolic link (:class:`bool`). + """ + return stat.S_ISLNK(self._lstat.st_mode) + + def stat(self, follow_links=None): + """ + Get the cached stat result for the entry. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow + symbolic links. If this is :data:`True`, the stat result of the + linked file will be returned. Default is :data:`None` for :data:`True`. 
+ + Returns that stat result (:class:`~os.stat_result`). + """ + if follow_links is None: + follow_links = True + + return self._stat if follow_links else self._lstat diff --git a/python/ray/tests/test_runtime_env.py b/python/ray/tests/test_runtime_env.py index fa6b96ec5b794..bcc284a877dbe 100644 --- a/python/ray/tests/test_runtime_env.py +++ b/python/ray/tests/test_runtime_env.py @@ -2,7 +2,7 @@ import pytest import sys import unittest - +import random import tempfile from pathlib import Path import ray @@ -79,6 +79,13 @@ def one(self): """ +def create_file(p): + if not p.parent.exists(): + p.parent.mkdir() + with p.open("w") as f: + f.write("Test") + + @pytest.fixture(scope="function") def working_dir(): with tempfile.TemporaryDirectory() as tmp_dir: @@ -111,6 +118,69 @@ def start_client_server(cluster, client_mode): return ("localhost:10003", {"USE_RAY_CLIENT": "1"}, PKG_DIR) +@unittest.skipIf(sys.platform == "win32", "Fail to create temp dir.") +def test_travel(): + import uuid + with tempfile.TemporaryDirectory() as tmp_dir: + dir_paths = set() + file_paths = set() + item_num = 0 + excludes = [] + root = Path(tmp_dir) / "test" + + def construct(path, excluded=False, depth=0): + nonlocal item_num + path.mkdir(parents=True) + if not excluded: + dir_paths.add(str(path)) + if depth > 8: + return + if item_num > 500: + return + dir_num = random.randint(0, 10) + file_num = random.randint(0, 10) + for _ in range(dir_num): + uid = str(uuid.uuid4()).split("-")[0] + dir_path = path / uid + exclud_sub = random.randint(0, 5) == 0 + if not excluded and exclud_sub: + excludes.append(str(dir_path.relative_to(root))) + if not excluded: + construct(dir_path, exclud_sub or excluded, depth + 1) + item_num += 1 + if item_num > 1000: + return + + for _ in range(file_num): + uid = str(uuid.uuid4()).split("-")[0] + with (path / uid).open("w") as f: + v = random.randint(0, 1000) + f.write(str(v)) + if not excluded: + if random.randint(0, 5) == 0: + excludes.append( + str((path / 
uid).relative_to(root))) + else: + file_paths.add((str(path / uid), str(v))) + item_num += 1 + + construct(root) + exclude_spec = ray._private.runtime_env._get_excludes(root, excludes) + visited_dir_paths = set() + visited_file_paths = set() + + def handler(path): + if path.is_dir(): + visited_dir_paths.add(str(path)) + else: + with open(path) as f: + visited_file_paths.add((str(path), f.read())) + + ray._private.runtime_env._dir_travel(root, [exclude_spec], handler) + assert file_paths == visited_file_paths + assert dir_paths == visited_dir_paths + + """ The following test cases are related with runtime env. It following these steps 1) Creating a temporary dir with fixture working_dir @@ -246,12 +316,6 @@ def test_exclusion(ray_start_cluster_head, working_dir, client_mode): (address, env, PKG_DIR) = start_client_server(cluster, client_mode) working_path = Path(working_dir) - def create_file(p): - if not p.parent.exists(): - p.parent.mkdir() - with p.open("w") as f: - f.write("Test") - create_file(working_path / "tmp_dir" / "test_1") create_file(working_path / "tmp_dir" / "test_2") create_file(working_path / "tmp_dir" / "test_3") @@ -285,23 +349,84 @@ def create_file(p): runtime_env = f"""{{ "working_dir": r"{working_dir}", "excludes": [ - # exclude by absolute path - r"{tmp_dir_test_3}", # exclude by relative path - r"{str(working_path / "test2")}", + r"test2", # exclude by dir - r"{str(working_path / "tmp_dir" / "sub_dir")}", + r"{str(Path("tmp_dir") / "sub_dir")}", # exclude part of the dir - r"{str(working_path / "tmp_dir" / "test_1")}", + r"{str(Path("tmp_dir") / "test_1")}", # exclude part of the dir - r"{str(working_path / "tmp_dir" / "test_2")}", + r"{str(Path("tmp_dir") / "test_2")}", ] }}""" + script = driver_script.format(**locals()) + out = run_string_as_driver(script, env) + assert out.strip().split("\n")[-1] == \ + "Test,FAILED,Test,FAILED,FAILED,Test,FAILED,FAILED" + + +@unittest.skipIf(sys.platform == "win32", "Fail to create temp dir.") 
+@pytest.mark.parametrize("client_mode", [True, False]) +def test_exclusion_2(ray_start_cluster_head, working_dir, client_mode): + cluster = ray_start_cluster_head + (address, env, PKG_DIR) = start_client_server(cluster, client_mode) + working_path = Path(working_dir) + + def create_file(p): + if not p.parent.exists(): + p.parent.mkdir(parents=True) + with p.open("w") as f: + f.write("Test") + + create_file(working_path / "tmp_dir" / "test_1") + create_file(working_path / "tmp_dir" / "test_2") + create_file(working_path / "tmp_dir" / "test_3") + create_file(working_path / "tmp_dir" / "sub_dir" / "test_1") + create_file(working_path / "tmp_dir" / "sub_dir" / "test_2") + create_file(working_path / "test1") + create_file(working_path / "test2") + create_file(working_path / "test3") + create_file(working_path / "cache" / "test_1") + create_file(working_path / "tmp_dir" / "cache" / "test_1") + create_file(working_path / "another_dir" / "cache" / "test_1") + tmp_dir_test_3 = str((working_path / "tmp_dir" / "test_3").absolute()) + runtime_env = f"""{{ + "working_dir": r"{working_dir}", + }}""" + execute_statement = """ + vals = ray.get([ + check_file.remote('test1'), + check_file.remote('test2'), + check_file.remote('test3'), + check_file.remote(os.path.join('tmp_dir', 'test_1')), + check_file.remote(os.path.join('tmp_dir', 'test_2')), + check_file.remote(os.path.join('tmp_dir', 'test_3')), + check_file.remote(os.path.join('tmp_dir', 'sub_dir', 'test_1')), + check_file.remote(os.path.join('tmp_dir', 'sub_dir', 'test_2')), + check_file.remote(os.path.join("cache", "test_1")), + check_file.remote(os.path.join("tmp_dir", "cache", "test_1")), + check_file.remote(os.path.join("another_dir", "cache", "test_1")), + ]) + print(','.join(vals)) +""" script = driver_script.format(**locals()) out = run_string_as_driver(script, env) # Test it works before assert out.strip().split("\n")[-1] == \ - "Test,FAILED,Test,FAILED,FAILED,FAILED,FAILED,FAILED" + 
"Test,Test,Test,Test,Test,Test,Test,Test,Test,Test,Test" + with open(f"{working_dir}/.gitignore", "w") as f: + f.write(""" +# Comment +test_[12] +/test1 +!/tmp_dir/sub_dir/test_1 +cache/ +""") + script = driver_script.format(**locals()) + out = run_string_as_driver(script, env) + t = out.strip().split("\n")[-1] + assert out.strip().split("\n")[-1] == \ + "FAILED,Test,Test,FAILED,FAILED,Test,Test,FAILED,FAILED,FAILED,FAILED" @unittest.skipIf(sys.platform == "win32", "Fail to create temp dir.")