Skip to content

Commit

Permalink
feat: Alternative Schema Locations (#1753)
Browse files Browse the repository at this point in the history
- Refactors
  * Migrate from `pkg_resources` to `importlib`. See importlib's migration guide.
    https://importlib-resources.readthedocs.io/en/latest/migration.html
  * Add importlib_resources as core dependency for Pythons older than 3.9.
  * `load_schema` will handle functionality for finding the schema locations
    locally in multiple places.
  * `load_schema` will not handle `version` identification (that is done through
    `validate` only).

- Features
  * Support overriding the paths for finding schemas, using the `pyhf` installed
    location as a base via `pyhf.schema.variables.path`.
  * Add support for offline access to the currently supported version of the
    schemas (via extra `load_schema` commands).
  * Add `SchemaNotFound` exception for when a schema cannot be found locally
    (outside of `jsonschema.RefResolver` calls)

- Python API
  * `pyhf.schema` introduced as a new API for all things schema-related.
  * `pyhf.schema.version`, `pyhf.schema.path`, `pyhf.schema.load_schema`, and
    `pyhf.schema.validate` are migrated over from `pyhf.utils`.
  • Loading branch information
kratsg authored Mar 23, 2022
1 parent 1884c6c commit 2789d54
Show file tree
Hide file tree
Showing 33 changed files with 719 additions and 128 deletions.
17 changes: 15 additions & 2 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Top-Level
readxml
writexml
compat
schema

Probability Distribution Functions (PDFs)
-----------------------------------------
Expand Down Expand Up @@ -158,6 +159,20 @@ Fits and Tests
intervals.upperlimit
utils.all_pois_floating


Schema
------

.. currentmodule:: pyhf.schema

.. autosummary::
:toctree: _generated/
:nosignatures:

Schema
load_schema
validate

Exceptions
----------

Expand Down Expand Up @@ -194,8 +209,6 @@ Utilities
:toctree: _generated/
:nosignatures:

load_schema
validate
options_from_eqdelimstring
digest
citation
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ install_requires =
jsonschema>=3.0.0 # for utils
jsonpatch>=1.15
pyyaml>=5.1 # for parsing CLI equal-delimited options
importlib_resources>=1.3.0; python_version < "3.9" # for resources in schema

[options.packages.find]
where = src
Expand Down
2 changes: 2 additions & 0 deletions src/pyhf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pyhf.pdf import Model
from pyhf.workspace import Workspace
from pyhf import schema
from pyhf import simplemodels
from pyhf import infer
from pyhf import compat
Expand All @@ -28,6 +29,7 @@
"patchset",
"pdf",
"probability",
"schema",
"set_backend",
"simplemodels",
"tensor",
Expand Down
6 changes: 6 additions & 0 deletions src/pyhf/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class InvalidNameReuse(Exception):
pass


class SchemaNotFound(FileNotFoundError):
    """
    Raised when a requested schema does not exist in the local file system.

    Derives from :class:`FileNotFoundError`, so handlers written for that
    built-in exception (or for :class:`OSError`) also catch this one.
    """


class InvalidSpecification(Exception):
"""
InvalidSpecification is raised when a specification does not validate against the given schema.
Expand Down
3 changes: 2 additions & 1 deletion src/pyhf/patchset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import jsonpatch
from pyhf import exceptions
from pyhf import utils
from pyhf import schema
from pyhf.workspace import Workspace

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -164,7 +165,7 @@ def __init__(self, spec, **config_kwargs):

# run jsonschema validation of input specification against the (provided) schema
log.info(f"Validating spec against schema: {self.schema}")
utils.validate(spec, self.schema, version=self._version)
schema.validate(spec, self.schema, version=self._version)

# set properties based on metadata
self._metadata = spec['metadata']
Expand Down
4 changes: 2 additions & 2 deletions src/pyhf/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pyhf
from pyhf.tensor.manager import get_backend
from pyhf import exceptions
from pyhf import utils
from pyhf import schema
from pyhf import events
from pyhf import probability as prob
from pyhf.constraints import gaussian_constraint_combined, poisson_constraint_combined
Expand Down Expand Up @@ -666,7 +666,7 @@ def __init__(
# run jsonschema validation of input specification against the (provided) schema
if validate:
log.info(f"Validating spec against schema: {self.schema:s}")
utils.validate(self.spec, self.schema, version=self.version)
schema.validate(self.spec, self.schema, version=self.version)
# build up our representation of the specification
poi_name = config_kwargs.pop('poi_name', 'mu')
self.config = _ModelConfig(self.spec, **config_kwargs)
Expand Down
6 changes: 3 additions & 3 deletions src/pyhf/readxml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyhf import utils
from pyhf import schema
from pyhf import compat

import logging
Expand Down Expand Up @@ -364,9 +364,9 @@ def parse(configfile, rootdir, track_progress=False):
{'name': channel_name, 'data': channel_spec['data']}
for channel_name, channel_spec in channels.items()
],
'version': utils.SCHEMA_VERSION,
'version': schema.version,
}
utils.validate(result, 'workspace.json')
schema.validate(result, 'workspace.json')

return result

Expand Down
68 changes: 68 additions & 0 deletions src/pyhf/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
See :class:`~pyhf.schema.Schema` for documentation.
"""
import pathlib
import sys
from pyhf.schema.loader import load_schema
from pyhf.schema.validator import validate
from pyhf.schema import variables

__all__ = [
"load_schema",
"validate",
"path",
"version",
]


def __dir__():
    """Return the names listed in ``__all__`` as the module's ``dir()`` listing."""
    return __all__


class Schema(sys.modules[__name__].__class__):
    """
    Module class for :mod:`pyhf.schema` that makes the module itself callable
    and exposes the current schema location and default version as attributes.

    Calling the module rebinds the directory that is searched for schema
    files; ``path`` and ``version`` are properties without setters that report
    the values currently held in :mod:`pyhf.schema.variables`.

    Example:
        >>> import pyhf.schema
        >>> import pathlib
        >>> curr_path = pyhf.schema.path
        >>> curr_path # doctest: +ELLIPSIS
        PosixPath('.../pyhf/schemas')
        >>> pyhf.schema(pathlib.Path('/home/root/my/new/path'))
        >>> pyhf.schema.path
        PosixPath('/home/root/my/new/path')
        >>> pyhf.schema(curr_path)
        >>> pyhf.schema.path # doctest: +ELLIPSIS
        PosixPath('.../pyhf/schemas')
    """

    @property
    def version(self):
        """
        The schema version that is used by default when looking up schemas.
        """
        return variables.SCHEMA_VERSION

    @property
    def path(self):
        """
        The directory currently searched for schema files.
        """
        return variables.schemas

    def __call__(self, new_path: pathlib.Path):
        """
        Point the local schema search at a different directory.

        Args:
            new_path (pathlib.Path): Folder that contains the schemas

        Returns:
            None
        """
        # The location lives in pyhf.schema.variables so that the loader and
        # the validator, which read it from there, observe the change.
        variables.schemas = new_path


sys.modules[__name__].__class__ = Schema
41 changes: 41 additions & 0 deletions src/pyhf/schema/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from pathlib import Path
import sys
import json
import pyhf.exceptions
from pyhf.schema import variables

# importlib.resources.as_file wasn't added until Python 3.9
# c.f. https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file
if sys.version_info >= (3, 9):
from importlib import resources
else:
import importlib_resources as resources


def load_schema(schema_id: str):
    """
    Get a schema by relative path from cache, or load it into the cache and return.

    Args:
        schema_id (str): Relative path to schema from :attr:`pyhf.schema.path`

    Returns:
        schema (dict): The loaded schema.

    Raises:
        pyhf.exceptions.SchemaNotFound: if no file exists for ``schema_id``
            under the current schema location.
    """
    # NOTE(review): the lookup key is built with pathlib, which collapses the
    # "//" that follows "https:" in SCHEMA_BASE (and uses "\\" separators on
    # Windows). Entries are stored under each schema's "$id" below, which is
    # presumably a regular URL, so this lookup likely never hits and every
    # call re-reads the file -- TODO confirm. Making it hit must go together
    # with invalidating the cache whenever ``variables.schemas`` is rebound,
    # otherwise schemas from the previous location would be returned.
    try:
        return variables.SCHEMA_CACHE[
            f'{Path(variables.SCHEMA_BASE).joinpath(schema_id)}'
        ]
    except KeyError:
        pass

    ref = variables.schemas.joinpath(schema_id)
    with resources.as_file(ref) as path:
        if not path.exists():
            raise pyhf.exceptions.SchemaNotFound(
                f'The schema {schema_id} was not found. Do you have the right version or the right path? {path}'
            )
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's default text encoding.
        with path.open(encoding="utf-8") as json_schema:
            schema = json.load(json_schema)

    # Schemas are cached under their own "$id", not under the relative path.
    variables.SCHEMA_CACHE[schema['$id']] = schema
    return schema
41 changes: 41 additions & 0 deletions src/pyhf/schema/validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import jsonschema
import pyhf.exceptions
from pyhf.schema.loader import load_schema
from pyhf.schema import variables
from typing import Union


def validate(spec: dict, schema_name: str, version: Union[str, None] = None):
    """
    Validate a provided specification against a schema.

    Args:
        spec (dict): The specification to validate.
        schema_name (str): The name of the schema to use.
        version (None or str): The version to use if not the default from :attr:`pyhf.schema.version`.

    Returns:
        None: schema validated fine

    Raises:
        pyhf.exceptions.InvalidSpecification: the specification is invalid
        pyhf.exceptions.SchemaNotFound: propagated from ``load_schema`` when
            the requested schema file does not exist locally
    """

    # A falsy version (None or an empty string) selects the default version.
    version = version or variables.SCHEMA_VERSION

    schema = load_schema(f'{version}/{schema_name}')

    # note: trailing slash needed for RefResolver to resolve correctly
    resolver = jsonschema.RefResolver(
        base_uri=f"file://{variables.schemas}/",
        referrer=f"{version}/{schema_name}",
        store=variables.SCHEMA_CACHE,
    )
    validator = jsonschema.Draft6Validator(
        schema, resolver=resolver, format_checker=None
    )

    try:
        return validator.validate(spec)
    except jsonschema.ValidationError as err:
        # Chain explicitly so the traceback presents the jsonschema error as
        # the direct cause rather than as an error raised during handling.
        raise pyhf.exceptions.InvalidSpecification(err, schema_name) from err
13 changes: 13 additions & 0 deletions src/pyhf/schema/variables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import sys

# importlib.resources.as_file wasn't added until Python 3.9
# c.f. https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file
if sys.version_info >= (3, 9):
from importlib import resources
else:
import importlib_resources as resources
# Location searched for schema files: by default the "schemas" directory
# inside the installed pyhf package. Rebound when pyhf.schema is called with a
# new path.
schemas = resources.files('pyhf') / "schemas"

# Schemas already loaded by load_schema, stored under each schema's "$id";
# also passed to the jsonschema RefResolver as its store.
SCHEMA_CACHE = {}
# URL prefix that load_schema combines with a relative schema path to build
# its cache lookup key.
SCHEMA_BASE = "https://scikit-hep.org/pyhf/schemas/"
# Version used by validate when no explicit version is given; exposed as
# pyhf.schema.version.
SCHEMA_VERSION = '1.0.0'
65 changes: 10 additions & 55 deletions src/pyhf/utils.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,29 @@
import json
import jsonschema
import pkg_resources
from pathlib import Path
import yaml
import click
import hashlib

from pyhf.exceptions import InvalidSpecification
import sys

SCHEMA_CACHE = {}
SCHEMA_BASE = "https://scikit-hep.org/pyhf/schemas/"
SCHEMA_VERSION = '1.0.0'
# importlib.resources.as_file wasn't added until Python 3.9
# c.f. https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file
if sys.version_info >= (3, 9):
from importlib import resources
else:
import importlib_resources as resources

__all__ = [
"EqDelimStringParamType",
"citation",
"digest",
"load_schema",
"options_from_eqdelimstring",
"validate",
]


def __dir__():
return __all__


def load_schema(schema_id, version=None):
global SCHEMA_CACHE
if not version:
version = SCHEMA_VERSION
try:
return SCHEMA_CACHE[f'{SCHEMA_BASE}{Path(version).joinpath(schema_id)}']
except KeyError:
pass

path = pkg_resources.resource_filename(
__name__, str(Path('schemas').joinpath(version, schema_id))
)
with open(path) as json_schema:
schema = json.load(json_schema)
SCHEMA_CACHE[schema['$id']] = schema
return SCHEMA_CACHE[schema['$id']]


# load the defs.json as it is included by $ref
load_schema('defs.json')


def validate(spec, schema_name, version=None):
schema = load_schema(schema_name, version=version)
try:
resolver = jsonschema.RefResolver(
base_uri=f"file://{pkg_resources.resource_filename(__name__, 'schemas/'):s}",
referrer=schema_name,
store=SCHEMA_CACHE,
)
validator = jsonschema.Draft6Validator(
schema, resolver=resolver, format_checker=None
)
return validator.validate(spec)
except jsonschema.ValidationError as err:
raise InvalidSpecification(err, schema_name)


def options_from_eqdelimstring(opts):
document = '\n'.join(
f"{opt.split('=', 1)[0]}: {opt.split('=', 1)[1]}" for opt in opts
Expand Down Expand Up @@ -140,14 +100,9 @@ def citation(oneline=False):
Returns:
citation (:obj:`str`): The citation for this software
"""
path = Path(
pkg_resources.resource_filename(
__name__, str(Path('data').joinpath('citation.bib'))
)
)
with path.open() as fp:
# remove end-of-file newline if there is one
data = fp.read().strip()
ref = resources.files('pyhf') / 'data' / 'citation.bib'
with resources.as_file(ref) as path:
data = path.read_text().strip()

if oneline:
data = ''.join(data.splitlines())
Expand Down
Loading

0 comments on commit 2789d54

Please sign in to comment.