diff --git a/.coveragerc b/.coveragerc index bcd564e..2a5ecd1 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,5 @@ [run] -include = */timefhuman/* +include = timefhuman/* omit = tests/* [report] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8720994..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: python - -sudo: false - -python: - - "3.6.2" - -install: - - python setup.py install - -script: - - py.test --cov=timefhuman - -after_success: - - CI=true TRAVIS=true coveralls diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..be86dd9 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md +include LICENSE +include timefhuman/grammar.lark \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index 1d2a863..0000000 --- a/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -coverage: - py.test --cov - coverage html - open htmlcov/index.html diff --git a/README.md b/README.md index b30dd7c..38b5b70 100644 --- a/README.md +++ b/README.md @@ -1,116 +1,183 @@ # timefhuman +[![PyPi Downloads per Month](https://img.shields.io/pypi/dm/timefhuman.svg)](https://pypi.python.org/pypi/timefhuman/) [![Coverage Status](https://coveralls.io/repos/github/alvinwan/timefhuman/badge.svg?branch=master)](https://coveralls.io/github/alvinwan/timefhuman?branch=master) -[![Build Status](https://travis-ci.org/alvinwan/timefhuman.svg?branch=master)](https://travis-ci.org/alvinwan/timefhuman) -Convert human-readable, date-like strings written in natural language to Python objects. Describe specific datetimes or ranges of datetimes. +Extract datetimes, datetime ranges, and datetime lists from natural language text. Supports Python3+[^1] -To start, describe days of the week or times of day in the vernacular. 
+[^1]: https://github.com/alvinwan/timefhuman/issues/3 +---- + +## Getting Started + +Install with pip using + +```shell +pip install timefhuman ``` + +Then, find natural language dates and times in any text. + +```python >>> from timefhuman import timefhuman ->>> timefhuman('upcoming Monday noon') -datetime.datetime(2018, 8, 6, 12, 0) + +>>> timefhuman("How does 5p mon sound? Or maybe 4p tu?") +[datetime.datetime(2018, 8, 6, 17, 0), datetime.datetime(2018, 8, 7, 16, 0)] ``` -Use any human-readable format with a time range, choices of times, or choices of time ranges. +The text can contain not only datetimes but also ranges of datetimes or lists of datetimes. -``` ->>> timefhuman('7/17 3-4 PM') +```python +>>> timefhuman('3p-4p') # time range (datetime.datetime(2018, 7, 17, 15, 0), datetime.datetime(2018, 7, 17, 16, 0)) ->>> timefhuman('7/17 3 p.m. - 4 p.m.') -(datetime.datetime(2018, 7, 17, 15, 30), datetime.datetime(2018, 7, 17, 16, 0)) ->>> timefhuman('Monday 3 pm or Tu noon') + +>>> timefhuman('7/17 4PM to 7/17 5PM') # range of datetimes +(datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)) + +>>> timefhuman('Monday 3 pm or Tu noon') # list of datetimes [datetime.datetime(2018, 8, 6, 15, 0), datetime.datetime(2018, 8, 7, 12, 0)] ->>> timefhuman('7/17 4-5 or 5-6 PM') -[datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)] ->>> timefhuman('7/17 4-5 or 5-6 PM') + +>>> timefhuman('7/17 4-5 or 5-6 PM') # list of ranges of datetimes! [(datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)), (datetime.datetime(2018, 7, 17, 17, 0), datetime.datetime(2018, 7, 17, 18, 0))] ``` -Parse lists of dates and times with more complex relationships. +Durations are also supported. 
-``` ->>> timefhuman('7/17, 7/18, 7/19 at 2') -[datetime.datetime(2018, 7, 17, 2, 0), datetime.datetime(2018, 7, 18, 2, 0), datetime.datetime(2018, 7, 19, 2, 0)] ->>> timefhuman('2 PM on 7/17 or 7/19') -[datetime.datetime(2018, 7, 17, 14, 0), datetime.datetime(2018, 7, 19, 14, 0)] -``` +```python +>>> timefhuman('30 minutes') # duration +datetime.timedelta(seconds=1800) -Use the vernacular to describe ranges or days. +>>> timefhuman('30-40 mins') # range of durations +(datetime.timedelta(seconds=1800), datetime.timedelta(seconds=2400)) +>>> timefhuman('30 or 40m') # list of durations +[datetime.timedelta(seconds=1800), datetime.timedelta(seconds=2400)] ``` ->>> timefhuman('noon next week') # coming soon ->>> timefhuman('today or tomorrow noon') # when run on August 4, 2018 -[datetime.datetime(2018, 8, 4, 12, 0), datetime.datetime(2018, 8, 5, 12, 0)] -``` +When possible, timefhuman will infer any missing information, using context from other datetimes. -# Installation +```python +>>> timefhuman('3-4p') # infer "PM" for "3" +(datetime.datetime(2018, 7, 17, 15, 0), datetime.datetime(2018, 7, 17, 16, 0)) -Install with pip using +>>> timefhuman('7/17 4 or 5 PM') # infer "PM" for "4" and infer "7/17" for "5 PM" +[datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)] +>>> timefhuman('7/17, 7/18, 7/19 at 9') # infer "9a" for "7/17", "7/18" +[datetime.datetime(2018, 7, 17, 9, 0), datetime.datetime(2018, 7, 18, 9, 0), + datetime.datetime(2018, 7, 19, 9, 0)] + +>>> timefhuman('3p -4p PDT') # infer timezone "PDT" for "3p" +(datetime.datetime(2018, 8, 4, 15, 0, tzinfo=pytz.timezone('US/Pacific')), + datetime.datetime(2018, 8, 4, 16, 0, tzinfo=pytz.timezone('US/Pacific'))) ``` -pip install timefhuman + +You can also use natural language descriptions of dates and times. 
+ +```python +>>> timefhuman('next Monday') +datetime.datetime(2018, 8, 6, 0, 0) + +>>> timefhuman('next next Monday') +datetime.datetime(2018, 8, 13, 0, 0) + +>>> timefhuman('last Wednesday of December') +datetime.datetime(2018, 12, 26, 0, 0) + +>>> timefhuman('afternoon') +datetime.datetime(2018, 8, 4, 15, 0) ``` -Optionally, clone the repository and run `python setup.py install`. +See more examples in [`tests/test_e2e.py`](tests/test_e2e.py). -# Usage +## Advanced Usage -Use the `now` kwarg to use different default values for the parser. +For more configuration options, simply create a `tfhConfig` object. +```python +from timefhuman import tfhConfig +config = tfhConfig() ``` ->>> import datetime ->>> now = datetime.datetime(2018, 8, 4, 0, 0) ->>> timefhuman('upcoming Monday noon', now=now) -datetime.datetime(2018, 8, 6, 12, 0) + +**Return matched text**: You can additionally grab the matched text from the input string. This is useful for modifying the input string, for example. + +```python +>>> config = tfhConfig(return_matched_text=True) + +>>> timefhuman('We could maybe do 3 PM, if you still have time', config=config) +[('3 PM', datetime.datetime(2018, 8, 4, 15, 0))] ``` -Use a variety of different formats, even with days of the week, months, and times with everyday speech. These are structured formats. [`dateparser`](https://github.com/scrapinghub/dateparser) supports structured formats across languages, customs etc. +**Change 'Now'**: You can configure the default date that timefhuman uses to fill in missing information. This would be useful if you're extracting dates from an email sent a year ago. 
+```python +>>> config = tfhConfig(now=datetime.datetime(2018, 8, 4, 0, 0)) + +>>> timefhuman('upcoming Monday noon', config=config) +datetime.datetime(2018, 8, 6, 12, 0) ``` ->>> from timefhuman import timefhuman ->>> now = datetime.datetime(year=2018, month=7, day=7) ->>> timefhuman('July 17, 2018 at 3p.m.') -datetime.datetime(2018, 7, 17, 15, 0) ->>> timefhuman('July 17, 2018 3 p.m.') -datetime.datetime(2018, 7, 17, 15, 0) ->>> timefhuman('3PM on July 17', now=now) -datetime.datetime(2018, 7, 17, 15, 0) ->>> timefhuman('July 17 at 3') -datetime.datetime(2018, 7, 17, 3, 0) ->>> timefhuman('7/17/18 3:00 p.m.') -datetime.datetime(2018, 7, 17, 15, 0) + +**Use explicit information only**: Say you only want to extract *dates* OR *times*. You don't want the library to infer information. You can disable most inference by setting `infer_datetimes=False`. Instead of always returning a datetime, timefhuman will be able to return date or time objects, depending on what's provided. + +```python +>>> config = tfhConfig(infer_datetimes=False) + +>>> timefhuman('3 PM', config=config) +datetime.time(15, 0) + +>>> timefhuman('12/18/18', config=config) +datetime.date(2018, 12, 18) ``` -# Why +**Past datetimes**: By default, datetimes are assumed to occur in the future, so if "3pm" today has already passed, the returned datetime will be for *tomorrow*. However, if datetimes are assumed to have occurred in the past (e.g., from an old letter, talking about past events), you can configure the direction. -[`dateparser`](https://github.com/scrapinghub/dateparser) is the current king of human-readable-date parsing--it supports most common structured dates by trying each one sequentially ([see code](https://github.com/scrapinghub/dateparser/blob/a01a4d2071a8f1d4b368543e5e09cde5eb880799/dateparser/date.py#L220)). 
However, this isn't optimal for understanding natural language: +```python +>>> from timefhuman import Direction +>>> config = tfhConfig(direction=Direction.previous) +>>> timefhuman('3PM') # the default +datetime.datetime(2018, 8, 5, 15, 0) + +>>> timefhuman('3PM', config=config) # changing direction +datetime.datetime(2018, 8, 4, 15, 0) ``` ->>> import dateparser ->>> dateparser.parse("7/7/18 3 p.m.") # yay! -datetime.datetime(2018, 7, 7, 15, 0) ->>> dateparser.parse("7/7/18 at 3") # :( ->>> dateparser.parse("7/17 12 PM") # yay! -datetime.datetime(2018, 7, 7, 12, 0) ->>> dateparser.parse("7/17/18 noon") # :( ->>> dateparser.parse("7/18 3-4 p.m.") # :((((( Parsed July 18 3-4 p.m. as July 3 4 p.m. -datetime.datetime(2018, 7, 3, 16, 0) + +Here is the full set of supported configuration options: + +```python +@dataclass +class tfhConfig: + # Default to the next valid datetime or the previous one + direction: Direction = Direction.next + + # Always produce datetime objects. If no date, use the current date. If no time, use midnight. + infer_datetimes: bool = True + + # The 'current' datetime, used if infer_datetimes is True + now: datetime = datetime.now() + + # Return the matched text from the input string + return_matched_text: bool = False + + # Return a single object instead of a list when there's only one match + return_single_object: bool = True ``` -To remedy this, we can replace "noon" with "12 p.m.", "next Monday" with "7/17/18", "Tu" with "Tuesday" etc. and pass the cleaned string to `dateparser`. However, consider the number of ways we can say "next Monday at 12 p.m.". Ignoring synonyms, we have a number of different grammars to express this: +## Development -- 12 p.m. on Monday -- first Monday of August 12 p.m. -- next week Monday noon +Install the development version. -This issue compounds when you consider listing noontimes for several different days. 
+```shell +$ pip install -e .[test] # for bash +$ pip install -e .\[test\] # for zsh +``` -- first half of next week at noon -- 12 p.m. on Monday Tuesday or Wednesday -- early next week midday +To run tests and simultaneously generate a coverage report, use the following commands: -The permutations--even the possible *combinations*--are endless. Instead of enumerating each permutation, `timefhuman` extracts tokens: "anytime" modifies the type from 'date' to 'range', "next week" shifts the range by 7 days, "p.m." means the string right before is a time or a time range etc. Each set of tokens is then combined to produce datetimes, datetime ranges, or datetime lists. This then allows `timefhuman` to handle any permutation of these modifiers. Said another way: `timefhuman` aims to parse *unstructured* dates, written in natural language. +```shell +$ py.test --cov +$ coverage html +$ open htmlcov/index.html +``` diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index df3eb51..0000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = --doctest-modules diff --git a/setup.py b/setup.py index deb00a4..4ea1f4d 100644 --- a/setup.py +++ b/setup.py @@ -2,8 +2,8 @@ from setuptools import setup from setuptools.command.test import test as TestCommand -tests_require = ['pytest==3.7.1', 'pytest-cov==2.5.1', 'coverage==4.5.1', 'coveralls==1.3.0'] -install_requires = [] +tests_require = ['pytest==8.3.4', 'pytest-cov==6.0.0', 'coverage==7.6.10', 'coveralls==4.0.1'] +install_requires = ['lark==1.2.2', 'babel==2.16.0', 'pytz==2024.2', 'python-dateutil==2.9.0.post0'] class PyTest(TestCommand): @@ -24,20 +24,24 @@ def run_tests(self): sys.exit(errno) -VERSION = '0.0.5' +VERSION = '0.1.1' setup( name="timefhuman", version=VERSION, author="Alvin Wan", author_email='hi@alvinwan.com', - description=("Convert natural language date-like string to Python objects"), - license="BSD", + description=("Extract datetimes, datetime ranges, and datetime lists from natural 
language text"), + long_description=open('README.md', 'r', encoding='utf-8').read(), + long_description_content_type='text/markdown', + license="Apache 2.0", url="https://github.com/alvinwan/timefhuman", packages=['timefhuman'], tests_require=tests_require, - install_requires=install_requires + tests_require, + install_requires=install_requires, download_url='https://github.com/alvinwan/timefhuman/archive/%s.zip' % VERSION, + extras_require={"test":tests_require}, + include_package_data=True, classifiers=[ "Topic :: Utilities", "Intended Audience :: Developers", diff --git a/test.py b/test.py deleted file mode 100644 index 20ad6d5..0000000 --- a/test.py +++ /dev/null @@ -1,3 +0,0 @@ -from timefhuman import timefhuman -print(timefhuman('7/17-7/18')) - diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 62b1f13..25d73a6 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -1,74 +1,180 @@ from timefhuman import timefhuman import datetime import pytest +from timefhuman.main import Direction, tfhConfig +import pytz @pytest.fixture def now(): - return datetime.datetime(year=2018, month=8, day=4) + return datetime.datetime(year=2018, month=8, day=4, hour=14) -def test_main(now): - assert timefhuman('July 17, 2018 at 3p.m.', now) == \ - datetime.datetime(2018, 7, 17, 15, 0) - assert timefhuman('July 17, 2018 3 p.m.', now) == \ - datetime.datetime(2018, 7, 17, 15, 0) - assert timefhuman('3PM on July 17', now) == \ - datetime.datetime(2018, 7, 17, 15, 0) - assert timefhuman('July 17 at 3', now) == \ - datetime.datetime(2018, 7, 17, 3, 0) - assert timefhuman('July 2019', now) == \ - datetime.datetime(2019, 7, 1, 0, 0) - assert timefhuman('7/17/18 3:00 p.m.', now) == \ - datetime.datetime(2018, 7, 17, 15, 0) - - -def test_ambiguity(now): - assert timefhuman('7-17 3-4 p.m.', now) == ( - datetime.datetime(2018, 7, 17, 15, 0), - datetime.datetime(2018, 7, 17, 16, 0) - ) - - -def test_choices(now): - assert timefhuman('7/17 4 or 5 PM', now) == [ - datetime.datetime(2018, 
7, 17, 16, 0), - datetime.datetime(2018, 7, 17, 17, 0), - ] - assert timefhuman('7/17 4-5 PM or 5-6 PM') == [ +@pytest.mark.parametrize("test_input, expected", [ + # time only + ('5p', datetime.datetime(2018, 8, 4, 17, 0)), + ('3p EST', datetime.datetime(2018, 8, 4, 15, 0, tzinfo=pytz.timezone('US/Michigan'))), # fixes gh#6 + + # date only + ('July 2019', datetime.datetime(2019, 7, 1, 0, 0)), + ('7-17-18', datetime.datetime(2018, 7, 17, 0, 0)), + ('2018-7-17', datetime.datetime(2018, 7, 17, 0, 0)), # support YMD + ('7/2018', datetime.datetime(2018, 7, 1, 0, 0)), + + # datetimes + ('July 17, 2018 at 3p.m.', datetime.datetime(2018, 7, 17, 15, 0)), + ('July 17, 2018 3 p.m.', datetime.datetime(2018, 7, 17, 15, 0)), + ('3PM on July 17', datetime.datetime(2018, 7, 17, 15, 0)), + ('July 17 at 3', datetime.datetime(2018, 7, 17, 3, 0)), + ('7/17/18 3:00 p.m.', datetime.datetime(2018, 7, 17, 15, 0)), + ('3 p.m. today', datetime.datetime(2018, 8, 4, 15, 0)), + ('Tomorrow 3p', datetime.datetime(2018, 8, 5, 15, 0)), # gh#24 + ('3p tomorrow', datetime.datetime(2018, 8, 5, 15, 0)), + ('July 3rd', datetime.datetime(2018, 7, 3, 0, 0)), + + # date-only ranges + ('7/17-7/18', (datetime.datetime(2018, 7, 17), datetime.datetime(2018, 7, 18))), + ('July 17-18', (datetime.datetime(2018, 7, 17), datetime.datetime(2018, 7, 18))), # distribute month + + # time-only ranges + ('3p -4p', (datetime.datetime(2018, 8, 4, 15, 0), datetime.datetime(2018, 8, 4, 16, 0))), + ('3p -4p PDT', (datetime.datetime(2018, 8, 4, 15, 0, tzinfo=pytz.timezone('US/Pacific')), datetime.datetime(2018, 8, 4, 16, 0, tzinfo=pytz.timezone('US/Pacific')))), + ('6:00 pm - 12:00 am', (datetime.datetime(2018, 8, 4, 18, 0), datetime.datetime(2018, 8, 5, 0, 0))), # gh#8 + ('8/4 6:00 pm - 8/4 12:00 am', (datetime.datetime(2018, 8, 4, 18, 0), datetime.datetime(2018, 8, 4, 0, 0))), # force date, do not infer + + # date and time ranges + ('7/17 3 pm- 7/19 2 pm', (datetime.datetime(2018, 7, 17, 15, 0), datetime.datetime(2018, 7, 
19, 14, 0))), + ('Jun 28 5:00 PM - Aug 02 7:00 PM', (datetime.datetime(2018, 6, 28, 17, 0), datetime.datetime(2018, 8, 2, 19, 0))), + ('Jun 28 2019 5:00 PM - Aug 02 2019 7:00 PM', (datetime.datetime(2019, 6, 28, 17, 0), datetime.datetime(2019, 8, 2, 19, 0))), + ('6/28 5:00 PM - 8/02 7:00 PM', (datetime.datetime(2018, 6, 28, 17, 0), datetime.datetime(2018, 8, 2, 19, 0))), + ('6/28/2019 5:00 PM - 8/02/2019 7:00 PM', (datetime.datetime(2019, 6, 28, 17, 0), datetime.datetime(2019, 8, 2, 19, 0))), + + # choices + ('July 4th or 5th at 3PM', [datetime.datetime(2018, 7, 4, 15, 0), datetime.datetime(2018, 7, 5, 15, 0)]), # distribute month and time + ('tomorrow noon,Wed 3 p.m.,Fri 11 AM', [datetime.datetime(2018, 8, 5, 12, 0), datetime.datetime(2018, 8, 8, 15, 0), datetime.datetime(2018, 8, 10, 11, 0)]), # distribute meridiem + # ('2, 3, or 4p tmw', [datetime.datetime(2018, 8, 4, 2, 0), datetime.datetime(2018, 8, 4, 3, 0), datetime.datetime(2018, 8, 4, 4, 0)]), # multiple ambiguous tokens #TODO (check month?) 
+ + # choices of ranges + ('7/17 4-5 PM or 5-6 PM today', [ (datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)), - (datetime.datetime(2018, 7, 17, 17, 0), datetime.datetime(2018, 7, 17, 18, 0)) - ] - assert timefhuman('7/17 4-5 or 5-6 PM') == [ + (datetime.datetime(2018, 8, 4, 17, 0), datetime.datetime(2018, 8, 4, 18, 0)) + ]), + + # readme + ('Monday noon', datetime.datetime(2018, 8, 6, 12, 0)), + ('3-4p', (datetime.datetime(2018, 8, 4, 15, 0), datetime.datetime(2018, 8, 4, 16, 0))), # infer meridiem + ('Monday 3 pm or Tu noon', [datetime.datetime(2018, 8, 6, 15, 0), datetime.datetime(2018, 8, 7, 12, 0)]), + ('7/17 4 or 5 PM', [datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)]), # distribute meridiem / date + ('7/17 4-5 or 5-6 PM', [ (datetime.datetime(2018, 7, 17, 16, 0), datetime.datetime(2018, 7, 17, 17, 0)), (datetime.datetime(2018, 7, 17, 17, 0), datetime.datetime(2018, 7, 17, 18, 0)) - ] + ]), + + ('7/17, 7/18, 7/19 at 2', [datetime.datetime(2018, 7, 17, 2, 0), datetime.datetime(2018, 7, 18, 2, 0), datetime.datetime(2018, 7, 19, 2, 0)]), # distribute dates + ('2 PM on 7/17 or 7/19', [datetime.datetime(2018, 7, 17, 14, 0), datetime.datetime(2018, 7, 19, 14, 0)]), # distribute time across dates + ('2022-12-27T09:15:01.002', datetime.datetime(2022, 12, 27, 9, 15, 1, 2)), # fixes gh#31 +]) +def test_default(now, test_input, expected): + """Default behavior should be to infer datetimes and times.""" + actual = timefhuman(test_input, config=tfhConfig(now=now)) + assert actual == expected, f"Expected: {expected}\nGot: {actual}" + +@pytest.mark.parametrize("test_input, expected", [ + # time only + ('5p', datetime.time(hour=17, minute=0)), + ("3 o'clock pm", datetime.time(hour=15, minute=0)), # fixes gh#12 + ('5p Eastern Time', datetime.time(hour=17, minute=0, tzinfo=pytz.timezone('US/Michigan'))), # fixes gh#6 + + # date only + ('July 2019', datetime.date(2019, 7, 1)), + ('Sunday 7/7/2019', datetime.date(2019, 
7, 7)), # fixes gh#27 + + # date-only ranges + ('7/17-7/18', (datetime.date(2018, 7, 17), datetime.date(2018, 7, 18))), + ('July 17-18', (datetime.date(2018, 7, 17), datetime.date(2018, 7, 18))), # distribute month + + # time-only ranges + ('3p -4p', (datetime.time(15, 0), datetime.time(16, 0))), + ('3-4p', (datetime.time(15, 0), datetime.time(16, 0))), # distribute meridiem + + # durations + ('30 minutes', datetime.timedelta(minutes=30)), + ('30 mins', datetime.timedelta(minutes=30)), + ('2 hours', datetime.timedelta(hours=2)), + ('2 hours 30 minutes', datetime.timedelta(hours=2, minutes=30)), + ('2 hours and 30 minutes', datetime.timedelta(hours=2, minutes=30)), # gh#22 + ('2h30m', datetime.timedelta(hours=2, minutes=30)), + ('1 day and an hour', datetime.timedelta(days=1, hours=1)), + ('1.5 hours', datetime.timedelta(hours=1, minutes=30)), + ('1.5h', datetime.timedelta(hours=1, minutes=30)), + ('in five minutes', datetime.timedelta(minutes=5)), # gh#25 + ('awk', []), # should *not become 'a week' + ('a wk', datetime.timedelta(days=7)), + ('thirty two hours', datetime.timedelta(hours=32)), + + # duration ranges and lists + ('30-40 mins', (datetime.timedelta(minutes=30), datetime.timedelta(minutes=40))), + ('1 or 2 days', [datetime.timedelta(days=1), datetime.timedelta(days=2)]), -def test_multiple_choices(now): - print(timefhuman('7/17, 7/18, 7/19 at 2', now, raw=True)) - assert timefhuman('7/17, 7/18, 7/19 at 2', now) == [ - datetime.datetime(2018, 7, 17, 2, 0), - datetime.datetime(2018, 7, 18, 2, 0), - datetime.datetime(2018, 7, 19, 2, 0), - ] + # TODO: support "quarter to 3" + # TODO: support "one and a half hours" + + # TODO ('noon next week') <- should be a list of options + # TODO: support recurrences, like "5pm on thursdays" (see gh#33) + + # TODO: support natural language date ranges e.g., this week, next weekend, any weekday gh#18 + # TODO: support natural language time ranges e.g., afternoon, morning, evening, tonight, today night gh#30 + + # TODO: 
christmas? new years? eve? + # TODO: support 'this past July' (e.g., reduce to 'this') + # TODO: support 'last week of dec' + + # support for date and month modifiers + ('next Monday', datetime.date(2018, 8, 6)), + ('next next Monday', datetime.date(2018, 8, 13)), + ('last Monday', datetime.date(2018, 7, 30)), + ('next July', datetime.date(2019, 7, 1)), + ('last July', datetime.date(2017, 7, 1)), + ('last Wednesday of December', datetime.date(2018, 12, 26)), # gh#4 + + # support for vernacular datetimes + ('afternoon', datetime.time(hour=15, minute=0)), + ('morning', datetime.time(hour=6, minute=0)), + ('evening', datetime.time(hour=18, minute=0)), + ('night', datetime.time(hour=20, minute=0)), + ('today night', datetime.datetime(2018, 8, 4, 20, 0)), + ('tonight', datetime.datetime(2018, 8, 4, 20, 0)), # gh#30 +]) +def test_no_inference(now, test_input, expected): + """Return exactly the date or time, without inferring the other.""" + config = tfhConfig(infer_datetimes=False, now=now) + assert timefhuman(test_input, config=config) == expected -def test_edge_cases_range(now): - assert timefhuman('3-4 pm', now) == ( - datetime.datetime(2018, 8, 4, 15, 0), - datetime.datetime(2018, 8, 4, 16, 0),) - assert timefhuman('7/17-7/18', now) == ( - datetime.datetime(2018, 7, 17, 0, 0), - datetime.datetime(2018, 7, 18, 0, 0),) - assert timefhuman('7/17 3 pm- 7/19 2 pm') == ( - datetime.datetime(2018, 7, 17, 15, 0), - datetime.datetime(2018, 7, 19, 14, 0),) +@pytest.mark.parametrize("config, test_input, expected", [ + (tfhConfig(direction=Direction.next, infer_datetimes=False), 'mon', datetime.date(2018, 8, 6)), + (tfhConfig(direction=Direction.previous, infer_datetimes=False), 'mon', datetime.date(2018, 7, 30)), + + (tfhConfig(infer_datetimes=True), '5p', datetime.datetime(2018, 8, 4, 17, 0)), + (tfhConfig(infer_datetimes=False), '5p', datetime.time(hour=17, minute=0)), + (tfhConfig(infer_datetimes=True), '1p', datetime.datetime(2018, 8, 5, 13, 0)), # gh#12 +]) +def 
test_custom_config(now, config, test_input, expected): + config.now = now + assert timefhuman(test_input, config=config) == expected -def test_comma_delimited_combination(now): - assert timefhuman('tomorrow noon,Wed 3 p.m.,Fri 11 AM', now) == [ - datetime.datetime(2018, 8, 5, 12, 0), - datetime.datetime(2018, 8, 8, 15, 0), - datetime.datetime(2018, 8, 10, 11, 0) - ] +@pytest.mark.parametrize("test_input, expected", [ + ('September 30, 2019.', [ + ('September 30, 2019', (0, 18), datetime.datetime(2019, 9, 30, 0, 0)) + ]), # gh#26 + ('How does 5p mon sound? Or maybe 4p tu?', [ + ('5p mon', (9, 15), datetime.datetime(2018, 8, 6, 17, 0)), + ('4p tu', (32, 37), datetime.datetime(2018, 8, 7, 16, 0)) + ]), + ('There are 3 ways to do it', []), # '3' should remain ambiguous and then be ignored + ('salmon for 9 amnesty tickets', []), # no date or time (contains 'mon' and '9 am') +]) +def test_matched_text(now, test_input, expected): # gh#9 + assert timefhuman(test_input, tfhConfig(now=now, return_matched_text=True)) == expected \ No newline at end of file diff --git a/timefhuman/__init__.py b/timefhuman/__init__.py index 8af7426..6604995 100644 --- a/timefhuman/__init__.py +++ b/timefhuman/__init__.py @@ -1 +1 @@ -from .main import timefhuman +from .main import timefhuman, tfhConfig, Direction \ No newline at end of file diff --git a/timefhuman/categorize.py b/timefhuman/categorize.py deleted file mode 100644 index a6da5d5..0000000 --- a/timefhuman/categorize.py +++ /dev/null @@ -1,384 +0,0 @@ -from .constants import MONTHS -from .constants import DAYS_OF_WEEK -from .data import DayToken -from .data import TimeToken -from .data import DayRange -from .data import TimeRange -from .data import AmbiguousToken -from .data import Token - -import datetime - - -def categorize(tokens, now): - """ - >>> now = datetime.datetime(2018, 8, 6, 6, 0) - >>> categorize(['upcoming', 'Monday', 'noon'], now) - [8/6/2018, 12 pm] - >>> categorize(['7/17', '3:30', 'p.m.', '-', '4', 'p.m.'], now) - 
[7/17/2018, 3:30 pm, '-', 4 pm] - >>> categorize(['7/17', 'or', '7/18', '3', 'p.m.'], now) - [7/17/2018, 'or', 7/18/2018, 3 pm] - >>> categorize(['today', 'or', 'tomorrow', 'noon'], now) - [8/6/2018, 'or', 8/7/2018, 12 pm] - >>> categorize(['7/17', '4', 'or', '5', 'PM'], now) - [7/17/2018, 4:00, 'or', 5 pm] - >>> categorize(['7/17', '3', 'pm', '-', '7/19', '2', 'pm'], now) - [7/17/2018, 3 pm, '-', 7/19/2018, 2 pm] - """ - tokens = list(tokens) - tokens = convert_day_of_week(tokens, now) - tokens = convert_relative_days(tokens, now) - tokens = convert_time_of_day(tokens) - tokens = maybe_substitute_hour_minute(tokens) - tokens = maybe_substitute_using_date(tokens, now) - tokens = maybe_substitute_using_month(tokens, now) - tokens = substitute_hour_minute_in_remaining(tokens, now) - return tokens - - -# TODO: "monday next week" -def convert_day_of_week(tokens, now=datetime.datetime.now()): - """Convert day-of-week vernacular into date-like string. - - WARNING: assumes that 'upcoming', and (no specification) implies - the same day. e.g., 'upcoming Monday', and 'Monday' are both - the same day. However, it assumes that 'next Monday' is the one *after. - Also assumes that 'last', 'past', and 'previous' are the same. 
- - >>> now = datetime.datetime(year=2018, month=8, day=4) - >>> convert_day_of_week(['Monday', 'at', '3'], now) - [8/6/2018, 'at', '3'] - >>> convert_day_of_week(['next', 'Monday', 'at', '3'], now) - [8/13/2018, 'at', '3'] - >>> convert_day_of_week(['past', 'Monday', 'at', '3'], now) - [7/30/2018, 'at', '3'] - >>> convert_day_of_week(['sat', 'at', '5'], now) - [8/4/2018, 'at', '5'] - >>> convert_day_of_week(['suNday', 'at', '5'], now) - [8/5/2018, 'at', '5'] - """ - tokens = tokens.copy() - for i in range(7): - day = now + datetime.timedelta(i) - day_of_week = DAYS_OF_WEEK[day.weekday()] - - for string in (day_of_week, day_of_week[:2], day_of_week[:3], day_of_week[:4]): - for index, token in enumerate(tokens): - if isinstance(token, str) and string.lower() == token.lower(): - new_index, tokens, weeks = extract_weeks_offset(tokens, end=index) - day = now + datetime.timedelta(weeks*7 + i) - tokens[new_index] = DayToken(day.month, day.day, day.year) - break - return tokens - - -def convert_relative_days(tokens, now=datetime.datetime.now()): - """Convert relative days (e.g., "today", "tomorrow") into date-like string. - - >>> now = datetime.datetime(2018, 8, 6) - >>> convert_relative_days(['today', 'or', 'tomorrow', TimeToken(12, 'pm')], now) - [8/6/2018, 'or', 8/7/2018, 12 pm] - """ - for keywords, replacement in ( - (("today",), DayToken.from_datetime(now)), - (("tomorrow", "tmw"), DayToken.from_datetime(now + datetime.timedelta(1))), - (("yesterday",), DayToken.from_datetime(now - datetime.timedelta(1)))): - for keyword in keywords: - tokens = [replacement if token == keyword else token \ - for token in tokens] - return tokens - - - -# TODO: convert to new token-based system -def extract_weeks_offset(tokens, end=None, key_tokens=( - 'next', 'previous', 'last', 'upcoming', 'past', 'prev')): - """Extract the number of week offsets needed. 
- - >>> extract_weeks_offset(['next', 'next', 'week']) - (0, ['week'], 2) - >>> extract_weeks_offset(['upcoming', 'Monday']) - (0, ['Monday'], 0) - >>> extract_weeks_offset(['last', 'Monday']) - (0, ['Monday'], -1) - >>> extract_weeks_offset(['past', 'Tuesday']) - (0, ['Tuesday'], -1) - >>> extract_weeks_offset(['past', 'Wed', 'next', 'week'], end=1) - (0, ['Wed', 'next', 'week'], -1) - """ - offset = 0 - end = len(tokens) - 1 if end is None else end - start = end - 1 - if start < 0 or start >= len(tokens): - return 0, tokens, 0 - - while len(tokens) > start >= 0 and \ - tokens[start] in key_tokens: - candidate = tokens[start] - if candidate == 'upcoming': - return start, tokens[:end-1] + tokens[end:], 0 - if candidate == 'next': - offset += 1 - elif candidate in ('previous', 'prev', 'last', 'past'): - offset -= 1 - start -= 1 - return start + 1, tokens[:start + 1] + tokens[end:], offset - - -def convert_time_of_day(tokens): - """Convert time-of-day vernacular into time-like string. - - >>> convert_time_of_day(['Monday', 'noon', 'huehue']) - ['Monday', 12 pm, 'huehue'] - >>> convert_time_of_day(['Monday', 'afternoon']) - ['Monday', 3 pm] - >>> convert_time_of_day(['Tu', 'evening']) - ['Tu', 6 pm] - >>> convert_time_of_day(['Wed', 'morning']) - ['Wed', 9 am] - >>> convert_time_of_day(['Thu', 'midnight']) - ['Thu', 12 am] - """ - temp_tokens = [token.lower() if isinstance(token, str) else token for token in tokens] - for keyword, time_tokens in ( - ('morning', [TimeToken(9, 'am')]), - ('noon', [TimeToken(12, 'pm')]), - ('afternoon', [TimeToken(3, 'pm')]), - ('evening', [TimeToken(6, 'pm')]), - ('night', [TimeToken(9, 'pm')]), - ('midnight', [TimeToken(12, 'am')])): - if keyword in temp_tokens: - index = temp_tokens.index(keyword) - tokens = tokens[:index] + time_tokens + tokens[index+1:] - return tokens - - -def maybe_substitute_using_month(tokens, now=datetime.datetime.now()): - """ - - >>> now = datetime.datetime(year=2018, month=7, day=7) - >>> 
maybe_substitute_using_month(['July', '17', ',', '2018', 'at']) - [7/17/2018, 'at'] - >>> maybe_substitute_using_month(['Jul', '17', 'at'], now=now) - [7/17/2018, 'at'] - >>> maybe_substitute_using_month(['July', 'at'], now=now) - [7/7/2018, 'at'] - >>> maybe_substitute_using_month(['August', '17', ','], now=now) - [8/17/2018, ','] - >>> maybe_substitute_using_month(['Aug', 'at'], now=now) - [8/1/2018, 'at'] - >>> maybe_substitute_using_month(['gibberish'], now=now) - ['gibberish'] - >>> time_range = TimeRange(TimeToken(3, 'pm'), TimeToken(5, 'pm')) - >>> day_range = DayRange(DayToken(None, 3, None), DayToken(None, 5, None)) - >>> day = DayToken(3, 5, 2018) - >>> ambiguous_token = AmbiguousToken(time_range, day, day_range) - >>> maybe_substitute_using_month(['May', ambiguous_token]) - [5/3/2018 - 5/5/2018] - """ - temp_tokens = [token.lower() if isinstance(token, str) else token for token in tokens] - for mo, month in enumerate(MONTHS, start=1): - - index = None - month = month.lower() - if month in temp_tokens: - index = temp_tokens.index(month) - if month[:3] in temp_tokens: - index = temp_tokens.index(month[:3]) - - if index is None: - continue - - next_candidate = tokens[index+1] - day = 1 if now.month != mo else now.day - if isinstance(next_candidate, AmbiguousToken): - if next_candidate.has_day_range_token(): - day_range = next_candidate.get_day_range_token() - day_range.apply_month(mo) - day_range.apply_year(now.year) # TODO: fails on July 3-5, 2018 - return tokens[:index] + [day_range] + tokens[index+2:] - if not next_candidate.isnumeric(): - day = DayToken(month=mo, day=day, year=now.year) - return tokens[:index] + [day] + tokens[index+1:] - - # allow formats July 17, 2018. Do not consume comma if July 17, July 18 ... 
- next_candidate = int(next_candidate) - next_next_candidate = tokens[index+2] if len(tokens) > index+2 else '' - if next_next_candidate == ',': - next_next_candidate = tokens[index+3] if len(tokens) > index+3 else '' - if next_next_candidate.isnumeric(): - tokens = tokens[:index+1] + tokens[index+2:] - - if next_candidate > 31: - day = 1 if now.month != mo else now.day - day = DayToken(month=mo, day=day, year=next_candidate) - return tokens[:index] + [day] + tokens[index+2:] - elif not next_next_candidate.isnumeric(): - day = DayToken(month=mo, day=next_candidate, year=now.year) - return tokens[:index] + [day] + tokens[index+2:] - - next_next_candidate = int(next_next_candidate) - day = DayToken(month=mo, day=next_candidate, year=next_next_candidate) - return tokens[:index] + [day] + tokens[index+3:] - return tokens - - -def maybe_substitute_using_date(tokens, now=datetime.datetime.now()): - """Attempt to extract dates. - - Look for dates in the form of the following: - - (month)/(day) - (month).(day) - (month)-(day) - (month)/(day)/(year) - (month).(day).(year) - (month)-(day)-(year) - - >>> now = datetime.datetime(2018, 8, 18) - >>> maybe_substitute_using_date(['7/17/18']) - [7/17/2018] - >>> maybe_substitute_using_date(['7-17-18']) - [7/17/2018] - >>> maybe_substitute_using_date(['3', 'on', '7.17.18']) - ['3', 'on', 7/17/2018] - >>> maybe_substitute_using_date(['7-25', '3-4', 'pm'], now=now) - [7/25/2018, 3/4/2018 OR 3:00 - 4:00, 'pm'] - >>> maybe_substitute_using_date(['7/4', '-', '7/6'], now=now) - [7/4/2018, '-', 7/6/2018] - """ - i = 0 - while i < len(tokens): - token = tokens[i] - if isinstance(token, Token): - i += 1 - continue - for punctuation in ('/', '.', '-'): - if punctuation == token: # dash joins other tokens, skip parsing - continue - if punctuation not in token: - continue - - parts = tuple(map(int, token.split(punctuation))) - if len(parts) == 2: - day = DayToken(month=parts[0], day=parts[1], year=now.year) - if punctuation == '-' and parts[1] 
<= 24: - day = AmbiguousToken( - day, extract_hour_minute(token)) - tokens = tokens[:i] + [day] + tokens[i+1:] - continue - - month, day, year = parts - if year < 1000: - year = year + 2000 if year < 50 else year + 1000 - day = DayToken(month=month, day=day, year=year) - tokens = tokens[:i] + [day] + tokens[i+1:] - i += 1 - return tokens - - -def extract_hour_minute(string, time_of_day=None): - """ - - >>> extract_hour_minute('3:00') - 3:00 - >>> extract_hour_minute('3:00', 'pm') - 3 pm - >>> time = extract_hour_minute('3') - >>> time - 3:00 - >>> time.time_of_day - >>> extract_hour_minute('3:30-4', 'pm') - 3:30-4 pm - >>> time_range = TimeRange(TimeToken(3, 'pm'), TimeToken(5, 'pm')) - >>> day_range = DayRange(DayToken(None, 3, None), DayToken(None, 5, None)) - >>> day = DayToken(3, 5, 2018) - >>> ambiguous_token = AmbiguousToken(time_range, day, day_range) - >>> extract_hour_minute(ambiguous_token) - 3-5 pm - >>> extract_hour_minute(AmbiguousToken(day)) - """ - if isinstance(string, AmbiguousToken): - if string.has_time_range_token(): - return string.get_time_range_token() - return None - - if '-' in string: - times = string.split('-') - start = extract_hour_minute(times[0], time_of_day) - end = extract_hour_minute(times[1], time_of_day) - return TimeRange(start, end) - - parts = string.split(':') - hour = int(parts[0]) - minute = int(parts[1]) if len(parts) >= 2 else 0 - return TimeToken(relative_hour=hour, minute=minute, time_of_day=time_of_day) - - -def maybe_substitute_hour_minute(tokens): - """Attempt to extract hour and minute. - - If am and pm are found, grab the hour and minute before it. If colon, use - that as time. 
- - >>> maybe_substitute_hour_minute(['7/17/18', '3', 'PM']) - ['7/17/18', 3 pm] - >>> maybe_substitute_hour_minute(['7/17/18', '3:00', 'p.m.']) - ['7/17/18', 3 pm] - >>> maybe_substitute_hour_minute(['July', '17', '2018', 'at', '3', 'p.m.']) - ['July', '17', '2018', 'at', 3 pm] - >>> maybe_substitute_hour_minute(['July', '17', '2018', '3', 'p.m.']) - ['July', '17', '2018', 3 pm] - >>> maybe_substitute_hour_minute(['3', 'PM', 'on', 'July', '17']) - [3 pm, 'on', 'July', '17'] - >>> maybe_substitute_hour_minute(['July', 'at', '3']) - ['July', 'at', '3'] - >>> maybe_substitute_hour_minute(['7/17/18', '15:00']) - ['7/17/18', 3 pm] - >>> maybe_substitute_hour_minute(['7/17/18', TimeToken(3, 'pm')]) - ['7/17/18', 3 pm] - >>> maybe_substitute_hour_minute(['3', 'p.m.', '-', '4', 'p.m.']) - [3 pm, '-', 4 pm] - """ - remove_dots = lambda token: token.replace('.', '') - temp_tokens = clean_tokens(tokens, remove_dots) - - for time_of_day in ('am', 'pm'): - while time_of_day in temp_tokens: - index = temp_tokens.index(time_of_day) - time_token = extract_hour_minute(temp_tokens[index-1], time_of_day) - tokens = tokens[:index-1] + [time_token] + tokens[index+1:] - temp_tokens = clean_tokens(tokens, remove_dots) - - tokens = [extract_hour_minute(token, None) - if isinstance(token, str) and ':' in token else token - for token in tokens] - - return tokens - - -def clean_tokens(tokens, callback=lambda token: token): - """ - >>> clean_tokens(['Hello', '3', 'P.M.']) - ['hello', '3', 'p.m.'] - >>> clean_tokens(['Hello', '3', 'P.M.'], lambda token: token.replace('.', '')) - ['hello', '3', 'pm'] - """ - return [callback(token.lower()) if isinstance(token, str) - else token for token in tokens] - - -def substitute_hour_minute_in_remaining(tokens, now=datetime.datetime.now()): - """Sketch collector for leftovers integers. 
- - >>> substitute_hour_minute_in_remaining(['gibberish']) - ['gibberish'] - """ - for i, token in enumerate(tokens): - if isinstance(token, Token): - continue - if token.isnumeric(): - time_token = extract_hour_minute(token) - return tokens[:i] + [time_token] + tokens[i+1:] - return tokens diff --git a/timefhuman/constants.py b/timefhuman/constants.py deleted file mode 100644 index fedf221..0000000 --- a/timefhuman/constants.py +++ /dev/null @@ -1,24 +0,0 @@ -MONTHS = ( - 'January', - 'February', - 'March', - 'April', - 'May', - 'June', - 'July', - 'August', - 'September', - 'October', - 'November', - 'December' -) - -DAYS_OF_WEEK = ( - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday' -) diff --git a/timefhuman/data.py b/timefhuman/data.py deleted file mode 100644 index 9a5dd75..0000000 --- a/timefhuman/data.py +++ /dev/null @@ -1,435 +0,0 @@ -import datetime - - -class Token: - - def share(self, property, other, setter=setattr): - """ - >>> t1 = Token() - >>> t2 = Token() - >>> t1.is_special = True - >>> t1.share('is_special', t2) - >>> t2.is_special - True - """ - mine = getattr(self, property, None) - others = getattr(other, property, None) - if mine is None and others is not None: - setter(self, property, others) - elif others is None and mine is not None: - setter(other, property, mine) - - -class ListToken(Token): - - def __init__(self, *tokens): - self.tokens = list(tokens) - - def append(self, other): - self.tokens.append(other) - - def extend(self, others): - self.tokens.extend(others) - - def datetime(self, now): - return [token.datetime(now) for token in self.tokens] - - def __getitem__(self, i): - return self.tokens[i] - - def __repr__(self): - tokens = list(map(repr, self.tokens)) - return '[{}]'.format(', '.join(tokens)) - - -class DayTimeToken(Token): - - def __init__(self, year, month, day, relative_hour, minute=0, time_of_day=None): - self.day = DayToken(month, day, year) - self.time = 
TimeToken(relative_hour, time_of_day, minute) - - def combine(self, other): - """ - - >>> dt = DayTimeToken(2018, 8, 18, 3, 0, 'pm') - >>> day = DayToken(8, 20, 2018) - >>> dt.combine(day) - 8/20/2018 3 pm - >>> time = TimeToken(5, 'pm') - >>> dt.combine(time) - 8/18/2018 5 pm - """ - assert isinstance(other, (DayToken, TimeToken)) - if isinstance(other, DayToken): - return other.combine(self.time) - elif isinstance(other, TimeToken): - self.time.apply(other) - return self.day.combine(other) - - def datetime(self, now): - # TODO: handle Nones - return datetime.datetime( - self.day.year, self.day.month, self.day.day, self.time.hour, self.time.minute) - - @staticmethod - def from_day_time(day, time): - return DayTimeToken( - day.year, day.month, day.day, time.relative_hour, time.minute, - time.time_of_day) - - def __repr__(self): - return '{} {}'.format(repr(self.day), repr(self.time)) - - -class DayTimeRange(Token): - """ - >>> dt1 = DayTimeToken(2018, 8, 1, 10) - >>> dt2 = DayTimeToken(2018, 8, 1, 11) - >>> dt3 = DayTimeToken(2018, 8, 1, 1, time_of_day='pm') - >>> dt4 = DayTimeToken(2018, 8, 3, 11) - >>> DayTimeRange(dt1, dt2) - 8/1/2018 10:00 - 11:00 - >>> DayTimeRange(dt1, dt3) - 8/1/2018 10:00 - 1 pm - >>> DayTimeRange(dt1, dt4) - 8/1/2018 10:00 - 8/3/2018 11:00 - """ - - def __init__(self, start, end): - self.start = start - self.end = end - - def datetime(self, now): - return (self.start.datetime(now), self.end.datetime(now)) - - def __repr__(self): - if self.start.day == self.end.day: - time_range = TimeRange(self.start.time, self.end.time) - return '{} {}'.format(repr(self.start.day), repr(time_range)) - return '{} - {}'.format(repr(self.start), repr(self.end)) - - -class DayTimeList(ListToken): - """ - >>> now = datetime.datetime(2018, 7, 5) - >>> dt1 = DayTimeToken(2018, 8, 1, 10) - >>> dt2 = DayTimeToken(2018, 8, 1, 11) - >>> dts = DayTimeList(dt1, dt2) - >>> dts - [8/1/2018 10:00, 8/1/2018 11:00] - >>> dts.datetime(now) - [datetime.datetime(2018, 8, 1, 
10, 0), datetime.datetime(2018, 8, 1, 11, 0)] - """ - pass - - -class DayToken(Token): - - def __init__(self, month, day, year): # TODO: default Nones? - self.month = month - self.day = day - self.year = year - - assert month is None or 1 <= month <= 12 - assert day is None or 1 <= day <= 31 - - def combine(self, time): - """ - >>> day = DayToken(8, 5, 2018) - >>> time = TimeToken(3, 'pm') - >>> time_range = TimeRange(TimeToken(3, 'pm'), TimeToken(5, 'pm')) - >>> day.combine(time) - 8/5/2018 3 pm - >>> day.combine(time_range) - 8/5/2018 3-5 pm - """ - assert isinstance(time, (TimeRange, TimeToken, DayTimeToken)) - if isinstance(time, TimeToken): - return DayTimeToken.from_day_time(self, time) - if isinstance(time, DayTimeToken): - return self.combine(time.time) - return DayTimeRange( - DayTimeToken.from_day_time(self, time.start), - DayTimeToken.from_day_time(self, time.end)) - - def apply(self, other): - """ - >>> d1 = DayToken(3, 2, None) - >>> d2 = DayToken(4, 1, 2018) - >>> d1.apply(d2) - >>> d2.year - 2018 - >>> d3 = DayToken(None, 3, None) - >>> d2.apply(d3) - >>> d3 - 4/3/2018 - """ - assert isinstance(other, DayToken) - for attr in ('year', 'month'): - self.share(attr, other) - - def datetime(self, now): - return datetime.datetime(self.year, self.month, self.day) - - @staticmethod - def from_datetime(datetime): - return DayToken(datetime.month, datetime.day, datetime.year) - - def __eq__(self, other): - """ - >>> DayToken(5, 7, 2018) == DayToken(5, 7, 2018) - True - >>> DayToken(7, 4, 2018) == DayToken(7, 6, 2018) - False - """ - if not isinstance(other, DayToken): - return False - return self.month == other.month and self.day == other.day and \ - self.year == other.year - - def __repr__(self): # TODO: handles Nones - return '{}/{}/{}'.format( - self.month, self.day, self.year) - - -class DayRange(Token): - - def __init__(self, start, end): - self.start = start - self.end = end - - def apply_month(self, month): - self.start.month = month - self.end.month = 
month - - def apply_year(self, year): - self.start.year = year - self.end.year = year - - def datetime(self, now): - return (self.start.datetime(now), self.end.datetime(now)) - - def combine(self, time): - assert isinstance(time, (TimeRange, TimeToken)) - if isinstance(time, TimeToken): - return DayTimeRange( - DayTimeToken.from_day_time(self.start, time), - DayTimeToken.from_day_time(self.end, time)) - raise NotImplementedError() # return list of two ranges - - def __repr__(self): - return '{} - {}'.format(repr(self.start), repr(self.end)) - - -class DayList(ListToken): - """ - >>> now = datetime.datetime(2018, 7, 5) - >>> dt1 = DayToken(8, 1, 2018) - >>> dt2 = DayToken(8, 2, 2018) - >>> dts = DayList(dt1, dt2) - >>> dts - [8/1/2018, 8/2/2018] - >>> dts.datetime(now) - [datetime.datetime(2018, 8, 1, 0, 0), datetime.datetime(2018, 8, 2, 0, 0)] - >>> dts.combine(TimeToken(15)) - [8/1/2018 3 pm, 8/2/2018 3 pm] - >>> dts2 = DayList(dt1) - >>> dts3 = DayList() - >>> dts.extend(dts3) == dts - True - >>> dts.extend(dts2) - [8/1/2018, 8/2/2018, 8/1/2018] - >>> dts.combine(AmbiguousToken()) == dts - True - """ - - def combine(self, other): - if isinstance(other, (TimeRange, TimeToken, DayTimeToken)): - return DayTimeList(*[token.combine(other) for token in self.tokens]) - return self - - def extend(self, other): - assert isinstance(other, DayList) - if other.tokens: - tokens = self.tokens + other.tokens - return DayList(*tokens) - return self - - -class TimeToken(Token): - """ - >>> TimeToken(3, 'pm') - 3 pm - >>> TimeToken(3, None) - 3:00 - >>> TimeToken(3) - 3:00 - >>> TimeToken(12, 'pm') - 12 pm - >>> TimeToken(12, 'am') - 12 am - >>> TimeToken(12) - 12 pm - """ - - def __init__(self, relative_hour, time_of_day=None, minute=0): - self.relative_hour = relative_hour - self.minute = minute - self.time_of_day = time_of_day - - if relative_hour > 12: - assert time_of_day != 'pm' - self.relative_hour = relative_hour - 12 - self.hour = relative_hour - self.time_of_day = 'pm' - 
elif time_of_day == 'pm' and relative_hour == 12: - self.hour = 12 - elif time_of_day == 'pm' and relative_hour != 12: - self.hour = self.relative_hour + 12 - elif time_of_day == 'am' and relative_hour == 12: - self.hour = 0 - elif relative_hour == 12: - self.hour = 12 - self.time_of_day = 'pm' - else: - self.hour = self.relative_hour - - assert 0 <= self.hour < 24 - assert 0 <= self.minute < 60 - - def datetime(self, now): - return datetime.datetime(now.year, now.month, now.day, self.hour, self.minute) - - def string(self, with_time_of_day=True): - if self.time_of_day is None: - return '{}:{:02d}'.format(self.hour, self.minute) - if self.minute == 0: - if with_time_of_day: - return '{} {}'.format(self.relative_hour, self.time_of_day) - else: - return str(self.relative_hour) - if not with_time_of_day: - return '{}:{:02d}'.format(self.relative_hour, self.minute) - return '{}:{:02d} {}'.format( - self.relative_hour, self.minute, self.time_of_day) - - @staticmethod - def update_time_of_day(self, _, time_of_day): - """ - >>> time = TimeToken(3) - >>> TimeToken.update_time_of_day(time, None, 'pm') - >>> time - 3 pm - >>> time.hour - 15 - >>> TimeToken.update_time_of_day(time, None, 'am') - >>> time - 3 am - >>> time.hour - 3 - """ - if time_of_day != self.time_of_day: - if time_of_day == 'pm': - self.hour += 12 - else: - self.hour -= 12 - self.time_of_day = time_of_day - - def apply(self, other): - assert isinstance(other, TimeToken) - self.share('time_of_day', other, setter=TimeToken.update_time_of_day) - - def __repr__(self): - return self.string() - - -class TimeRange(Token): - - def __init__(self, start, end): - self.start = start - self.end = end - - def datetime(self, now): - return (self.start.datetime(now), self.end.datetime(now)) - - def __repr__(self): - if self.start.time_of_day == self.end.time_of_day != None: - return '{}-{}'.format(self.start.string(False), self.end.string()) - return '{} - {}'.format(repr(self.start), repr(self.end)) - - -class 
TimeList(ListToken): - """ - >>> t1 = TimeToken(15) - >>> t2 = TimeToken(17) - >>> t3 = TimeToken(3, None) - >>> ts = TimeList(t1, t2) - >>> dt = DayToken(8, 1, 2018) - >>> ts.combine(dt) - [8/1/2018 3 pm, 8/1/2018 5 pm] - >>> ts2 = TimeList(t3) - >>> ts3 = TimeList() - >>> ts.extend(ts3) == ts - True - >>> t3.time_of_day - >>> ts.extend(ts2) - [3 pm, 5 pm, 3 pm] - >>> ts.combine(AmbiguousToken()) == ts - True - """ - - def combine(self, other): - if isinstance(other, (DayRange, DayToken)): - return DayTimeList(*[other.combine(token) for token in self.tokens]) - return self - - def extend(self, other): - assert isinstance(other, TimeList) - if len(other.tokens) > 0: - for token in self.tokens: - token.apply(other.tokens[0]) - tokens = self.tokens + other.tokens - return TimeList(*tokens) - return self - - -class AmbiguousToken(Token): - """ - >>> now = datetime.datetime(2018, 1, 1) - >>> amb = AmbiguousToken(TimeToken(15)) - >>> amb.datetime(now) - datetime.datetime(2018, 1, 1, 15, 0) - """ - - def __init__(self, *tokens): - self.tokens = tokens - - def has_time_range_token(self): - return any([isinstance(token, TimeRange) for token in self.tokens]) - - def get_time_range_token(self): - for token in self.tokens: - if isinstance(token, TimeRange): - return token - - def has_day_range_token(self): - return any([isinstance(token, DayRange) for token in self.tokens]) - - def get_day_range_token(self): - for token in self.tokens: - if isinstance(token, DayRange): - return token - - def has_day_token(self): - return any([isinstance(token, DayToken) for token in self.tokens]) - - def get_day_token(self): - for token in self.tokens: - if isinstance(token, DayToken): - return token - - def datetime(self, now): - return self.tokens[0].datetime(now=now) - - def __repr__(self): - return ' OR '.join(map(repr, self.tokens)) diff --git a/timefhuman/grammar.lark b/timefhuman/grammar.lark new file mode 100644 index 0000000..26c15db --- /dev/null +++ b/timefhuman/grammar.lark @@ 
-0,0 +1,134 @@ +%import common.WS +%import common.INT +%ignore WS + +// ---------------------- +// TERMINAL DEFINITIONS +// ---------------------- + +// Month names as a regex token, case-insensitive +MONTHNAME: /(?i)(?>> now = datetime.datetime(year=2018, month=8, day=4) - >>> timefhuman('upcoming Monday noon', now=now) # natural language - datetime.datetime(2018, 8, 6, 12, 0) - >>> timefhuman('Monday 3 pm, Tu noon', now=now) # multiple datetimes - [datetime.datetime(2018, 8, 6, 15, 0), datetime.datetime(2018, 8, 7, 12, 0)] - >>> timefhuman('7/17 3:30-4 PM', now=now) # time range - (datetime.datetime(2018, 7, 17, 15, 30), datetime.datetime(2018, 7, 17, 16, 0)) - >>> timefhuman('7/17 3:30 p.m. - 4 p.m.', now=now) - (datetime.datetime(2018, 7, 17, 15, 30), datetime.datetime(2018, 7, 17, 16, 0)) - >>> timefhuman('7/17 or 7/18 3 p.m.', now=now) # date range - [datetime.datetime(2018, 7, 17, 15, 0), datetime.datetime(2018, 7, 18, 15, 0)] - >>> timefhuman('today or tomorrow noon', now=now) # choices w. 
natural language - [datetime.datetime(2018, 8, 4, 12, 0), datetime.datetime(2018, 8, 5, 12, 0)] - >>> timefhuman('2 PM on 7/17 or 7/19') # time applies to both dates - [datetime.datetime(2018, 7, 17, 14, 0), datetime.datetime(2018, 7, 19, 14, 0)] - >>> timefhuman('2 PM on 7/17 or 7/19', raw=True) - [[7/17/2018 2 pm, 7/19/2018 2 pm]] - """ - if now is None: - now = datetime.datetime.now() +DIRECTORY = Path(__file__).parent +parser = None +timezone_mapping = None + - tokens = timefhuman_tokens(string, now) +def get_parser(): + global parser, timezone_mapping + if parser is None: + timezone_mapping = generate_timezone_mapping() + with open(DIRECTORY / 'grammar.lark', 'r') as file: + grammar = file.read() + grammar = grammar.replace('(TIMEZONE_MAPPING)', '|'.join(timezone_mapping.keys())) + parser = Lark(grammar, start="start", propagate_positions=True) + return parser + + +def timefhuman(string, config: tfhConfig = tfhConfig(), raw=None): + parser = get_parser() + tree = parser.parse(string) if raw: - return tokens - datetimes = [tok.datetime(now) for tok in tokens if isinstance(tok, Token)] + return tree - if len(datetimes) == 1: # TODO: bad idea? + transformer = tfhTransformer(config=config) + renderers = transformer.transform(tree) + renderers = list(filter(lambda r: not isinstance(r, (tfhUnknown, tfhAmbiguous)), renderers)) + datetimes = [renderer.to_object(config) for renderer in renderers] + + if config.return_matched_text: + positions = [(renderer.matched_text_pos[0], renderer.matched_text_pos[1]) for renderer in renderers] + matched_texts = [string[start: end] for start, end in positions] + return list(zip(matched_texts, positions, datetimes)) + + if config.return_single_object and len(datetimes) == 1: return datetimes[0] return datetimes - # TODO: What if user specifies vernacular AND actual date time. Let - # specified date time take precedence. 
+def infer_from(source: tfhDatelike, target: tfhDatelike): + if isinstance(source, tfhAmbiguous): + # NOTE: Ambiguous tokens have no information to offer + return target + if isinstance(target, tfhAmbiguous) and isinstance(source, tfhDatelike): + if source.time: + target = tfhDatetime(time=tfhTime(hour=target.value, meridiem=source.meridiem)) + elif source.year: + target = tfhDatetime(date=tfhDate(year=target.value)) + elif source.day: + target = tfhDatetime(date=tfhDate(day=target.value)) + elif source.month: + target = tfhDatetime(date=tfhDate(month=target.value)) + else: + raise NotImplementedError(f"Not enough context to infer what {target} is") + if isinstance(source, tfhDatelike) and isinstance(target, tfhDatelike): + if source.date and not target.date: + target.date = source.date + if source.time and not target.time: + target.time = source.time + if source.month and not target.month: + target.month = source.month + if source.year and not target.year: + target.year = source.year + if source.meridiem and not target.meridiem: + target.meridiem = source.meridiem + if source.tz and not target.tz: + target.tz = source.tz + if isinstance(source, tfhTimedelta) and isinstance(target, tfhAmbiguous): + target = tfhTimedelta.from_object(timedelta(**{source.unit: target.value}), unit=source.unit) + return target + + +def infer(datetimes): + """ + Infer any missing components of datetimes from the first or last datetime. 
+ """ + for i, dt in enumerate(datetimes[1:], start=1): + datetimes[i] = infer_from(datetimes[0], dt) + + for i, dt in enumerate(datetimes[:-1]): + datetimes[i] = infer_from(datetimes[-1], dt) + + return datetimes + + +class tfhTransformer(Transformer): + def __init__(self, config: tfhConfig = tfhConfig()): + self.config = config + + def start(self, children): + """Strip the 'start' rule and return child(ren) directly.""" + return children + + @v_args(tree=True) + def expression(self, tree): + """The top-level expression could be a range, list, or single.""" + expr = tree.children[0] + if self.config.return_matched_text: + assert isinstance(expr, tfhMatchable), f"Expected tfhDatelike or tfhAmbiguous, got {type(expr)}" + expr.matched_text_pos = (tree.meta.start_pos, tree.meta.end_pos) + return expr + + def unknown(self, children): + return tfhUnknown(children[0].value) + + def single(self, children): + """A single object can be a datetime, a date, or a time.""" + if len(children) == 1 and hasattr(children[0], 'data') and children[0].data.value == 'ambiguous': + return tfhAmbiguous(int(children[0].children[0].value)) + return children[0] + + ############### + # Collections # + ############### + + def range(self, children): + """Handles expressions like '7/17 3 PM - 7/18 4 PM'.""" + assert len(children) == 2 + return tfhRange(infer(children)) + + def list(self, children): + """Handles comma/or lists like '7/17, 7/18, 7/19' or '7/17 or 7/18'.""" + return tfhList(infer(children)) + + ############ + # Duration # + ############ + + def duration(self, children): + # TODO: just grabbing the first may cause problems later. how to do this more generically? 
+ return tfhTimedelta.from_object(sum([child.to_object(self.config) for child in children], timedelta()), unit=children[0].unit) + + def duration_part(self, children): + mapping = { + 'an': 1, + 'a': 1, + 'one': 1, + 'two': 2, + 'three': 3, + 'four': 4, + 'five': 5, + 'six': 6, + 'seven': 7, + 'eight': 8, + 'nine': 9, + 'ten': 10, + 'eleven': 11, + 'twelve': 12, + 'thirteen': 13, + 'fourteen': 14, + 'fifteen': 15, + 'sixteen': 16, + 'seventeen': 17, + 'eighteen': 18, + 'nineteen': 19, + 'twenty': 20, + 'thirty': 30, + 'forty': 40, + 'fifty': 50, + 'sixty': 60, + 'seventy': 70, + 'eighty': 80, + 'ninety': 90, + } + # TODO: write my own multidict? + data = {child.data.value: [_child.value for _child in child.children] for child in children} + duration_number = float(data['duration_number'][0]) if 'duration_number' in data else sum([mapping[value] for value in data.get('duration_numbername', [])]) + duration_unit = data.get('duration_unit', data.get('duration_unit_letter', None))[0] + for group in ( + ('minutes', 'minute', 'mins', 'min', 'm'), + ('hours', 'hour', 'hrs', 'hr', 'h'), + ('days', 'day', 'd'), + ('weeks', 'week', 'wks', 'wk'), + ('months', 'month', 'mos'), + ('years', 'year', 'yrs', 'yr'), + ): + if duration_unit in group: + return tfhTimedelta.from_object(timedelta(**{group[0]: duration_number}), unit=group[0]) + raise NotImplementedError(f"Unknown duration unit: {data['duration_unit']}") + + ############ + # Datetime # + ############ + + def datetime(self, children): + data = nodes_to_dict(children) + if 'datetime' in data: + return data['datetime'] + return tfhDatetime(date=data.get('date'), time=data.get('time')) + + def date(self, children): + data = nodes_to_dict(children) + + if 'date' in data: + # TODO: simply return data? 
+ return {'date': data['date']} + + # If there's a weekday and no other date info, use the weekday + if 'weekday' in data and all(key not in data for key in ('day', 'month', 'year')): + return {'date': data['weekday']} + + delta = None + if 'offset' in data: + _data = nodes_to_multidict(children) + delta = relativedelta(years=sum(_data['offset'])) # sum offsets, such as 'next next' + elif 'position' in data: + assert 'month' in data and 'weekday' in data + weekday = weekdays[data['weekday'].to_object(self.config).weekday()] + position = data['position'] + if position == 'first': + delta = relativedelta(day=1, weekday=weekday(+1)) + elif position == 'second': + delta = relativedelta(day=8, weekday=weekday(+1)) + elif position == 'third': + delta = relativedelta(day=15, weekday=weekday(+1)) + elif position == 'fourth': + delta = relativedelta(day=22, weekday=weekday(+1)) + elif position == 'last': + delta = relativedelta(day=31, weekday=weekday(-1)) + else: + raise NotImplementedError(f"Unknown position: {position}") + + return {'date': tfhDate( + year=data.get('year'), + month=data.get('month'), + day=data.get('day'), + delta=delta, + )} + + + def day(self, children): + return {'day': int(children[0].value)} + + def month(self, children): + return {'month': int(children[0].value)} + + def year(self, children): + value = int(children[0].value) + + if 50 < value < 100: + value = 1900 + value + elif 0 < value < 50: + value = 2000 + value + + return {'year': value} + + def monthname(self, children): + monthname = children[0].value.lower() + month = get_month_mapping().get(monthname, self.config.now.month) + return {'month': month} + + def weekday(self, children): + data = nodes_to_multidict(children) + + weekday = data['WEEKDAY'][0][:2].lower() + target_weekday = ['mo', 'tu', 'we', 'th', 'fr', 'sa', 'su'].index(weekday) + + offset = direction_to_offset(self.config.direction) + if 'offset' in data: + offset = sum(data['offset']) # sum offsets, such as 'next next' + + # 
TODO: store as delta and let renderer infer date? + date = self.config.now.date() + relativedelta(weekday=weekdays[target_weekday](offset)) + return {'weekday': tfhDate.from_object(date)} + + def modifier(self, children): + value = children[0].value + if value in ('next', 'upcoming', 'following'): + return {'offset': +1} + elif value in ('previous', 'last', 'past', 'preceding'): # TODO: support 'last' for both meanings + return {'offset': -1} + elif value == 'this': + return {'offset': 0} + raise NotImplementedError(f"Unknown modifier: {value}") + + def datename(self, children): + datename = children[0].value.lower() + if datename == 'today': + _date = tfhDate.from_object(self.config.now.date()) + elif datename == 'tomorrow': + _date = tfhDate.from_object(self.config.now.date() + timedelta(days=1)) + elif datename == 'yesterday': + _date = tfhDate.from_object(self.config.now.date() - timedelta(days=1)) + else: + raise NotImplementedError(f"Unknown datename: {datename}") + return {'date': _date} + + def dayoryear(self, children): + if children[0].value.isdigit(): + value = int(children[0].value) + return {'day': value} if value < 32 else {'year': value} + raise NotImplementedError(f"Unknown day or year: {children[0]}") + + def time(self, children): + data = nodes_to_dict(children) + + if 'time' in data: + # TODO: simply return data? 
+ return {'time': data['time']} + + return {'time': tfhTime( + hour=int(data.get("hour", 0)), + minute=int(data.get("minute", 0)), + second=int(data.get("second", 0)), + millisecond=int(data.get("millisecond", 0)), + meridiem=data.get("meridiem", None), + tz=data.get("timezone", None), + )} + + def meridiem(self, children): + meridiem = children[0].value.lower() + if meridiem.startswith('a'): + return {'meridiem': tfhTime.Meridiem.AM} + elif meridiem.startswith('p'): + return {'meridiem': tfhTime.Meridiem.PM} + raise NotImplementedError(f"Unknown meridiem: {meridiem}") + + def timezone(self, children): + timezone = children[0].value.lower() + return {'timezone': pytz.timezone(timezone_mapping[timezone])} + + def timename(self, children): + timename = children[0].value.lower() + if timename == 'noon': + _time = tfhTime(hour=12, minute=0, meridiem=tfhTime.Meridiem.PM) + elif timename == 'midday': + _time = tfhTime(hour=12, minute=0, meridiem=tfhTime.Meridiem.PM) + elif timename == 'midnight': + _time = tfhTime(hour=0, minute=0, meridiem=tfhTime.Meridiem.AM) + elif timename == 'morning': + _time = tfhTime(hour=6, minute=0, meridiem=tfhTime.Meridiem.AM) + elif timename == 'afternoon': + _time = tfhTime(hour=15, minute=0, meridiem=tfhTime.Meridiem.PM) + elif timename == 'evening': + _time = tfhTime(hour=18, minute=0, meridiem=tfhTime.Meridiem.PM) + elif timename == 'night': + _time = tfhTime(hour=20, minute=0, meridiem=tfhTime.Meridiem.PM) + else: + raise NotImplementedError(f"Unknown timename: {timename}") + return {'time': _time} + + def houronly(self, children): + return {'time': tfhTime(hour=int(children[0].value))} + + def datetimename(self, children): + datetimename = children[0].value.lower() + if datetimename == 'tonight': + _datetime = tfhDatetime(date=tfhDate.from_object(self.config.now.date()), time=tfhTime(hour=20, minute=0, meridiem=tfhTime.Meridiem.PM)) + else: + raise NotImplementedError(f"Unknown datetimename: {datetimename}") + return {'datetime': 
class tfhCollection(tfhDatelike):
    """
    Several tfhDatelike members presented as a single tfhDatelike.

    Reading one of the shared fields (date, time, year, month, day, meridiem,
    tz) yields the first truthy value found among ``items``, or None when no
    member has one. Assigning a field writes the value onto every member.
    """

    def __init__(self, items):
        self.items = items

    def getter(key):
        # Factory: build the read accessor for the property named ``key``.
        def first_truthy(self):
            values = (getattr(member, key) for member in self.items)
            return next((value for value in values if value), None)
        return first_truthy

    def setter(key):
        # Factory: build the write accessor that broadcasts to all members.
        def broadcast(self, value):
            for member in self.items:
                setattr(member, key, value)
        return broadcast

    date = property(getter('date'), setter('date'))
    time = property(getter('time'), setter('time'))
    year = property(getter('year'), setter('year'))
    month = property(getter('month'), setter('month'))
    day = property(getter('day'), setter('day'))
    meridiem = property(getter('meridiem'), setter('meridiem'))
    tz = property(getter('tz'), setter('tz'))
class tfhTime:
    """
    A time of day whose fields may be only partially known.

    ``hour`` is kept exactly as supplied. When ``meridiem`` is set, the
    12-hour to 24-hour conversion happens in ``to_object`` and never changes
    the stored value.
    """
    Meridiem = Enum('Meridiem', ['AM', 'PM'])

    def __init__(
        self,
        hour: Optional[int] = None,
        minute: Optional[int] = None,
        second: Optional[int] = None,
        millisecond: Optional[int] = None,
        meridiem: Optional[Meridiem] = None,
        tz: Optional[pytz.timezone] = None,
    ):
        self.hour = hour
        self.minute = minute
        self.second = second
        self.millisecond = millisecond
        self.meridiem = meridiem
        self.tz = tz

    def to_object(self, config: tfhConfig = tfhConfig()) -> time:
        """Convert to a real ``datetime.time``.

        Assumes ``hour`` is filled in. A missing minute, second or
        millisecond counts as 0. ``config`` is not read here.
        """
        # Work on a local copy so that converting does not rewrite self.hour.
        hour = self.hour
        if self.meridiem == tfhTime.Meridiem.PM and hour < 12:
            hour += 12
        elif self.meridiem == tfhTime.Meridiem.AM and hour == 12:
            hour = 0
        # datetime.time's fourth positional argument is *microseconds*.
        microsecond = (self.millisecond or 0) * 1000
        return time(hour, self.minute or 0, self.second or 0, microsecond, tzinfo=self.tz)

    @classmethod
    def from_object(cls, obj: time):
        """Build a tfhTime from a ``datetime.time`` (or a tfhTime-like object).

        A ``datetime.time`` has no ``millisecond``, ``meridiem`` or ``tz``
        attribute, so those are derived from ``microsecond`` and ``tzinfo``.
        Its hour is already on the 24-hour clock, so no meridiem is set.
        Any other object is copied attribute-for-attribute.
        """
        if isinstance(obj, time):
            return cls(
                hour=obj.hour,
                minute=obj.minute,
                second=obj.second,
                millisecond=obj.microsecond // 1000,
                meridiem=None,
                tz=obj.tzinfo,
            )
        return cls(hour=obj.hour, minute=obj.minute, second=obj.second, millisecond=obj.millisecond, meridiem=obj.meridiem, tz=obj.tz)

    def __repr__(self):
        return (f"tfhTime("
                f"hour={self.hour}, minute={self.minute}, second={self.second}, millisecond={self.millisecond}, meridiem={self.meridiem}, tz={self.tz})")
class tfhAmbiguous(tfhMatchable):
    """A bare number that could still be an hour, a day, a month, or a year."""

    def __init__(self, value: int):
        self.value = value

    def __repr__(self):
        return f"tfhAmbiguous({self.value})"

    @classmethod
    def from_object(cls, obj: int):
        """Wrap a plain value in a tfhAmbiguous."""
        return cls(obj)

    def to_object(self, config: tfhConfig = tfhConfig()):
        """Return the wrapped value as a string; ``config`` is not read."""
        # NOTE: an ambiguous token that was never resolved is handed back as text.
        unresolved = self.value
        return str(unresolved)
def generic_tokenize(characters):
    """Default tokenizer.

    Splits on whitespace (dropped), on commas (emitted as their own token),
    and wherever the character type changes between two non-punctuation
    types, e.g. between digits and letters. Punctuation never forces a
    split, so 'p.m.' and '7/17/18' stay whole. Empty tokens are never
    yielded, including for empty input or input ending in whitespace or a
    comma.

    >>> list(generic_tokenize('7/17/18 3:00 p.m.'))
    ['7/17/18', '3:00', 'p.m.']
    >>> list(generic_tokenize('July 17, 2018 at 3p.m.'))
    ['July', '17', ',', '2018', 'at', '3', 'p.m.']
    >>> list(generic_tokenize('July 17, 2018 3 p.m.'))
    ['July', '17', ',', '2018', '3', 'p.m.']
    >>> list(generic_tokenize('3PM on July 17'))
    ['3', 'PM', 'on', 'July', '17']
    >>> list(generic_tokenize('tomorrow noon,Wed 3 p.m.,Fri 11 AM'))
    ['tomorrow', 'noon', ',', 'Wed', '3', 'p.m.', ',', 'Fri', '11', 'AM']
    >>> list(generic_tokenize('3 pm '))
    ['3', 'pm']
    >>> list(generic_tokenize(''))
    []
    """
    token = ''
    last_type = None
    for character in characters:
        char_type = get_character_type(character)
        is_different_type = (
            None not in (char_type, last_type)
            and char_type != last_type
            and 'punctuation' not in (char_type, last_type)
        )
        is_skip_character = character in string.whitespace
        is_break_character = character in ','

        if is_skip_character or is_different_type or is_break_character:
            if token:
                yield token
            # Whitespace is discarded; any other boundary character opens
            # the next token.
            token = '' if is_skip_character else character
            if is_break_character:
                # A comma is a complete token on its own.
                yield token
                token = ''
        else:
            token += character
        last_type = char_type

    # Flush the last token only if there is one, so that no '' is emitted.
    if token:
        yield token
def get_character_type(character):
    """Classify a character as 'alpha', 'numeric', 'punctuation', or None.

    The checks run in that order and the first one that holds wins; None is
    returned when none of them holds (e.g. for a space).

    >>> get_character_type('a')
    'alpha'
    >>> get_character_type('1')
    'numeric'
    >>> get_character_type('.')
    'punctuation'
    >>> get_character_type(' ')
    """
    checks = (
        (character.isalpha, 'alpha'),
        (character.isnumeric, 'numeric'),
        (lambda: character in string.punctuation, 'punctuation'),
    )
    for holds, label in checks:
        if holds():
            return label
    return None
- - >>> build_tree([DayToken(7, 5, 2018), TimeToken(12, 'pm')]) - [7/5/2018 12 pm] - >>> build_tree([TimeToken(9), 'on', DayToken(7, 5, 2018)]) - [7/5/2018 9:00] - >>> build_tree([DayToken(7, 5, 2018), TimeToken(9), '-', TimeToken(11)]) - [7/5/2018 9:00 - 11:00] - >>> build_tree([DayToken(7, 5, 2018), 'to', DayToken(7, 7, 2018), TimeToken(11)]) - [7/5/2018 11:00 - 7/7/2018 11:00] - >>> build_tree([DayToken(7, 5, 2018), 'or', DayToken(7, 7, 2018), TimeToken(11)]) - [[7/5/2018 11:00, 7/7/2018 11:00]] - >>> build_tree([DayToken(7, 5, 2018), TimeToken(3, None), 'or', TimeToken(4, 'pm')]) - [[7/5/2018 3 pm, 7/5/2018 4 pm]] - """ - tokens = combine_on_at(tokens) - tokens = apply_ors(tokens) - tokens = combine_days_and_times(tokens) - tokens = apply_ors(tokens) # TODO: is this the cleanest way to do this? - tokens = combine_ors(tokens) - tokens = combine_ranges(tokens) - return tokens - - -def areinstance(tokens, classes): - """ - >>> tokens = (TimeToken(15), TimeToken(16)) - >>> areinstance(tokens, TimeToken) - True - >>> tokens = (TimeToken(15), DayToken(7, 5, 2018)) - >>> areinstance(tokens, TimeToken) - False - >>> areinstance(tokens, (TimeToken, DayToken)) - True - """ - assert isinstance(classes, type) or isinstance(classes, tuple), \ - "Classes must either be a tuple or a type." 
def ifmatchinstance(tokens, classes):
    """Report whether, and in which order, tokens line up with classes.

    Returns 1 when every token is an instance of the class at the same
    position, -1 when that only holds after reversing the tokens, and 0
    otherwise (including when the two sequences differ in length).

    >>> tokens = (TimeToken(15), TimeToken(16))
    >>> ifmatchinstance(tokens, (TimeToken, TimeToken))
    1
    >>> ifmatchinstance(tokens, (TimeToken, DayToken))
    0
    >>> both = (DayToken, TimeToken)
    >>> ifmatchinstance(tokens, (both, both))
    1
    >>> tokens = (TimeToken(15), DayToken(5, 7, 2018))
    >>> ifmatchinstance(tokens, (DayToken, TimeToken))
    -1
    >>> ifmatchinstance(tokens, ())
    0
    """
    if len(tokens) != len(classes):
        return 0

    # Try the given order first, then the reversed one.
    for step in (1, -1):
        ordered = tokens if step == 1 else tokens[::-1]
        hits = [isinstance(item, kind) for item, kind in zip(ordered, classes)]
        if all(hits):
            return step
    return 0
- [7 pm, 7/7/2018] - >>> combine_ranges([DayToken(7, 5, 2018), 'to', DayTimeToken(2018, 7, 7, 11)]) - [7/5/2018 11:00 - 7/7/2018 11:00] - >>> combine_ranges([DayTimeToken(2018, 7, 17, 15, 30), '-', TimeToken(16)]) - [7/17/2018 3:30-4 pm] - """ - while '-' in tokens or 'to' in tokens: - if '-' in tokens: - index = tokens.index('-') - elif 'to' in tokens: - index = tokens.index('to') - else: - return tokens # TODO: incorrect; these returns should skip over this index - - if index == len(tokens) - 1 or index == 0: # doesn't have both start, end - return tokens - - end = tokens[index+1] - start = tokens[index-1] - - daytime_day_or_time_step = ifmatchinstance([start, end], (DayTimeToken, (DayToken, TimeToken))) - - if areinstance((start, end), TimeToken): - tokens = tokens[:index-1] + [TimeRange(start, end)] + tokens[index+2:] - elif areinstance((start, end), DayToken): - tokens = tokens[:index-1] + [DayRange(start, end)] + tokens[index+2:] - elif daytime_day_or_time_step: - daytime1, day_or_time = [start, end][::daytime_day_or_time_step] - daytime2 = daytime1.combine(day_or_time) - if daytime1 is not start: - daytime1, daytime2 = daytime2, daytime1 - tokens = tokens[:index-1] + [DayTimeRange(daytime1, daytime2)] + tokens[index+2:] - elif areinstance((start, end), DayTimeToken): - tokens = tokens[:index-1] + [DayTimeRange(start, end)] + tokens[index+2:] - else: - tokens = tokens[:index] + tokens[index+1:] # ignore meaningless dashes, to - return tokens - - -def combine_on_at(tokens): - """ - >>> combine_on_at([TimeToken(9), 'on', DayToken(7, 5, 2018)]) - [7/5/2018 9:00] - >>> combine_on_at([DayToken(7, 5, 2018), 'at', TimeToken(9)]) - [7/5/2018 9:00] - >>> combine_on_at(['at', TimeToken(9)]) - [9:00] - >>> combine_on_at([TimeToken(9), 'on']) - [9:00] - >>> combine_on_at([TimeToken(9), 'at', TimeToken(9)]) # malformed, ignored at # TODO: alert? 
- [9:00, 9:00] - """ - for keyword in ('on', 'at'): - while keyword in tokens: - i = tokens.index(keyword) - if i <= 0 or i + 1 >= len(tokens): - tokens = tokens[:i] + tokens[i+1:] - continue - match = matchinstance((tokens[i-1], tokens[i+1]), (TimeToken, DayToken)) - if not match: - tokens = tokens[:i] + tokens[i+1:] - continue - time, day = match - daytime = DayTimeToken.from_day_time(day, time) - tokens = tokens[:i-1] + [daytime] + tokens[i+2:] - return tokens - - -def combine_days_and_times(tokens): - """ - >>> combine_days_and_times([DayToken(7, 5, 2018), 'or', DayToken(7, 7, 2018), TimeToken(11)]) - [7/5/2018, 'or', 7/7/2018 11:00] - >>> combine_days_and_times(['or', DayToken(7, 7, 2018), TimeToken(11)]) - ['or', 7/7/2018 11:00] - >>> combine_days_and_times([TimeToken(11), DayToken(7, 7, 2018)]) - [7/7/2018 11:00] - >>> combine_days_and_times([DayToken(7, 17, 2018), TimeToken(15, minute=30), '-', TimeToken(16)]) - [7/17/2018 3:30 pm, '-', 4 pm] - """ - cursor = 0 - day_tokens = (DayToken, DayRange) - time_tokens = (TimeToken, TimeRange) - while cursor + 1 < len(tokens): - amb_time_match = matchinstance(tokens[cursor:cursor+2], (AmbiguousToken, time_tokens)) - day_time_match = matchinstance(tokens[cursor:cursor+2], (day_tokens, time_tokens)) - - if amb_time_match and amb_time_match[0].has_day_token(): - ambiguous, time = amb_time_match - day = ambiguous.get_day_token() - day_time_match = (day, time) - - if day_time_match: - day, time = day_time_match - token = day.combine(time) - tokens = tokens[:cursor] + [token] + tokens[cursor+2:] - cursor += 1 - return tokens - - -def apply_ors(tokens): - """Transfer times across days if the other days don't have times. 
- - >>> apply_ors([DayToken(7, 5, 2018), 'or', DayToken(7, 7, 2018)]) - [7/5/2018, 'or', 7/7/2018] - >>> apply_ors([TimeToken(3, None), 'or', TimeToken(4, 'pm')]) - [3 pm, 'or', 4 pm] - >>> apply_ors([DayToken(7, 5, 2018), 'or', DayTimeToken(2018, 7, 7, 15)]) - [7/5/2018 3 pm, 'or', 7/7/2018 3 pm] - >>> apply_ors(['or', TimeToken(4, 'pm')]) - ['or', 4 pm] - """ - tokens = [token if token != ',' else 'or' for token in tokens] - index = 1 - while index + 1 < len(tokens): - if tokens[index] != 'or': - index += 1 - continue - - # TODO: too explicit, need generic way to "cast" - candidates = (tokens[index-1], tokens[index+1]) - day_or_time_daytime_step = ifmatchinstance(candidates, ((TimeToken, DayToken), DayTimeToken)) - amb_timerange_step = ifmatchinstance(candidates, (AmbiguousToken, TimeRange)) - timerange_daytimerange_step = ifmatchinstance(candidates, (TimeRange, DayTimeRange)) - if day_or_time_daytime_step: - day_or_time, daytime1 = candidates[::day_or_time_daytime_step] - daytime2 = daytime1.combine(day_or_time) - tokens[index-day_or_time_daytime_step] = daytime2 - elif areinstance(candidates, TimeToken): - time1, time2 = candidates - time1.apply(time2) - elif areinstance(candidates, DayToken): - day1, day2 = candidates - day1.apply(day2) - elif amb_timerange_step and candidates[::amb_timerange_step][0].has_time_range_token(): - ambiguous, timerange = candidates[::amb_timerange_step] - tokens[index-amb_timerange_step] = ambiguous.get_time_range_token() - elif timerange_daytimerange_step: - timerange, daytimerange = candidates[::timerange_daytimerange_step] - start = daytimerange.start.day.combine(timerange.start) - end = daytimerange.end.day.combine(timerange.end) - daytimerange2 = DayTimeRange(start, end) - tokens[index-timerange_daytimerange_step] = daytimerange2 - - candidates = (tokens[index-1], tokens[index+1]) - timerange_timerange_step = ifmatchinstance(candidates, (TimeRange, TimeRange)) - if timerange_timerange_step: - timerange1, timerange2 = 
candidates[::timerange_timerange_step] - timerange1.start.apply(timerange2.start) - timerange1.end.apply(timerange2.end) - index += 1 - return tokens - - -def combine_ors(tokens): - """Combine lists. - - >>> combine_ors([DayToken(7, 5, 2018), 'or', DayToken(7, 7, 2018), 'or', DayToken(7, 9, 2018), 'or', DayTimeToken(2018, 7, 11, 15)]) - [[7/5/2018 3 pm, 7/7/2018 3 pm, 7/9/2018 3 pm, 7/11/2018 3 pm]] - >>> combine_ors([TimeToken(3, 'pm'), 'or', TimeToken(4, 'pm')]) - [[3 pm, 4 pm]] - >>> combine_ors([DayToken(7, 5, 2018), 'or', DayToken(7, 7, 2018), 'or', TimeToken(4, 'pm')]) - [[7/5/2018 4 pm, 7/7/2018 4 pm]] - >>> combine_ors([DayTimeToken(2018, 7, 5, 12), 'or', DayTimeToken(2018, 7, 7, 15), 'or', DayToken(7, 9, 2018)]) - [[7/5/2018 12 pm, 7/7/2018 3 pm, 7/9/2018 3 pm]] - >>> combine_ors(['or', TimeToken(4, 'pm')]) - ['or', 4 pm] - """ - tokens = [token if token != ',' else 'or' for token in tokens] - index = 1 - while index + 1 < len(tokens): - if tokens[index] != 'or': - index += 1 - continue - - candidates = (tokens[index-1], tokens[index+1]) - if areinstance(candidates, DayTimeToken): - daytime1, daytime2 = candidates - tokens = tokens[:index-1] + [DayTimeList(daytime1, daytime2)] + tokens[index+2:] - elif areinstance(candidates, TimeToken): - time1, time2 = candidates - tokens = tokens[:index-1] + [TimeList(time1, time2)] + tokens[index+2:] - elif areinstance(candidates, DayToken): - day1, day2 = candidates - tokens = tokens[:index-1] + [DayList(day1, day2)] + tokens[index+2:] - elif areinstance(candidates, (DayTimeList, (TimeToken, DayToken))): - lst, token = candidates - lst.append(lst[-1].combine(token)) - tokens = tokens[:index] + tokens[index+2:] - elif areinstance(candidates, (DayTimeList, DayTimeToken)): - lst, token = candidates - lst.append(token) - tokens = tokens[:index] + tokens[index+2:] - elif areinstance(candidates, (DayList, DayToken)): - lst, token = candidates - lst.append(token) - tokens = tokens[:index] + tokens[index+2:] - elif 
@dataclass
class tfhConfig:
    """Options that control how parsed results are turned into Python objects."""

    # Default to the next valid datetime or the previous one
    direction: Direction = Direction.next

    # Always produce datetime objects. If no date, use the current date. If no time, use midnight.
    infer_datetimes: bool = True

    # The 'current' datetime, used if infer_datetimes is True. Leaving it as
    # None (the default) means "the moment this config object is created".
    now: datetime = None

    # Return the matched text from the input string
    return_matched_text: bool = False

    # Return a single object instead of a list when there's only one match
    return_single_object: bool = True

    def __post_init__(self):
        # A class-level default of datetime.now() is evaluated only once, when
        # the class is defined, so every default config would carry that same
        # timestamp. Resolve the default per instance instead.
        if self.now is None:
            self.now = datetime.now()
direction_to_offset(direction: Direction) -> int: + if direction == Direction.next: + return +1 + elif direction == Direction.previous: + return -1 + else: + return 0