From d744bbd3f5492b385418cf48e8fb4d4955915f28 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 8 Jun 2015 13:35:31 +0200 Subject: [PATCH 1/8] Add progress bar to pandas apply function --- tqdm/__init__.py | 5 +- tqdm/_pandas.py | 105 +++++++++++++++++++++++++++++++++++++ tqdm/tests/tests_pandas.py | 53 +++++++++++++++++++ 3 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 tqdm/_pandas.py create mode 100644 tqdm/tests/tests_pandas.py diff --git a/tqdm/__init__.py b/tqdm/__init__.py index f8953cd41..4d448eda6 100644 --- a/tqdm/__init__.py +++ b/tqdm/__init__.py @@ -2,5 +2,8 @@ from ._tqdm import trange from ._tqdm import format_interval from ._tqdm import format_meter +from ._pandas import enable_progress_apply -__all__ = ['tqdm', 'trange', 'format_interval', 'format_meter'] + +__all__ = ['tqdm', 'trange', 'format_interval', + 'format_meter', 'enable_progress_apply'] diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py new file mode 100644 index 000000000..05bd04557 --- /dev/null +++ b/tqdm/_pandas.py @@ -0,0 +1,105 @@ +import sys +import time + +from tqdm._tqdm import StatusPrinter +from tqdm._tqdm import format_meter + +__all__ = ['enable_progress_apply'] + + +def enable_progress_apply(): + try: + from pandas.core.groupby import DataFrameGroupBy + DataFrameGroupBy.progress_apply = _progress_apply + except ImportError: + raise("You can't enable Pandas progress apply because Pandas is not installed") + + +def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs): + """Add a progress bar during DataFrameGroupBy.apply(). Largely inspired from + https://stackoverflow.com/questions/18603270/progress-indicator-during-pandas-operations-python. + + Parameters + ---------- + groups : DataFrameGroupBy + Grouped data. + func : function + To be applied on the grouped data. + progress_kwargs : dict + Parameters for the progress bar (same as for `tqdm.tqdm`). 
+ + *args and *kwargs are transmitted to DataFrameGroupBy.apply() + + Examples + -------- + >>> import time + >>> import pandas as pd + >>> import numpy as np + >>> + >>> from tqdm import enable_progress_apply + >>> enable_progress_apply() + >>> + >>> # Now you can use `progress_apply` instead of `apply` + >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) + >>> df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + + """ + + mininterval = progress_kwargs['mininterval'] if 'mininterval' \ + in progress_kwargs.keys() else 0.5 + miniters = progress_kwargs['miniters'] if 'miniters' \ + in progress_kwargs.keys() else 1 + file = progress_kwargs['file'] if 'file' \ + in progress_kwargs.keys() else sys.stderr + desc = progress_kwargs['desc'] if 'desc' \ + in progress_kwargs.keys() else '' + leave = progress_kwargs['leave'] if 'leave' \ + in progress_kwargs.keys() else False + + for key, value in progress_kwargs.items(): + locals()[key] = value + + prefix = desc + ': ' if desc else '' + + total = len(groups) + + sp = StatusPrinter(file) + sp.print_status(prefix + format_meter(0, total, 0)) + + def progress_decorator(func): + + def wrapper(*args, **kwargs): + + start_t = wrapper.start_t + last_print_t = wrapper.last_print_t + last_print_n = wrapper.last_print_n + n = wrapper.n + + if n - last_print_n >= miniters: + # We check the counter first, to reduce the overhead of + # time.time() + cur_t = time.time() + if cur_t - last_print_t >= mininterval: + fmeter = format_meter(n, total, cur_t - start_t) + sp.print_status(prefix + fmeter) + last_print_n = n + last_print_t = cur_t + + wrapper.n += 1 + return func(*args, **kwargs) + + wrapper.start_t = time.time() + wrapper.last_print_t = wrapper.start_t + wrapper.last_print_n = 0 + wrapper.n = 0 + + return wrapper + + progress_func = progress_decorator(func) + result = groups.apply(progress_func, *args, **kwargs) + + if not leave: + sp.print_status('') + sys.stdout.write('\r') + + return result diff --git 
a/tqdm/tests/tests_pandas.py b/tqdm/tests/tests_pandas.py new file mode 100644 index 000000000..72a61d42d --- /dev/null +++ b/tqdm/tests/tests_pandas.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +try: + from StringIO import StringIO +except: + from io import StringIO + +import time + +from nose.plugins.skip import SkipTest +from nose.tools import with_setup + +from tqdm import tqdm + +def setup_pandas(): + try: + from tqdm import enable_progress_apply + enable_progress_apply() + except: + raise SkipTest + + +@with_setup(setup_pandas) +def test_pandas(): + + import pandas as pd + import numpy as np + + our_file = StringIO() + + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + df.groupby(0).progress_apply(lambda x: time.sleep(0.01), + progress_kwargs=dict(file=our_file, leave=False)) + + our_file.seek(0) + + assert "|##########| 100/100 100%" in our_file.read() + + +@with_setup(setup_pandas) +def test_pandas_leave(): + + import pandas as pd + import numpy as np + + our_file = StringIO() + + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + df.groupby(0).progress_apply(lambda x: time.sleep(0.01), + progress_kwargs=dict(file=our_file, leave=True)) + our_file.seek(0) + + assert "|##########| 100/100 100%" in our_file.read() From 8add12710e7087b32fd65f4cea634354d21cfee6 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 8 Jun 2015 13:41:30 +0200 Subject: [PATCH 2/8] Fix lines larger than 80 (I would prefer 100) --- tqdm/_pandas.py | 3 ++- tqdm/tests/tests_pandas.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py index 05bd04557..e0224a71a 100644 --- a/tqdm/_pandas.py +++ b/tqdm/_pandas.py @@ -12,7 +12,8 @@ def enable_progress_apply(): from pandas.core.groupby import DataFrameGroupBy DataFrameGroupBy.progress_apply = _progress_apply except ImportError: - raise("You can't enable Pandas progress apply because Pandas is not installed") + raise("You can't enable Pandas progress 
apply ", + "because Pandas is not installed") def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs): diff --git a/tqdm/tests/tests_pandas.py b/tqdm/tests/tests_pandas.py index 72a61d42d..3077c03e8 100644 --- a/tqdm/tests/tests_pandas.py +++ b/tqdm/tests/tests_pandas.py @@ -10,7 +10,6 @@ from nose.plugins.skip import SkipTest from nose.tools import with_setup -from tqdm import tqdm def setup_pandas(): try: @@ -30,7 +29,8 @@ def test_pandas(): df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, leave=False)) + progress_kwargs=dict(file=our_file, + leave=False)) our_file.seek(0) @@ -47,7 +47,8 @@ def test_pandas_leave(): df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, leave=True)) + progress_kwargs=dict(file=our_file, + leave=True)) our_file.seek(0) assert "|##########| 100/100 100%" in our_file.read() From 5c12158dd8792c051074b7e617f22cf634743bbb Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 29 Nov 2015 16:07:43 +0000 Subject: [PATCH 3/8] untested pandas support --- tqdm/__init__.py | 4 +- tqdm/_pandas.py | 167 ++++++++++++++++++++++------------------------ tqdm/_tqdm_gui.py | 2 +- 3 files changed, 84 insertions(+), 89 deletions(-) diff --git a/tqdm/__init__.py b/tqdm/__init__.py index a2b6b4e21..57b86abc7 100644 --- a/tqdm/__init__.py +++ b/tqdm/__init__.py @@ -4,8 +4,8 @@ from ._tqdm import format_meter from ._tqdm_gui import tqdm_gui from ._tqdm_gui import tgrange -from ._pandas import enable_progress_apply +from ._pandas import tqdm_pandas from ._version import __version__ # NOQA __all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval', - 'format_meter', 'enable_progress_apply', '__version__'] + 'format_meter', 'tqdm_pandas', '__version__'] diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py index e0224a71a..480990cda 
100644 --- a/tqdm/_pandas.py +++ b/tqdm/_pandas.py @@ -1,106 +1,101 @@ -import sys -import time +# future division is important to divide integers and get as +# a result precise floating numbers (instead of truncated int) +from __future__ import division, absolute_import -from tqdm._tqdm import StatusPrinter -from tqdm._tqdm import format_meter -__all__ = ['enable_progress_apply'] +__author__ = {"github.com/": ["casperdcl", "hadim"]} +__all__ = ['tqdm_pandas'] -def enable_progress_apply(): - try: - from pandas.core.groupby import DataFrameGroupBy - DataFrameGroupBy.progress_apply = _progress_apply - except ImportError: - raise("You can't enable Pandas progress apply ", - "because Pandas is not installed") - - -def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs): - """Add a progress bar during DataFrameGroupBy.apply(). Largely inspired from - https://stackoverflow.com/questions/18603270/progress-indicator-during-pandas-operations-python. - - Parameters - ---------- - groups : DataFrameGroupBy - Grouped data. - func : function - To be applied on the grouped data. - progress_kwargs : dict - Parameters for the progress bar (same as for `tqdm.tqdm`). - - *args and *kwargs are transmitted to DataFrameGroupBy.apply() +def tqdm_pandas(t): + """ + Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`. + Don't forget to close the `tqdm` instance afterwards + (or just use `with` syntax): Examples -------- - >>> import time >>> import pandas as pd >>> import numpy as np + >>> from tqdm import tqdm, tqdm_pandas + >>> form time import time >>> - >>> from tqdm import enable_progress_apply - >>> enable_progress_apply() - >>> - >>> # Now you can use `progress_apply` instead of `apply` >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) - >>> df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + >>> with tqdm(...) as t: + ... tqdm_pandas(t) + ... # Now you can use `progress_apply` instead of `apply` + ... 
df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + References + ---------- + https://stackoverflow.com/questions/18603270/ + progress-indicator-during-pandas-operations-python """ - - mininterval = progress_kwargs['mininterval'] if 'mininterval' \ - in progress_kwargs.keys() else 0.5 - miniters = progress_kwargs['miniters'] if 'miniters' \ - in progress_kwargs.keys() else 1 - file = progress_kwargs['file'] if 'file' \ - in progress_kwargs.keys() else sys.stderr - desc = progress_kwargs['desc'] if 'desc' \ - in progress_kwargs.keys() else '' - leave = progress_kwargs['leave'] if 'leave' \ - in progress_kwargs.keys() else False - - for key, value in progress_kwargs.items(): - locals()[key] = value - - prefix = desc + ': ' if desc else '' - - total = len(groups) - - sp = StatusPrinter(file) - sp.print_status(prefix + format_meter(0, total, 0)) - - def progress_decorator(func): + from pandas.core.groupby import DataFrameGroupBy + + def inner(groups, func, progress_kwargs={}, *args, **kwargs): + """ + Parameters + ---------- + groups : DataFrameGroupBy + Grouped data. + func : function + To be applied on the grouped data. + progress_kwargs : dict + Parameters for the progress bar (same as for `tqdm`). + + *args and *kwargs are transmitted to DataFrameGroupBy.apply() + """ + for key, val in progress_kwargs.items(): + # TODO: do we need this? 
+ if getattr(t, key, None) is not None: + setattr(t, key, val) + + t.total = len(groups) + + # def progress_decorator(func): + # def wrapper(*args, **kwargs): + # start_t = wrapper.start_t + # last_print_t = wrapper.last_print_t + # last_print_n = wrapper.last_print_n + # n = wrapper.n + # + # if n - last_print_n >= miniters: + # # We check the counter first, to reduce the overhead of + # # time.time() + # cur_t = time.time() + # if cur_t - last_print_t >= mininterval: + # fmeter = format_meter(n, total, cur_t - start_t) + # sp.print_status(prefix + fmeter) + # last_print_n = n + # last_print_t = cur_t + # + # wrapper.n += 1 + # + # return func(*args, **kwargs) + # + # wrapper.start_t = time.time() + # wrapper.last_print_t = wrapper.start_t + # wrapper.last_print_n = 0 + # wrapper.n = 0 + # + # return wrapper + # progress_func = progress_decorator(func) + # result = groups.apply(progress_func, *args, **kwargs) def wrapper(*args, **kwargs): - - start_t = wrapper.start_t - last_print_t = wrapper.last_print_t - last_print_n = wrapper.last_print_n - n = wrapper.n - - if n - last_print_n >= miniters: - # We check the counter first, to reduce the overhead of - # time.time() - cur_t = time.time() - if cur_t - last_print_t >= mininterval: - fmeter = format_meter(n, total, cur_t - start_t) - sp.print_status(prefix + fmeter) - last_print_n = n - last_print_t = cur_t - - wrapper.n += 1 + t.update() return func(*args, **kwargs) - wrapper.start_t = time.time() - wrapper.last_print_t = wrapper.start_t - wrapper.last_print_n = 0 - wrapper.n = 0 - - return wrapper + result = groups.apply(wrapper, *args, **kwargs) - progress_func = progress_decorator(func) - result = groups.apply(progress_func, *args, **kwargs) + # if not leave: + # sp.print_status('') + # sys.stdout.write('\r') + # TODO: check if above can be replaced by: + t.close() - if not leave: - sp.print_status('') - sys.stdout.write('\r') + return result - return result + # Enable custom tqdm progress in pandas! 
+ DataFrameGroupBy.progress_apply = inner diff --git a/tqdm/_tqdm_gui.py b/tqdm/_tqdm_gui.py index bd7e111ef..dd951da8c 100644 --- a/tqdm/_tqdm_gui.py +++ b/tqdm/_tqdm_gui.py @@ -11,8 +11,8 @@ # a result precise floating numbers (instead of truncated int) from __future__ import division, absolute_import # import compatibility functions and utilities -from ._utils import _range from time import time +from ._utils import _range # to inherit from the tqdm class from ._tqdm import tqdm, format_meter From ec5f157df8eddf3ceff0cff28257d02a2a5ad528 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 29 Nov 2015 16:46:32 +0000 Subject: [PATCH 4/8] automated constructing and destructing of tqdm_pandas, neater tests and documentation --- tqdm/_pandas.py | 66 +++++++---------------------------- tqdm/tests/tests_pandas.py | 71 +++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 85 deletions(-) diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py index 480990cda..dec17aee5 100644 --- a/tqdm/_pandas.py +++ b/tqdm/_pandas.py @@ -2,38 +2,39 @@ # a result precise floating numbers (instead of truncated int) from __future__ import division, absolute_import +from tqdm import tqdm -__author__ = {"github.com/": ["casperdcl", "hadim"]} + +__author__ = "github.com/casperdcl" __all__ = ['tqdm_pandas'] -def tqdm_pandas(t): +def tqdm_pandas(**kwargs): """ - Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`. - Don't forget to close the `tqdm` instance afterwards - (or just use `with` syntax): + Creates a `tqdm` instance with the given kwargs and registers it with + `pandas.core.groupby.DataFrameGroupBy.progress_apply`. + It will even close() the `tqdm` instance upon completion. Examples -------- >>> import pandas as pd >>> import numpy as np >>> from tqdm import tqdm, tqdm_pandas - >>> form time import time >>> >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) - >>> with tqdm(...) as t: - ... tqdm_pandas(t) - ... 
# Now you can use `progress_apply` instead of `apply` - ... df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + >>> tqdm_pandas() # can specify any optional kwargs (same as for `tqdm`) + >>> # Now you can use `progress_apply` instead of `apply` + >>> df.groupby(0).progress_apply(lambda x: x**2) References ---------- https://stackoverflow.com/questions/18603270/ progress-indicator-during-pandas-operations-python """ + t = tqdm(**kwargs) from pandas.core.groupby import DataFrameGroupBy - def inner(groups, func, progress_kwargs={}, *args, **kwargs): + def inner(groups, func, *args, **kwargs): """ Parameters ---------- @@ -41,47 +42,10 @@ def inner(groups, func, progress_kwargs={}, *args, **kwargs): Grouped data. func : function To be applied on the grouped data. - progress_kwargs : dict - Parameters for the progress bar (same as for `tqdm`). *args and *kwargs are transmitted to DataFrameGroupBy.apply() """ - for key, val in progress_kwargs.items(): - # TODO: do we need this? - if getattr(t, key, None) is not None: - setattr(t, key, val) - - t.total = len(groups) - - # def progress_decorator(func): - # def wrapper(*args, **kwargs): - # start_t = wrapper.start_t - # last_print_t = wrapper.last_print_t - # last_print_n = wrapper.last_print_n - # n = wrapper.n - # - # if n - last_print_n >= miniters: - # # We check the counter first, to reduce the overhead of - # # time.time() - # cur_t = time.time() - # if cur_t - last_print_t >= mininterval: - # fmeter = format_meter(n, total, cur_t - start_t) - # sp.print_status(prefix + fmeter) - # last_print_n = n - # last_print_t = cur_t - # - # wrapper.n += 1 - # - # return func(*args, **kwargs) - # - # wrapper.start_t = time.time() - # wrapper.last_print_t = wrapper.start_t - # wrapper.last_print_n = 0 - # wrapper.n = 0 - # - # return wrapper - # progress_func = progress_decorator(func) - # result = groups.apply(progress_func, *args, **kwargs) + t.total = len(groups) + 1 # pandas calls update once too many def 
wrapper(*args, **kwargs): t.update() @@ -89,10 +53,6 @@ def wrapper(*args, **kwargs): result = groups.apply(wrapper, *args, **kwargs) - # if not leave: - # sp.print_status('') - # sys.stdout.write('\r') - # TODO: check if above can be replaced by: t.close() return result diff --git a/tqdm/tests/tests_pandas.py b/tqdm/tests/tests_pandas.py index 3077c03e8..056295138 100644 --- a/tqdm/tests/tests_pandas.py +++ b/tqdm/tests/tests_pandas.py @@ -1,54 +1,61 @@ from __future__ import unicode_literals +from nose.plugins.skip import SkipTest try: from StringIO import StringIO except: from io import StringIO - -import time - -from nose.plugins.skip import SkipTest -from nose.tools import with_setup - - -def setup_pandas(): - try: - from tqdm import enable_progress_apply - enable_progress_apply() - except: - raise SkipTest +# Ensure we can use `with closing(...) as ... :` syntax +if getattr(StringIO, '__exit__', False) and \ + getattr(StringIO, '__enter__', False): + def closing(arg): + return arg +else: + from contextlib import closing -@with_setup(setup_pandas) def test_pandas(): - import pandas as pd import numpy as np + try: + from tqdm import tqdm_pandas + except: + raise SkipTest - our_file = StringIO() - - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, - leave=False)) + with closing(StringIO()) as our_file: + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + tqdm_pandas(file=our_file, leave=False) + df.groupby(0).progress_apply(lambda x: None) - our_file.seek(0) + our_file.seek(0) - assert "|##########| 100/100 100%" in our_file.read() + try: + # don't expect final output since no `leave` and + # high dynamic `miniters` + assert '100%|##########| 101/101' not in our_file.read() + except: + raise AssertionError('Did not expect:\n\t100%|##########| 101/101') -@with_setup(setup_pandas) def test_pandas_leave(): - import pandas as pd import numpy as np + try: + 
from tqdm import tqdm_pandas + except: + raise SkipTest - our_file = StringIO() + with closing(StringIO()) as our_file: + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + tqdm_pandas(file=our_file, leave=True) + df.groupby(0).progress_apply(lambda x: None) - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, - leave=True)) - our_file.seek(0) + our_file.seek(0) - assert "|##########| 100/100 100%" in our_file.read() + try: + assert '100%|##########| 101/101' in our_file.read() + except: + our_file.seek(0) + raise AssertionError('\n'.join(('Expected:', + '100%|##########| 101/101', 'Got:', + our_file.read()))) From 03239da29211962bc08197262a95c755b3a9b02b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 29 Nov 2015 16:52:37 +0000 Subject: [PATCH 5/8] pandas dependency for tox --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 0b813c70d..e4b5535b6 100644 --- a/tox.ini +++ b/tox.ini @@ -13,6 +13,8 @@ deps = nose-timer coverage<4 coveralls + pandas + numpy commands = nosetests --with-coverage --with-timer --cover-package=tqdm --ignore-files="tests_perf\.py" -d -v tqdm/ coveralls From 3c9f93de4a17d946d3dba905bdef876357ca5e21 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 29 Nov 2015 17:19:34 +0000 Subject: [PATCH 6/8] remove pandas, numpy from tox (too slow), revert manual creation of tqdm --- tox.ini | 2 -- tqdm/__init__.py | 2 +- tqdm/{_pandas.py => _tqdm_pandas.py} | 9 +++------ tqdm/tests/tests_pandas.py | 19 ++++++++++--------- 4 files changed, 14 insertions(+), 18 deletions(-) rename tqdm/{_pandas.py => _tqdm_pandas.py} (87%) diff --git a/tox.ini b/tox.ini index e4b5535b6..0b813c70d 100644 --- a/tox.ini +++ b/tox.ini @@ -13,8 +13,6 @@ deps = nose-timer coverage<4 coveralls - pandas - numpy commands = nosetests --with-coverage --with-timer --cover-package=tqdm --ignore-files="tests_perf\.py" 
-d -v tqdm/ coveralls diff --git a/tqdm/__init__.py b/tqdm/__init__.py index 57b86abc7..8577e8058 100644 --- a/tqdm/__init__.py +++ b/tqdm/__init__.py @@ -4,7 +4,7 @@ from ._tqdm import format_meter from ._tqdm_gui import tqdm_gui from ._tqdm_gui import tgrange -from ._pandas import tqdm_pandas +from ._tqdm_pandas import tqdm_pandas from ._version import __version__ # NOQA __all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval', diff --git a/tqdm/_pandas.py b/tqdm/_tqdm_pandas.py similarity index 87% rename from tqdm/_pandas.py rename to tqdm/_tqdm_pandas.py index dec17aee5..f6e22e5e4 100644 --- a/tqdm/_pandas.py +++ b/tqdm/_tqdm_pandas.py @@ -2,16 +2,14 @@ # a result precise floating numbers (instead of truncated int) from __future__ import division, absolute_import -from tqdm import tqdm - __author__ = "github.com/casperdcl" __all__ = ['tqdm_pandas'] -def tqdm_pandas(**kwargs): +def tqdm_pandas(t): # pragma: no cover """ - Creates a `tqdm` instance with the given kwargs and registers it with + Registers the given `tqdm` instance with `pandas.core.groupby.DataFrameGroupBy.progress_apply`. It will even close() the `tqdm` instance upon completion. 
@@ -22,7 +20,7 @@ def tqdm_pandas(**kwargs): >>> from tqdm import tqdm, tqdm_pandas >>> >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) - >>> tqdm_pandas() # can specify any optional kwargs (same as for `tqdm`) + >>> tqdm_pandas(tqdm()) # can use tqdm_gui, optional kwargs, etc >>> # Now you can use `progress_apply` instead of `apply` >>> df.groupby(0).progress_apply(lambda x: x**2) @@ -31,7 +29,6 @@ def tqdm_pandas(**kwargs): https://stackoverflow.com/questions/18603270/ progress-indicator-during-pandas-operations-python """ - t = tqdm(**kwargs) from pandas.core.groupby import DataFrameGroupBy def inner(groups, func, *args, **kwargs): diff --git a/tqdm/tests/tests_pandas.py b/tqdm/tests/tests_pandas.py index 056295138..b245d6c13 100644 --- a/tqdm/tests/tests_pandas.py +++ b/tqdm/tests/tests_pandas.py @@ -1,6 +1,7 @@ -from __future__ import unicode_literals from nose.plugins.skip import SkipTest +from tqdm import tqdm + try: from StringIO import StringIO except: @@ -15,16 +16,16 @@ def closing(arg): def test_pandas(): - import pandas as pd - import numpy as np try: + from numpy.random import randint from tqdm import tqdm_pandas + import pandas as pd except: raise SkipTest with closing(StringIO()) as our_file: - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - tqdm_pandas(file=our_file, leave=False) + df = pd.DataFrame(randint(0, 100, (1000, 6))) + tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True)) df.groupby(0).progress_apply(lambda x: None) our_file.seek(0) @@ -38,16 +39,16 @@ def test_pandas(): def test_pandas_leave(): - import pandas as pd - import numpy as np try: + from numpy.random import randint from tqdm import tqdm_pandas + import pandas as pd except: raise SkipTest with closing(StringIO()) as our_file: - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - tqdm_pandas(file=our_file, leave=True) + df = pd.DataFrame(randint(0, 100, (1000, 6))) + tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True)) 
df.groupby(0).progress_apply(lambda x: None) our_file.seek(0) From cd6238ff886f3fecc13b2aae2cc94398d9a18c9d Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 30 Nov 2015 23:14:29 +0000 Subject: [PATCH 7/8] pandas documentation, removed unnecessary imports --- README.rst | 42 +++++++++++++++++++++++++++++++++++++++++- tqdm/_tqdm_pandas.py | 2 +- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b6992911b..05a02c343 100644 --- a/README.rst +++ b/README.rst @@ -181,10 +181,13 @@ Returns """ Examples and Advanced Usage -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------- See the ``examples`` folder. +Hooks and callbacks +~~~~~~~~~~~~~~~~~~~ + ``tqdm`` can easily support callbacks/hooks and manual updates. Here's an example with ``urllib``: @@ -245,6 +248,43 @@ It is recommend to use ``miniters=1`` whenever there is potentially large differences in iteration speed (e.g. downloading a file over a patchy connection). +Pandas Integration +~~~~~~~~~~~~~~~~~~ + +Due to popular demand we've added support for ``pandas`` -- here's an example +for ``DataFrameGroupBy.progress_apply``: + +.. code:: python + + import pandas as pd + import numpy as np + from tqdm import tqdm, tqdm_pandas + + df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) + + # Create and register a new `tqdm` instance with `pandas` + # (can use tqdm_gui, optional kwargs, etc.) + tqdm_pandas(tqdm()) + + # Now you can use `progress_apply` instead of `apply` + df.groupby(0).progress_apply(lambda x: x**2) + +In case you're interested in the internals, the ``tqdm_pandas`` function is +defined as follows: + +.. 
code:: python + + def tqdm_pandas(t): + from pandas.core.groupby import DataFrameGroupBy + def inner(groups, func, *args, **kwargs): + t.total = len(groups) + 1 + def wrapper(*args, **kwargs): + t.update(1) + return func(*args, **kwargs) + result = groups.apply(wrapper, *args, **kwargs) + t.close() + return result + DataFrameGroupBy.progress_apply = inner Contributions ------------- diff --git a/tqdm/_tqdm_pandas.py b/tqdm/_tqdm_pandas.py index f6e22e5e4..59632d9a6 100644 --- a/tqdm/_tqdm_pandas.py +++ b/tqdm/_tqdm_pandas.py @@ -1,6 +1,6 @@ # future division is important to divide integers and get as # a result precise floating numbers (instead of truncated int) -from __future__ import division, absolute_import +from __future__ import absolute_import __author__ = "github.com/casperdcl" From 8b77070f12cad36fff325d36f1506e880fe5f1be Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 30 Nov 2015 23:25:05 +0000 Subject: [PATCH 8/8] pandas examples --- README.rst | 21 ++++----------------- examples/pandas_progress_apply.py | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 examples/pandas_progress_apply.py diff --git a/README.rst b/README.rst index 05a02c343..285a35195 100644 --- a/README.rst +++ b/README.rst @@ -183,7 +183,7 @@ Returns Examples and Advanced Usage --------------------------- -See the ``examples`` folder. +See the ``examples`` folder or import the module and run ``help()``. Hooks and callbacks ~~~~~~~~~~~~~~~~~~~ @@ -269,22 +269,9 @@ for ``DataFrameGroupBy.progress_apply``: # Now you can use `progress_apply` instead of `apply` df.groupby(0).progress_apply(lambda x: x**2) -In case you're interested in the internals, the ``tqdm_pandas`` function is -defined as follows: - -.. 
code:: python - - def tqdm_pandas(t): - from pandas.core.groupby import DataFrameGroupBy - def inner(groups, func, *args, **kwargs): - t.total = len(groups) + 1 - def wrapper(*args, **kwargs): - t.update(1) - return func(*args, **kwargs) - result = groups.apply(wrapper, *args, **kwargs) - t.close() - return result - DataFrameGroupBy.progress_apply = inner +In case you're interested in how this works (and how to modify it for your +own callbacks), see the ``examples`` folder or import the module and run +``help()``. Contributions ------------- diff --git a/examples/pandas_progress_apply.py b/examples/pandas_progress_apply.py new file mode 100644 index 000000000..0658a05a0 --- /dev/null +++ b/examples/pandas_progress_apply.py @@ -0,0 +1,26 @@ +import pandas as pd +import numpy as np +from tqdm import tqdm, tqdm_pandas + +df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) + +# Create and register a new `tqdm` instance with `pandas` +# (can use tqdm_gui, optional kwargs, etc.) +tqdm_pandas(tqdm()) + +# Now you can use `progress_apply` instead of `apply` +df.groupby(0).progress_apply(lambda x: x**2) + + +""" Source code for `tqdm_pandas` (really simple!) """ +# def tqdm_pandas(t): +# from pandas.core.groupby import DataFrameGroupBy +# def inner(groups, func, *args, **kwargs): +# t.total = len(groups) + 1 +# def wrapper(*args, **kwargs): +# t.update(1) +# return func(*args, **kwargs) +# result = groups.apply(wrapper, *args, **kwargs) +# t.close() +# return result +# DataFrameGroupBy.progress_apply = inner