Skip to content

Commit

Permalink
bump version, Merge pull request tqdm#9 from tqdm/pandas-progress
Browse files Browse the repository at this point in the history
Add progress bar to pandas apply function
  • Loading branch information
casperdcl committed Nov 30, 2015
2 parents d2263d9 + 8b77070 commit b2c759d
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 7 deletions.
34 changes: 31 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,13 @@ Returns
"""
Examples and Advanced Usage
~~~~~~~~~~~~~~~~~~~~~~~~~~~
---------------------------

See the ``examples`` folder.
See the `examples <https://github.com/tqdm/tqdm/tree/master/examples>`__ folder or
import the module and run ``help()``.

Hooks and callbacks
~~~~~~~~~~~~~~~~~~~

``tqdm`` can easily support callbacks/hooks and manual updates.
Here's an example with ``urllib``:
Expand Down Expand Up @@ -245,6 +249,30 @@ It is recommended to use ``miniters=1`` whenever there are potentially
large differences in iteration speed (e.g. downloading a file over
a patchy connection).

Pandas Integration
~~~~~~~~~~~~~~~~~~

Due to popular demand we've added support for ``pandas`` -- here's an example
for ``DataFrameGroupBy.progress_apply``:

.. code:: python

    import pandas as pd
    import numpy as np
    from tqdm import tqdm, tqdm_pandas

    df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))

    # Create and register a new `tqdm` instance with `pandas`
    # (can use tqdm_gui, optional kwargs, etc.)
    tqdm_pandas(tqdm())

    # Now you can use `progress_apply` instead of `apply`
    df.groupby(0).progress_apply(lambda x: x**2)

In case you're interested in how this works (and how to modify it for your
own callbacks), see the `examples <https://github.com/tqdm/tqdm/tree/master/examples>`__
folder or import the module and run ``help()``.

Contributions
-------------
Expand All @@ -253,7 +281,7 @@ To run the testing suite please make sure tox (http://tox.testrun.org/)
is installed, then type ``tox`` from the command line.

Alternatively if you don't want to use ``tox``, a Makefile is provided
with the following command:
with the following commands:

.. code:: sh
Expand Down
26 changes: 26 additions & 0 deletions examples/pandas_progress_apply.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pandas as pd
import numpy as np
from tqdm import tqdm, tqdm_pandas

# Demo data: a 100000 x 6 frame of random integers, grouped on column 0 below
df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))

# Create and register a new `tqdm` instance with `pandas`
# (can use tqdm_gui, optional kwargs, etc.)
tqdm_pandas(tqdm())

# Now you can use `progress_apply` instead of `apply`
df.groupby(0).progress_apply(lambda x: x**2)


""" Source code for `tqdm_pandas` (really simple!) """
# def tqdm_pandas(t):
#     from pandas.core.groupby import DataFrameGroupBy
#     def inner(groups, func, *args, **kwargs):
#         t.total = len(groups) + 1
#         def wrapper(*args, **kwargs):
#             t.update(1)
#             return func(*args, **kwargs)
#         result = groups.apply(wrapper, *args, **kwargs)
#         t.close()
#         return result
#     DataFrameGroupBy.progress_apply = inner
5 changes: 3 additions & 2 deletions tqdm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from ._tqdm import format_meter
from ._tqdm_gui import tqdm_gui
from ._tqdm_gui import tgrange
from ._tqdm_pandas import tqdm_pandas
from ._version import __version__ # NOQA

# Names exported by a star-import of this package
__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange',
'format_interval', 'format_meter', '__version__']
__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval',
'format_meter', 'tqdm_pandas', '__version__']
2 changes: 1 addition & 1 deletion tqdm/_tqdm_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
# a result precise floating numbers (instead of truncated int)
from __future__ import division, absolute_import
# import compatibility functions and utilities
from ._utils import _range
from time import time
from ._utils import _range
# to inherit from the tqdm class
from ._tqdm import tqdm, format_meter

Expand Down
58 changes: 58 additions & 0 deletions tqdm/_tqdm_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# absolute_import makes a plain `import x` resolve to the top-level module
# `x` instead of a sibling module inside this package
from __future__ import absolute_import


__author__ = "github.com/casperdcl"
__all__ = ['tqdm_pandas']


def tqdm_pandas(t):  # pragma: no cover
    """
    Registers the given `tqdm` instance with
    `pandas.core.groupby.DataFrameGroupBy.progress_apply`.

    The instance is always close()d when `progress_apply` finishes,
    including when the applied function raises.

    Parameters
    ----------
    t  : tqdm instance
        Progress bar to drive. Only `t.total`, `t.update()` and
        `t.close()` are used; `t.total` is overwritten on every
        `progress_apply` call.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from tqdm import tqdm, tqdm_pandas
    >>>
    >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
    >>> tqdm_pandas(tqdm())  # can use tqdm_gui, optional kwargs, etc
    >>> # Now you can use `progress_apply` instead of `apply`
    >>> df.groupby(0).progress_apply(lambda x: x**2)

    References
    ----------
    https://stackoverflow.com/questions/18603270/
    progress-indicator-during-pandas-operations-python
    """
    from pandas.core.groupby import DataFrameGroupBy

    def inner(groups, func, *args, **kwargs):
        """
        Parameters
        ----------
        groups  : DataFrameGroupBy
            Grouped data.
        func  : function
            To be applied on the grouped data.

        *args and **kwargs are transmitted to DataFrameGroupBy.apply()
        """
        t.total = len(groups) + 1  # pandas calls update once too many

        def wrapper(*func_args, **func_kwargs):
            # Tick the bar once per call pandas makes to `func`
            t.update()
            return func(*func_args, **func_kwargs)

        try:
            return groups.apply(wrapper, *args, **kwargs)
        finally:
            # Close the bar even if `func` (or pandas itself) raises,
            # otherwise a failed apply would leave it open
            t.close()

    # Enable custom tqdm progress in pandas!
    DataFrameGroupBy.progress_apply = inner
2 changes: 1 addition & 1 deletion tqdm/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Definition of the version number
version_info = 3, 2, 0 # major, minor, patch, -extra
version_info = 3, 3, 0 # major, minor, patch, -extra

# Nice string for the version
# Parts are joined with '.'; a '.' directly before a '-' is dropped (so a
# '-extra' part attaches without a dot) and any leading/trailing '.' or '-'
# characters are stripped
__version__ = '.'.join(map(str, version_info)).replace('.-', '-').strip('.-')
62 changes: 62 additions & 0 deletions tqdm/tests/tests_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from nose.plugins.skip import SkipTest

from tqdm import tqdm

try:
    from StringIO import StringIO
except ImportError:
    # No standalone `StringIO` module available: use the one from `io`
    from io import StringIO
# Ensure we can use `with closing(...) as ... :` syntax
if hasattr(StringIO, '__exit__') and hasattr(StringIO, '__enter__'):
    def closing(arg):
        """Return `arg` unchanged: it already is a context manager."""
        return arg
else:
    from contextlib import closing


def test_pandas():
    """Check that a bar created with `leave=False` prints no final line."""
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Optional dependencies are not installed: nothing to test.
        # Any other failure is a real error and must not be hidden as a skip.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        our_file.seek(0)
        output = our_file.read()

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        # (explicit check instead of `assert` so it also runs under -O)
        if '100%|##########| 101/101' in output:
            raise AssertionError(
                'Did not expect:\n\t100%|##########| 101/101')


def test_pandas_leave():
    """Check that a bar created with `leave=True` prints its final line."""
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Optional dependencies are not installed: nothing to test.
        # Any other failure is a real error and must not be hidden as a skip.
        raise SkipTest

    with closing(StringIO()) as our_file:
        # NOTE(review): the expected `101/101` relies on all 100 possible
        # values showing up in column 0 of the random data -- confirm this
        # cannot make the test flaky
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        our_file.seek(0)
        output = our_file.read()

        # explicit check instead of `assert` so it also runs under -O
        if '100%|##########| 101/101' not in output:
            raise AssertionError('\n'.join(('Expected:',
                                            '100%|##########| 101/101',
                                            'Got:', output)))

0 comments on commit b2c759d

Please sign in to comment.