Skip to content

Commit

Permalink
make GridOut an iterator returning chunk sized strings
Browse files: browse the repository at this point in the history
  • Loading branch information
Mike Dirolf committed Apr 2, 2010
1 parent 946bedc commit bec638f
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/api/gridfs/grid_file.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
:members:

.. autoattribute:: _id
.. automethod:: __iter__

.. autoclass:: GridFile
:members:
34 changes: 33 additions & 1 deletion gridfs/grid_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Tools for representing files stored in GridFS."""

import datetime
import math
import os
try:
from cStringIO import StringIO
Expand Down Expand Up @@ -351,7 +352,7 @@ def read(self, size=-1):
chunk = self.__chunks.find_one({"files_id": self._id,
"n": chunk_number})
if not chunk:
raise CorruptGridFile("no chunk for n = " + chunk_number)
raise CorruptGridFile("no chunk #%d" % chunk_number)

if not data:
data += chunk["data"][self.__position % self.chunk_size:]
Expand Down Expand Up @@ -397,6 +398,37 @@ def seek(self, pos, whence=_SEEK_SET):
self.__position = new_pos
self.__buffer = ""

def __iter__(self):
    """Iterate over the contents of this file, one chunk at a time.

    Every item produced by the returned iterator is a chunk-sized
    instance of :class:`str`. A webserver that can stream such an
    iterator efficiently can use it to serve the file.
    """
    chunk_source = self.__chunks
    return GridOutIterator(self, chunk_source)


class GridOutIterator(object):
    """Iterator over the stored chunks of a single file.

    Each step looks up the next chunk document by ``files_id`` and ``n``
    and returns its ``"data"`` field converted with :class:`str`.
    """

    def __init__(self, grid_out, chunks):
        """Create an iterator over the chunks belonging to `grid_out`.

        :Parameters:
          - `grid_out`: object exposing ``_id``, ``length`` and
            ``chunk_size`` attributes
          - `chunks`: object exposing ``find_one(spec)`` that returns the
            matching chunk document, or a false value when none exists
            (presumably the GridFS chunks collection -- confirm against
            the caller)
        """
        self.__id = grid_out._id
        self.__chunks = chunks
        self.__current_chunk = 0
        # Exact ceiling division: negating a floor division rounds up
        # without going through float, which loses precision for very
        # large lengths.
        self.__max_chunk = -(-grid_out.length // grid_out.chunk_size)

    def __iter__(self):
        """Return this object; it is its own iterator."""
        return self

    def next(self):
        """Return the data of the next chunk as a :class:`str`.

        Raises :class:`StopIteration` once every expected chunk has been
        returned, and :class:`CorruptGridFile` when an expected chunk
        cannot be found.
        """
        if self.__current_chunk >= self.__max_chunk:
            raise StopIteration
        chunk = self.__chunks.find_one({"files_id": self.__id,
                                        "n": self.__current_chunk})
        if not chunk:
            raise CorruptGridFile("no chunk #%d" % self.__current_chunk)
        # Advance only after a successful lookup so a failed call can be
        # retried for the same chunk number.
        self.__current_chunk += 1
        return str(chunk["data"])

    # Python 3 spells the iterator protocol method ``__next__``; keep
    # ``next`` for Python 2 callers and alias it for forward compatibility.
    __next__ = next


class GridFile(object):
"""No longer supported.
Expand Down
21 changes: 21 additions & 0 deletions test/test_grid_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,27 @@ def test_multiple_reads(self):
self.assertEqual("d", g.read(2))
self.assertEqual("", g.read(2))

def test_iterator(self):
    """Iterating a GridOut yields its data as chunk-sized strings."""

    def stored(payload=None, **options):
        # Write `payload` (if any) through a fresh GridIn, close it, and
        # open the resulting file for reading.
        writer = GridIn(self.db.fs, **options)
        if payload is not None:
            writer.write(payload)
        writer.close()
        return GridOut(self.db.fs, writer._id)

    # A file that was closed without any writes yields no chunks.
    empty_file = stored()
    self.assertEqual([], list(empty_file))

    # Data that fits in one chunk comes back as a single string, and
    # iterating is independent of the read() position.
    one_chunk_file = stored("hello world")
    self.assertEqual(["hello world"], list(one_chunk_file))
    self.assertEqual("hello", one_chunk_file.read(5))
    self.assertEqual(["hello world"], list(one_chunk_file))
    self.assertEqual(" worl", one_chunk_file.read(5))

    # With a two-byte chunk size the data is split into several pieces.
    small_chunk_file = stored("hello world", chunk_size=2)
    self.assertEqual(["he", "ll", "o ", "wo", "rl", "d"],
                     list(small_chunk_file))

def test_read_chunks_unaligned_buffer_size(self):
in_data = "This is a text that doesn't quite fit in a single 16-byte chunk."
f = GridIn(self.db.fs, chunkSize=16)
Expand Down

0 comments on commit bec638f

Please sign in to comment.