Skip to content

Commit

Permalink
Support completely empty messages in git objects
Browse files Browse the repository at this point in the history
As it turns out, a git object can end right after its last header,
with no blank separator line following it. In that case, dulwich
currently "eats" the last header line, which is obviously wrong.

When such a case happens, return an object with a None message instead
of an empty line. We make sure that serialization/deserialization
still works as expected.
  • Loading branch information
olasd committed Feb 17, 2016
1 parent a5d6568 commit f8e4295
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 4 deletions.
29 changes: 26 additions & 3 deletions dulwich/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,17 +602,39 @@ def _parse_message(chunks):
f = BytesIO(b''.join(chunks))
k = None
v = ""
eof = False

# Parse the headers
#
# Headers can contain newlines. The next line is indented with a space.
# We store the latest key as 'k', and the accumulated value as 'v'.
for l in f:
if l.startswith(b' '):
# Indented continuation of the previous line
v += l[1:]
else:
if k is not None:
# We parsed a new header, return its value
yield (k, v.rstrip(b'\n'))
if l == b'\n':
# Empty line indicates end of headers
break
(k, v) = l.split(b' ', 1)
yield (None, f.read())

else:
# We reached end of file before the headers ended. We still need to
# return the previous header, then we need to return a None field for
# the text.
eof = True
if k is not None:
yield (k, v.rstrip(b'\n'))
yield (None, None)

if not eof:
# We didn't reach the end of file while parsing headers. We can return
# the rest of the file as a message.
yield (None, f.read())

f.close()


Expand Down Expand Up @@ -679,8 +701,9 @@ def _serialize(self):
chunks.append(git_line(
_TAGGER_HEADER, self._tagger, str(self._tag_time).encode('ascii'),
format_timezone(self._tag_timezone, self._tag_timezone_neg_utc)))
chunks.append(b'\n') # To close headers
chunks.append(self._message)
if self._message is not None:
chunks.append(b'\n') # To close headers
chunks.append(self._message)
return chunks

def _deserialize(self, chunks):
Expand Down
27 changes: 26 additions & 1 deletion dulwich/tests/test_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,20 @@ def test_serialize_simple(self):
b'\n'
b'Tag 0.1'), x.as_raw_string())

def test_serialize_none_message(self):
    """Check the raw form of a Tag built with ``message=None``.

    The expected bytes stop right after the tagger header line: no
    blank separator line and no message text follow it.
    """
    tag = make_object(
        Tag,
        tagger=b'Jelmer Vernooij <jelmer@samba.org>',
        name=b'0.1',
        message=None,
        object=(Blob, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
        tag_time=423423423,
        tag_timezone=0)
    # One entry per header line, in the order they must be emitted.
    expected_headers = [
        b'object d80c186a03f423a81b39df39dc87fd269736ca86\n',
        b'type blob\n',
        b'tag 0.1\n',
        b'tagger Jelmer Vernooij <jelmer@samba.org> '
        b'423423423 +0000\n',
    ]
    self.assertEqual(b''.join(expected_headers), tag.as_raw_string())


default_tagger = (b'Linus Torvalds <torvalds@woody.linux-foundation.org> '
b'1183319674 -0700')
Expand Down Expand Up @@ -849,8 +863,8 @@ def make_tag_lines(self,
lines.append(b'tag ' + name)
if tagger is not None:
lines.append(b'tagger ' + tagger)
lines.append(b'')
if message is not None:
lines.append(b'')
lines.append(message)
return lines

Expand All @@ -877,6 +891,17 @@ def test_parse_no_tagger(self):
self.assertEqual(None, x.tagger)
self.assertEqual(b'v2.6.22-rc7', x.name)

def test_parse_no_message(self):
    """Parse tag text generated with ``message=None``.

    The parsed Tag must report a None message while the tagger, tag
    time, timezone and name still match the expected values.
    """
    tag = Tag()
    raw_text = self.make_tag_text(message=None)
    tag.set_raw_string(raw_text)

    self.assertEqual(None, tag.message)

    expected_tagger = b'Linus Torvalds <torvalds@woody.linux-foundation.org>'
    self.assertEqual(expected_tagger, tag.tagger)

    # Compare the timestamp as a UTC datetime.
    parsed_time = datetime.datetime.utcfromtimestamp(tag.tag_time)
    self.assertEqual(parsed_time,
                     datetime.datetime(2007, 7, 1, 19, 54, 34))

    self.assertEqual(-25200, tag.tag_timezone)
    self.assertEqual(b'v2.6.22-rc7', tag.name)

def test_check(self):
self.assertCheckSucceeds(Tag, self.make_tag_text())
self.assertCheckFails(Tag, self.make_tag_text(object_sha=None))
Expand Down

0 comments on commit f8e4295

Please sign in to comment.