Skip to content

Commit

Permalink
Fix handling of 'done' in graph walker and implement the 'no-done' ca…
Browse files Browse the repository at this point in the history
…pability. (jelmer#88)

The gist of this gnarly problem is that it looks like the implementation over HTTP behaves as-if the `no-done` capability was enabled but unspecified so the reference client would make a second round trip back with more `haves` but then it doesn't know what to do with the `PACK` that was already sent. Alternatively there are cases where the side-band was used when it was expecting ACK/NAK. There were multiple problems with the code base that made this issue rather hard to stomp out, and here are the fixes (some verbatim from commit messages).

- `MultiAckDetailedGraphWalkerImpl` rebuilt to ensure output generated
  matches the reference implementation, like the git-http-backend. (Even though this is not strictly necessary, repeatedly calling `all_wants_specified` doesn't really change much given that there's only one place where the commits can be added in, which is by the object store through the `ack` method.

- Correction to the communication after all `have` lines have been processed. As a freebie I got `no-done` capability implemented since that's what it looked like before.

- There is another issue where attempts to pull an unrelated repo, the
  various combination of the previous flaws made it difficult to send
  the correct number of NAKs.  Removing the walker.send_ack call from
  the walker implementation and just ensure the ack is the only method
  that will call send_ack simplifies the process of correcting this.

- Added various state variables to the handler to track whether the `done` token is expected.

- Added a couple methods to deal with the handling of the above state variables, which the walker implementations themselves have access to through a method provided by the generic protocol walker class

- Added a method to the graph walker that takes in the state variables provided by the handler to delegate the dealing of the final ACK/NAK line that ends the section to permit the PACK section to begin, which then gets delegated to the Impl instances (which also have been normalized to address this).

- Finally, cleaned up the test cases and added further tests where relevant. Naturally
one of the earlier commits introduce some test examples that demonstrates this problem.
  • Loading branch information
metatoaster authored and jelmer committed May 24, 2015
1 parent e7fe2c3 commit b3445db
Show file tree
Hide file tree
Showing 12 changed files with 1,213 additions and 51 deletions.
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
* Support 'git.bat' in SubprocessGitClient on Windows.
(Stefan Zimmermann)

BUG FIXES

* Fix handling of 'done' in graph walker and implement the
'no-done' capability. (Tommy Yu, #88)

0.10.1 2015-03-25

BUG FIXES
Expand Down
1 change: 1 addition & 0 deletions dulwich/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
CAPABILITY_INCLUDE_TAG = b'include-tag'
CAPABILITY_MULTI_ACK = b'multi_ack'
CAPABILITY_MULTI_ACK_DETAILED = b'multi_ack_detailed'
CAPABILITY_NO_DONE = b'no-done'
CAPABILITY_NO_PROGRESS = b'no-progress'
CAPABILITY_OFS_DELTA = b'ofs-delta'
CAPABILITY_REPORT_STATUS = b'report-status'
Expand Down
3 changes: 3 additions & 0 deletions dulwich/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ def fetch_objects(self, determine_wants, graph_walker, progress,

return []

# If the graph walker is set up with an implementation that can
# ACK/NAK to the wire, it will write data to the client through
# this call as a side-effect.
haves = self.object_store.find_common_revisions(graph_walker)

# Deal with shallow requests separately because the haves do
Expand Down
143 changes: 111 additions & 32 deletions dulwich/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
CAPABILITY_INCLUDE_TAG,
CAPABILITY_MULTI_ACK_DETAILED,
CAPABILITY_MULTI_ACK,
CAPABILITY_NO_DONE,
CAPABILITY_NO_PROGRESS,
CAPABILITY_OFS_DELTA,
CAPABILITY_REPORT_STATUS,
Expand Down Expand Up @@ -205,6 +206,8 @@ def __init__(self, backend, proto, http_req=None):
self.proto = proto
self.http_req = http_req
self._client_capabilities = None
# Flags needed for the no-done capability
self._done_received = False

@classmethod
def capability_line(cls):
Expand Down Expand Up @@ -244,6 +247,9 @@ def has_capability(self, cap):
'before asking client' % cap)
return cap in self._client_capabilities

def notify_done(self):
self._done_received = True


class UploadPackHandler(Handler):
"""Protocol handler for uploading a pack to the server."""
Expand All @@ -264,7 +270,7 @@ def capabilities(cls):
return (CAPABILITY_MULTI_ACK_DETAILED, CAPABILITY_MULTI_ACK,
CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK,
CAPABILITY_OFS_DELTA, CAPABILITY_NO_PROGRESS,
CAPABILITY_INCLUDE_TAG, CAPABILITY_SHALLOW)
CAPABILITY_INCLUDE_TAG, CAPABILITY_SHALLOW, CAPABILITY_NO_DONE)

@classmethod
def required_capabilities(cls):
Expand Down Expand Up @@ -331,6 +337,10 @@ def handle(self):
# band data now.
self._processing_have_lines = False

if not graph_walker.handle_done(
not self.has_capability(CAPABILITY_NO_DONE), self._done_received):
return

self.progress(b"dul-daemon says what\n")
self.progress(("counting objects: %d, done.\n" % len(objects_iter)).encode('ascii'))
write_pack_objects(ProtocolFile(None, write), objects_iter)
Expand Down Expand Up @@ -607,6 +617,10 @@ def _handle_shallow_request(self, wants):

self.proto.write_pkt_line(None)

def notify_done(self):
# relay the message down to the handler.
self.handler.notify_done()

def send_ack(self, sha, ack_type=b''):
if ack_type:
ack_type = b' ' + ack_type
Expand All @@ -615,6 +629,10 @@ def send_ack(self, sha, ack_type=b''):
def send_nak(self):
self.proto.write_pkt_line(b'NAK\n')

def handle_done(self, done_required, done_received):
# Delegate this to the implementation.
return self._impl.handle_done(done_required, done_received)

def set_wants(self, wants):
self._wants = wants

Expand Down Expand Up @@ -644,24 +662,44 @@ class SingleAckGraphWalkerImpl(object):

def __init__(self, walker):
self.walker = walker
self._sent_ack = False
self._common = []

def ack(self, have_ref):
if not self._sent_ack:
if not self._common:
self.walker.send_ack(have_ref)
self._sent_ack = True
self._common.append(have_ref)

def next(self):
command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
if command in (None, COMMAND_DONE):
if not self._sent_ack:
self.walker.send_nak()
# defer the handling of done
self.walker.notify_done()
return None
elif command == COMMAND_HAVE:
return sha

__next__ = next

def handle_done(self, done_required, done_received):
if not self._common:
self.walker.send_nak()

if done_required and not done_received:
# we are not done, especially when done is required; skip
# the pack for this request and especially do not handle
# the done.
return False

if not done_received and not self._common:
# Okay we are not actually done then since the walker picked
# up no haves. This is usually triggered when client attempts
# to pull from a source that has no common base_commit.
# See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
# test_multi_ack_stateless_nodone
return False

return True


class MultiAckGraphWalkerImpl(object):
"""Graph walker implementation that speaks the multi-ack protocol."""
Expand All @@ -688,12 +726,7 @@ def next(self):
# flush but more have lines are still coming
continue
elif command == COMMAND_DONE:
# don't nak unless no common commits were found, even if not
# everything is satisfied
if self._common:
self.walker.send_ack(self._common[-1])
else:
self.walker.send_nak()
self.walker.notify_done()
return None
elif command == COMMAND_HAVE:
if self._found_base:
Expand All @@ -703,49 +736,94 @@ def next(self):

__next__ = next

def handle_done(self, done_required, done_received):
if done_required and not done_received:
# we are not done, especially when done is required; skip
# the pack for this request and especially do not handle
# the done.
return False

if not done_received and not self._common:
# Okay we are not actually done then since the walker picked
# up no haves. This is usually triggered when client attempts
# to pull from a source that has no common base_commit.
# See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
# test_multi_ack_stateless_nodone
return False

# don't nak unless no common commits were found, even if not
# everything is satisfied
if self._common:
self.walker.send_ack(self._common[-1])
else:
self.walker.send_nak()
return True


class MultiAckDetailedGraphWalkerImpl(object):
"""Graph walker implementation speaking the multi-ack-detailed protocol."""

def __init__(self, walker):
self.walker = walker
self._found_base = False
self._common = []

def ack(self, have_ref):
# Should only be called iff have_ref is common
self._common.append(have_ref)
if not self._found_base:
self.walker.send_ack(have_ref, b'common')
if self.walker.all_wants_satisfied(self._common):
self._found_base = True
self.walker.send_ack(have_ref, b'ready')
# else we blind ack within next
self.walker.send_ack(have_ref, b'common')

def next(self):
while True:
command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
if command is None:
if self.walker.all_wants_satisfied(self._common):
self.walker.send_ack(self._common[-1], b'ready')
self.walker.send_nak()
if self.walker.http_req:
# The HTTP version of this request a flush-pkt always
# signifies an end of request, so we also return
# nothing here as if we are done (but not really, as
# it depends on whether no-done capability was
# specified and that's handled in handle_done which
# may or may not call post_nodone_check depending on
# that).
return None
continue
elif command == COMMAND_DONE:
# don't nak unless no common commits were found, even if not
# everything is satisfied
if self._common:
self.walker.send_ack(self._common[-1])
else:
self.walker.send_nak()
return None
# Let the walker know that we got a done.
self.walker.notify_done()
break
elif command == COMMAND_HAVE:
if self._found_base:
# blind ack; can happen if the client has more requests
# inflight
self.walker.send_ack(sha, b'ready')
# return the sha and let the caller ACK it with the
# above ack method.
return sha
# don't nak unless no common commits were found, even if not
# everything is satisfied

__next__ = next

def handle_done(self, done_required, done_received):
if done_required and not done_received:
# we are not done, especially when done is required; skip
# the pack for this request and especially do not handle
# the done.
return False

if not done_received and not self._common:
# Okay we are not actually done then since the walker picked
# up no haves. This is usually triggered when client attempts
# to pull from a source that has no common base_commit.
# See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
# test_multi_ack_stateless_nodone
return False

# don't nak unless no common commits were found, even if not
# everything is satisfied
if self._common:
self.walker.send_ack(self._common[-1])
else:
self.walker.send_nak()
return True


class ReceivePackHandler(Handler):
"""Protocol handler for downloading a pack from the client."""
Expand All @@ -758,7 +836,8 @@ def __init__(self, backend, args, proto, http_req=None,

@classmethod
def capabilities(cls):
return (CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS, CAPABILITY_SIDE_BAND_64K)
return (CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS,
CAPABILITY_SIDE_BAND_64K, CAPABILITY_NO_DONE)

def _apply_pack(self, refs):
all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
Expand Down
37 changes: 37 additions & 0 deletions dulwich/tests/compat/server_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,43 @@ def test_fetch_full_depth_into_shallow_clone_from_dulwich(self):
self.assertEqual([], _get_shallow(clone))
self.assertReposEqual(clone, self._source_repo)

def test_fetch_from_dulwich_issue_88_standard(self):
# Basically an integration test to see that the ACK/NAK
# generation works on repos with common head.
self._source_repo = self.import_repo('issue88_expect_ack_nak_server.export')
self._client_repo = self.import_repo('issue88_expect_ack_nak_client.export')
port = self._start_server(self._source_repo)

run_git_or_fail(['fetch', self.url(port), 'master',],
cwd=self._client_repo.path)
self.assertObjectStoreEqual(
self._source_repo.object_store,
self._client_repo.object_store)

def test_fetch_from_dulwich_issue_88_alternative(self):
# likewise, but the case where the two repos have no common parent
self._source_repo = self.import_repo('issue88_expect_ack_nak_other.export')
self._client_repo = self.import_repo('issue88_expect_ack_nak_client.export')
port = self._start_server(self._source_repo)

self.assertRaises(KeyError, self._client_repo.get_object,
b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323')
run_git_or_fail(['fetch', self.url(port), 'master',],
cwd=self._client_repo.path)
self.assertEqual(b'commit', self._client_repo.get_object(
b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323').type_name)

def test_push_to_dulwich_issue_88_standard(self):
# Same thing, but we reverse the role of the server/client
# and do a push instead.
self._source_repo = self.import_repo('issue88_expect_ack_nak_client.export')
self._client_repo = self.import_repo('issue88_expect_ack_nak_server.export')
port = self._start_server(self._source_repo)

run_git_or_fail(['push', self.url(port), 'master',],
cwd=self._client_repo.path)
self.assertReposEqual(self._source_repo, self._client_repo)


# TODO(dborowitz): Come up with a better way of testing various permutations of
# capabilities. The only reason it is the way it is now is that side-band-64k
Expand Down
Loading

0 comments on commit b3445db

Please sign in to comment.