Skip to content

Commit

Permalink
diff_tree: Factor out a few protected methods in RenameDetector.
Browse files Browse the repository at this point in the history
This is intended to make subclassing easier.

Change-Id: If6c16651e97ee0b963deb6deafb3a13f041bd283
  • Loading branch information
dborowitz authored and jelmer committed Aug 1, 2011
1 parent e2252ee commit a52514a
Showing 1 changed file with 23 additions and 17 deletions.
40 changes: 23 additions & 17 deletions dulwich/diff_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,18 +491,32 @@ def _find_exact_renames(self):
self._changes.append(TreeChange(CHANGE_COPY, old, new))
self._prune(add_paths, delete_paths)

def _find_content_renames(self):
def _should_find_content_renames(self):
    """Return whether content-based rename detection should be attempted.

    The number of (add, delete) pairs that would have to be compared,
    ``len(self._adds) * len(self._deletes)``, is checked against
    ``self._max_files ** 2``.  This is a separate method so that subclasses
    can override the policy.

    :return: True if the comparison matrix does not exceed the threshold,
        False otherwise.
    """
    return len(self._adds) * len(self._deletes) <= self._max_files ** 2

def _rename_type(self, check_paths, delete, add):
    """Choose the change type for a matched (delete, add) pair.

    :param check_paths: If true, a pair whose old and new paths are equal
        is reported as a modify.
    :param delete: Entry from the delete list; its ``old.path`` and
        ``type`` attributes are read.
    :param add: Entry from the add list; its ``new.path`` attribute is
        read.
    :return: One of CHANGE_MODIFY, CHANGE_COPY or CHANGE_RENAME.
    """
    if check_paths and delete.old.path == add.new.path:
        # If the paths match, this must be a split modify, so make sure it
        # comes out as a modify.
        return CHANGE_MODIFY
    elif delete.type != CHANGE_DELETE:
        # If it's in deletes but not marked as a delete, it must have been
        # added due to find_copies_harder, and needs to be marked as a copy.
        return CHANGE_COPY
    return CHANGE_RENAME

def _find_content_rename_candidates(self):
candidates = self._candidates = []
# TODO: Optimizations:
# - Compare object sizes before counting blocks.
# - Skip if delete's S_IFMT differs from all adds.
# - Skip if adds or deletes is empty.
# Match C git's behavior of not attempting to find content renames if
# the matrix size exceeds the threshold.
if len(self._adds) * len(self._deletes) > self._max_files ** 2:
if not self._should_find_content_renames():
return

check_paths = self._rename_threshold is not None
candidates = []
for delete in self._deletes:
if S_ISGITLINK(delete.old.mode):
continue # Git links don't exist in this repo.
Expand All @@ -516,26 +530,17 @@ def _find_content_renames(self):
score = _similarity_score(old_obj, new_obj,
block_cache={old_sha: old_blocks})
if score > self._rename_threshold:
if check_paths and delete.old.path == add.new.path:
# If the paths match, this must be a split modify, so
# make sure it comes out as a modify.
new_type = CHANGE_MODIFY
elif delete.type != CHANGE_DELETE:
# If it's in deletes but not marked as a delete, it must
# have been added due to find_copies_harder, and needs
# to be marked as a copy.
new_type = CHANGE_COPY
else:
new_type = CHANGE_RENAME
new_type = self._rename_type(check_paths, delete, add)
rename = TreeChange(new_type, delete.old, add.new)
candidates.append((-score, rename))

def _choose_content_renames(self):
# Sort scores from highest to lowest, but keep names in ascending order.
candidates.sort()
self._candidates.sort()

delete_paths = set()
add_paths = set()
for _, change in candidates:
for _, change in self._candidates:
new_path = change.new.path
if new_path in add_paths:
continue
Expand Down Expand Up @@ -588,7 +593,8 @@ def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
self._want_unchanged = want_unchanged
self._collect_changes(tree1_id, tree2_id)
self._find_exact_renames()
self._find_content_renames()
self._find_content_rename_candidates()
self._choose_content_renames()
self._join_modifies()
self._prune_unchanged()
return self._sorted_changes()
Expand Down

0 comments on commit a52514a

Please sign in to comment.