Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Exclusion Filters #705

Merged
merged 23 commits into from
Dec 29, 2020
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion core/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@
from .util import cmp_value, fix_surrogate_encoding
from . import directories, results, export, fs, prioritize
from .ignore import IgnoreList
from .exclude import ExcludeDict as ExcludeList
from .scanner import ScanType
from .gui.deletion_options import DeletionOptions
from .gui.details_panel import DetailsPanel
from .gui.directory_tree import DirectoryTree
from .gui.ignore_list_dialog import IgnoreListDialog
from .gui.exclude_list_dialog import ExcludeListDialogCore
from .gui.problem_dialog import ProblemDialog
from .gui.stats_label import StatsLabel

Expand Down Expand Up @@ -137,7 +139,8 @@ def __init__(self, view):
os.makedirs(self.appdata)
self.app_mode = AppMode.Standard
self.discarded_file_count = 0
self.directories = directories.Directories()
self.exclude_list = ExcludeList()
self.directories = directories.Directories(self.exclude_list)
self.results = results.Results(self)
self.ignore_list = IgnoreList()
# In addition to "app-level" options, this dictionary also holds options that will be
Expand All @@ -155,6 +158,7 @@ def __init__(self, view):
self.directory_tree = DirectoryTree(self)
self.problem_dialog = ProblemDialog(self)
self.ignore_list_dialog = IgnoreListDialog(self)
self.exclude_list_dialog = ExcludeListDialogCore(self)
self.stats_label = StatsLabel(self)
self.result_table = None
self.deletion_options = DeletionOptions()
Expand Down Expand Up @@ -587,6 +591,9 @@ def load(self):
p = op.join(self.appdata, "ignore_list.xml")
self.ignore_list.load_from_xml(p)
self.ignore_list_dialog.refresh()
p = op.join(self.appdata, "exclude_list.xml")
self.exclude_list.load_from_xml(p)
self.exclude_list_dialog.refresh()

def load_directories(self, filepath):
# Clear out previous entries
Expand Down Expand Up @@ -779,6 +786,8 @@ def save(self):
self.directories.save_to_file(op.join(self.appdata, "last_directories.xml"))
p = op.join(self.appdata, "ignore_list.xml")
self.ignore_list.save_to_xml(p)
p = op.join(self.appdata, "exclude_list.xml")
self.exclude_list.save_to_xml(p)
self.notify("save_session")

def save_as(self, filename):
Expand Down
50 changes: 40 additions & 10 deletions core/directories.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ class Directories:
"""

# ---Override
def __init__(self):
def __init__(self, exclude_list=None):
self._dirs = []
# {path: state}
self.states = {}
self._exclude_list = exclude_list

def __contains__(self, path):
for p in self._dirs:
Expand All @@ -76,39 +77,62 @@ def __len__(self):

# ---Private
def _default_state_for_path(self, path):
# New logic with regex filters
if self._exclude_list is not None and self._exclude_list.mark_count > 0:
# We iterate even if we only have one item here
for denied_path_re in self._exclude_list.compiled:
if denied_path_re.match(str(path.name)):
return DirectoryState.Excluded
# return # We still use the old logic to force state on hidden dirs
# Override this in subclasses to specify the state of some special folders.
if path.name.startswith("."): # hidden
if path.name.startswith("."):
return DirectoryState.Excluded

def _get_files(self, from_path, fileclasses, j):
for root, dirs, files in os.walk(str(from_path)):
j.check_if_cancelled()
root = Path(root)
state = self.get_state(root)
rootPath = Path(root)
state = self.get_state(rootPath)
if state == DirectoryState.Excluded:
# Recursively get files from folders with lots of subfolder is expensive. However, there
# might be a subfolder in this path that is not excluded. What we want to do is to skim
# through self.states and see if we must continue, or we can stop right here to save time
if not any(p[: len(root)] == root for p in self.states):
if not any(p[: len(rootPath)] == rootPath for p in self.states):
del dirs[:]
try:
if state != DirectoryState.Excluded:
found_files = [
fs.get_file(root + f, fileclasses=fileclasses) for f in files
]
# Old logic
if self._exclude_list is None or not self._exclude_list.mark_count:
found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files]
else:
found_files = []
# print(f"len of files: {len(files)} {files}")
for f in files:
found = False
for expr in self._exclude_list.compiled_files:
if expr.match(f):
found = True
break
if not found:
for expr in self._exclude_list.compiled_paths:
if expr.match(root + os.sep + f):
found = True
break
if not found:
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
found_files = [f for f in found_files if f is not None]
# In some cases, directories can be considered as files by dupeGuru, which is
# why we have this line below. In fact, there only one case: Bundle files under
# OS X... In other situations, this forloop will do nothing.
for d in dirs[:]:
f = fs.get_file(root + d, fileclasses=fileclasses)
f = fs.get_file(rootPath + d, fileclasses=fileclasses)
if f is not None:
found_files.append(f)
dirs.remove(d)
logging.debug(
"Collected %d files in folder %s",
len(found_files),
str(from_path),
str(rootPath),
)
for file in found_files:
file.is_ref = state == DirectoryState.Reference
Expand Down Expand Up @@ -194,8 +218,14 @@ def get_state(self, path):
if path in self.states:
return self.states[path]
state = self._default_state_for_path(path) or DirectoryState.Normal
# Save non-default states in cache, necessary for _get_files()
if state != DirectoryState.Normal:
self.states[path] = state
return state

prevlen = 0
# we loop through the states to find the longest matching prefix
# if the parent has a state in cache, return that state
for p, s in self.states.items():
if p.is_parent_of(path) and len(p) > prevlen:
prevlen = len(p)
Expand Down
Loading