Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ignore kwarg to traverse() in common, allowing directory pruning #209

Merged
merged 4 commits into from
Feb 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions src/promnesia/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from glob import glob
import itertools
from more_itertools import intersperse
import logging
from functools import lru_cache
import shutil
Expand Down Expand Up @@ -390,26 +391,50 @@ def mime(path: PathIsh) -> Optional[str]:
return magic(ps)


def find_args(root: Path, follow: bool, ignore: Optional[List[str]]=None) -> List[str]:
    """
    Build the arguments (everything after the executable name) for a ``find``
    call that lists the regular files under ``root``.

    :param root: directory to search; passed to ``find`` as ``str(root)``.
    :param follow: when true, ``-L`` is emitted first so ``find`` follows symlinks.
    :param ignore: names to skip. Every entry is handed to a ``-name`` test:
        directories matching one of the names are pruned (not descended into),
        and files matching one of the names are excluded from the output.
        ``None`` or an empty list disables all filtering.
    :return: the argument list, e.g. ``[str(root), '-type', 'f']`` when nothing
        is ignored and ``follow`` is false.
    """
    # NOTE: the default is None rather than [] so that no list object is shared
    # between calls.
    prune_dir_args: List[str] = []
    ignore_file_args: List[str] = []
    if ignore:
        # One '-name {name}' test per ignored entry, OR-ed (-o) together:
        #   -name a -o -name b -o -name c
        ignore_names_l: List[str] = []
        for n in ignore:
            if ignore_names_l:
                ignore_names_l.append('-o')
            ignore_names_l.extend(['-name', n])
        # Prune all of those directories, and make the entire clause evaluate to false
        # (so that the pruned directory itself is never matched and printed by find)
        prune_dir_args = ['-type', 'd', '-a', '(', *ignore_names_l, ')', '-prune', '-false', '-o']
        # Also ignore any regular files carrying one of the names
        ignore_file_args = ['-a', '-not', '(', *ignore_names_l, ')']

    return [
        *(['-L'] if follow else []),
        str(root),
        *prune_dir_args,
        '-type', 'f',
        *ignore_file_args,
    ]


def fdfind_args(root: Path, follow: bool, ignore: Optional[List[str]]=None) -> List[str]:
    """
    Build the arguments (everything after the executable name) for an ``fd``
    call that lists the regular files under ``root``.

    :param root: directory to search; passed as ``str(root)`` after the
        match-everything pattern ``.``.
    :param follow: when true, ``--follow`` is added to the arguments.
    :param ignore: names to skip; each entry becomes an ``--exclude {name}``
        pair. ``None`` or an empty list adds no ``--exclude`` arguments.
    :return: the argument list, starting with whatever ``extra_fd_args()``
        from the project config returns.
    """
    from .config import extra_fd_args

    # Always initialised, so the return below works when nothing is ignored.
    # (Previously this name was only bound inside `if ignore:`, which raised
    # UnboundLocalError for an empty ignore list.)
    ignore_args_l: List[str] = []
    for n in ignore or []:
        # Add a statement that excludes the file/folder with this name
        ignore_args_l.extend(['--exclude', f'{n}'])

    return [
        *extra_fd_args(),
        *ignore_args_l,
        *(['--follow'] if follow else []),
        '--type', 'f',
        '.',
        str(root),
    ]


def traverse(root: Path, *, follow: bool=True) -> Iterable[Path]:
def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
if not root.is_dir():
yield root
return
Expand All @@ -418,16 +443,20 @@ def traverse(root: Path, *, follow: bool=True) -> Iterable[Path]:
if _is_windows:
# on windows could use 'forfiles'... but probably easier not to bother for now
# todo could use followlinks=True? walk could end up in infinite loop?
for r, _, files in os.walk(root):
yield from (Path(r) / f for f in files)
for r, dirs, files in os.walk(root):
# Remove dirs specified in ignore (clone dirs() as we have to remove in place)
for i, d in enumerate(list(dirs)):
if d in ignore:
del dirs[i]
yield from (Path(r) / f for f in files if f not in ignore)
return

from .compat import Popen, PIPE
cmd = ['find', *find_args(root, follow=follow)]
cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
# try to use fd.. it cooperates well with gitignore etc, also faster than find
for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
if shutil.which(x):
cmd = [x, *fdfind_args(root, follow=follow)]
cmd = [x, *fdfind_args(root, follow=follow, ignore=ignore)]
break
else:
warnings.warn("'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'")
Expand Down
2 changes: 1 addition & 1 deletion src/promnesia/sources/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _index(path: Path, opts: Options) -> Results:

# iterate over resolved paths, to avoid duplicates
def rit() -> Iterable[Path]:
it = traverse(path, follow=opts.follow)
it = traverse(path, follow=opts.follow, ignore=IGNORE)
for p in it:
if any(fnmatch(str(p), o) for o in opts.ignored):
# TODO not sure if should log here... might end up with quite a bit of logs
Expand Down
43 changes: 43 additions & 0 deletions tests/test_traverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from pathlib import Path
from promnesia.common import traverse
from unittest.mock import Mock, patch
from common import DATA


# Root of the fixture directory tree that the traverse() tests below walk
testDataPath = Path(DATA) / 'traverse'

# Make shutil.which report every candidate binary (fd, fd-find, fdfind) as
# missing, so that traverse() falls back to plain `find`
@patch('promnesia.common.shutil.which', return_value=False)
def test_traverse_ignore_find(patched):
    """
    traverse() backed by `find`, with some names ignored
    """
    # expected: only the files that are neither ignored nor inside an ignored dir
    expected = {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}

    found = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))

    assert found == expected

def test_traverse_ignore_fdfind():
    """
    traverse() backed by `fdfind`, with some names ignored

    NOTE(review): nothing is patched here, so this presumably only exercises
    fd when one of its binaries is installed on the machine running the test.
    """
    # expected: only the files that are neither ignored nor inside an ignored dir
    expected = {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}

    found = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))

    assert found == expected

# TODO: ideally the pure-python implementation would be testable directly,
# without having to patch the platform flag like this
@patch('promnesia.common._is_windows', new_callable=lambda: True)
def test_traverse_ignore_windows(patched):
    """
    traverse() via the python code path (taken when _is_windows is true), with some names ignored
    """
    # expected: only the files that are neither ignored nor inside an ignored dir
    expected = {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}

    found = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))

    assert found == expected
1 change: 1 addition & 0 deletions tests/testdata/traverse/ignoreme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
jaiofjeoriheoirjg
1 change: 1 addition & 0 deletions tests/testdata/traverse/ignoreme2/notrealignored.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
notrealignores
1 change: 1 addition & 0 deletions tests/testdata/traverse/imhere.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
imhere.txt
1 change: 1 addition & 0 deletions tests/testdata/traverse/imhere2/real.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
jdfioja