Skip to content

Instantly share code, notes, and snippets.

@ievans3024
Last active April 20, 2020 16:32
Show Gist options
  • Save ievans3024/06e4934a190ac8c763169be31461bc2c to your computer and use it in GitHub Desktop.
Save ievans3024/06e4934a190ac8c763169be31461bc2c to your computer and use it in GitHub Desktop.
Python module for providing unix/linux "find" command behavior.
#! /usr/bin/env python3
"""
Module for providing behavior like the unix/linux "find" command.
Supports glob expressions and python regex objects.
"""
import fnmatch
import os
import os.path
import re
from typing import Union, Iterable
def __get_matches(path: Union[str, bytes, os.PathLike],
                  pattern: Union[str, re.Pattern],
                  case_sensitive: bool = True
                  ) -> bool:
    """
    Determine if the final component of a path matches a given pattern.

    :param path: The full path to test. ``bytes`` and ``os.PathLike`` values are
        converted to ``str`` with ``os.fsdecode`` before matching.
    :param pattern: A glob string or a compiled regular expression object to test
        the basename against. Glob strings are translated with
        ``fnmatch.translate``; compiled expressions are applied as-is with
        ``Pattern.match``, i.e. anchored at the start of the basename only.
    :param case_sensitive: Whether or not glob comparisons should be
        case-sensitive. Ignored when ``pattern`` is already a compiled
        expression, since that object carries its own flags.
    :return: ``True`` if the basename matches the pattern, ``False`` otherwise.
    """
    # Normalise to str first: the annotated bytes / PathLike inputs do not
    # support ``rstrip('/')`` with a str argument.
    path = os.fsdecode(path)
    # Strip trailing separators to prevent, e.g., '/a/b/c/' -> ('/a/b/c', '').
    path = path.rstrip('/' + os.sep)
    basename = os.path.basename(path)
    if not isinstance(pattern, re.Pattern):
        # Use fnmatch.translate to build a regex so case (in)sensitivity is
        # explicit; glob.glob is inconsistently case (in)sensitive across
        # platforms.
        flags = 0 if case_sensitive else re.IGNORECASE
        pattern = re.compile(fnmatch.translate(pattern), flags=flags)
    return bool(pattern.match(basename))
def find(base_path: Union[str, bytes, os.PathLike],
         pattern: Union[str, re.Pattern],
         follow_links: bool = False,
         case_sensitive: bool = True,
         ) -> Iterable[Union[str, bytes, os.PathLike]]:
    """
    Generator to find files and directories under base_path that match pattern.

    Paths are yielded as produced by ``os.walk``: each directory first,
    then the entries inside it.

    :param base_path: The directory to start searching.
    :param pattern: The pattern to search against, as a glob string or regular
        expression object. It is tested against each entry's basename.
    :param follow_links: Whether or not to descend into symlinked directories.
        When False, symlinks to directories are still reported, but not entered.
    :param case_sensitive: Whether or not glob comparisons should be
        case-sensitive. Has no effect when ``pattern`` is a compiled expression.
    :return: An iterator over the matching paths.
    """
    if not isinstance(pattern, re.Pattern):
        # Translate the glob once up front instead of once per visited entry.
        flags = 0 if case_sensitive else re.IGNORECASE
        pattern = re.compile(fnmatch.translate(pattern), flags=flags)

    for _dir, _dirs, _files in os.walk(base_path, followlinks=follow_links):
        if __get_matches(_dir, pattern, case_sensitive=case_sensitive):
            yield _dir
        if not follow_links:
            # os.walk lists symlinks to directories among the sub-directory
            # names but does not visit them unless followlinks is set, so they
            # would otherwise never be reported.
            for _d in _dirs:
                link_path = os.path.join(_dir, _d)
                if (os.path.islink(link_path)
                        and __get_matches(link_path, pattern, case_sensitive=case_sensitive)):
                    yield link_path
        for _f in _files:
            full_path = os.path.join(_dir, _f)
            if __get_matches(full_path, pattern, case_sensitive=case_sensitive):
                yield full_path
if __name__ == '__main__':
    # Command-line entry point: parse the options, then print each matching
    # path on its own line as soon as it is found.
    from argparse import ArgumentParser, ArgumentTypeError

    def _regex(value):
        # re.error is not a ValueError, so argparse would let it escape as a
        # traceback; convert it into a regular usage error instead.
        try:
            return re.compile(value)
        except re.error as err:
            raise ArgumentTypeError('invalid regular expression: {}'.format(err))

    parser = ArgumentParser()
    parser.add_argument('path', help='The base path to search from.')
    parser.add_argument('-L', '--follow-links', action='store_true', help='Follow links')
    name_mutex = parser.add_mutually_exclusive_group()
    name_mutex.add_argument('--iname', help='Case-insensitive name search. Supports glob patterns.')
    name_mutex.add_argument('--name', help='Case-sensitive name search. Supports glob patterns.')
    name_mutex.add_argument('--rname', type=_regex, help='Python regular expression for name search.')
    args = parser.parse_args()

    # Default to matching everything when no name option is given. Compare
    # against None so that an explicitly empty pattern is not mistaken for
    # "option not supplied".
    search_pattern = '*'
    match_case = True
    if args.iname is not None:
        search_pattern = args.iname
        match_case = False
    elif args.name is not None:
        search_pattern = args.name
    elif args.rname is not None:
        search_pattern = args.rname

    # Stream results instead of unpacking the whole generator into print():
    # output appears immediately and no blank line is emitted for zero matches.
    for found in find(args.path, search_pattern,
                      follow_links=args.follow_links,
                      case_sensitive=match_case):
        print(found)
@ievans3024
Copy link
Author

ievans3024 commented Apr 19, 2020

Usage Examples

In a python program (assuming the find module is in your python path or virtualenv)

from find import find
import re

# all files
for f in find('/path/to/some/dir', '*'):
    print(f)

# follow links
for f in find('/path/to/some/dir', '*', follow_links=True):
    print(f)

# plain filename
# find any files/folders that match the name 'abc'

# case sensitive
for f in find('/path/to/some/dir', 'abc'):
    # does not match, e.g. ABC, Abc, aBc, etc.
    print(f)

# case insensitive
for f in find('/path/to/some/dir', 'abc', case_sensitive=False):
    # matches, e.g. abc, ABC, Abc, aBc, etc.
    print(f)

# glob expression
# find any files/folders that start with 'ab'

# case sensitive
for f in find('/path/to/some/dir', 'ab*'):
    # does not match, e.g., ABC, Abc, aBc, etc.
    print(f)

# case insensitive
for f in find('/path/to/some/dir', 'ab*', case_sensitive=False):
    # matches, e.g., abc, ABC, Abc, aBc, etc.
    print(f)

# regex
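# find any files/folders with names that start with one or more of the
# characters a, b, and c (the expression is applied with re.match, so
# append \Z to require the whole name to consist of those characters)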

# case sensitive
for f in find('/path/to/some/dir', re.compile(r'[abc]+')):
    print(f)

# case insensitive
for f in find('/path/to/some/dir', re.compile(r'(?i:[abc]+)')):
    print(f)

As a shell program (assuming the find module file is in your path and executable)

# all files under /path/to/some/dir
find.py /path/to/some/dir

# also follow links
find.py -L /path/to/some/dir

# plain filename
find.py /path/to/some/dir --name abc  # case sensitive
find.py /path/to/some/dir --iname abc  # case insensitive

# glob expression
find.py /path/to/some/dir --name 'ab*'  # case sensitive
find.py /path/to/some/dir --iname 'ab*'  # case insensitive

# regex
find.py /path/to/some/dir --rname '[abc]+'  # case sensitive
find.py /path/to/some/dir --rname '(?i:[abc]+)'  # case insensitive

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment