Last active
April 20, 2020 16:32
-
-
Save ievans3024/06e4934a190ac8c763169be31461bc2c to your computer and use it in GitHub Desktop.
Python module that provides Unix/Linux "find" command behavior.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
""" | |
Module for providing behavior like the unix/linux "find" command. | |
Supports glob expressions and python regex objects. | |
""" | |
import fnmatch | |
import os | |
import os.path | |
import re | |
from typing import Union, Iterable | |
def __get_matches(path: Union[str, bytes, os.PathLike],
                  pattern: Union[str, re.Pattern],
                  case_sensitive: bool = True
                  ) -> bool:
    """
    Determine if the basename of a path matches a given pattern.

    :param path: The full path to test, as str, bytes or an os.PathLike object.
    :param pattern: A glob string or a compiled regular expression object to test the basename against.
    :param case_sensitive: Whether or not glob comparisons should be case-sensitive.
        Ignored when pattern is a compiled regular expression, which carries its own flags.
    :return: True if the basename of path matches pattern, False otherwise.
    """
    # Normalise str / bytes / os.PathLike to str so the string operations below work for all of them.
    text_path = os.fsdecode(path)

    # Strip trailing separators to prevent, e.g., '/a/b/c/' -> ('/a/b/c', '').
    # os.altsep is None on platforms that only have one separator.
    separators = os.sep + (os.altsep or '')
    basename = os.path.basename(text_path.rstrip(separators))

    if isinstance(pattern, re.Pattern):
        # A regex compiled from bytes can only be matched against bytes.
        if isinstance(pattern.pattern, bytes):
            return bool(pattern.match(os.fsencode(basename)))
        return bool(pattern.match(basename))

    # Use fnmatch.translate to create a regex to test against, providing for case (in)sensitivity.
    # This is better than glob.glob, which is inconsistently case (in)sensitive across platforms.
    flags = 0 if case_sensitive else re.IGNORECASE
    glob_regex = re.compile(fnmatch.translate(pattern), flags=flags)
    return bool(glob_regex.match(basename))
def find(base_path: Union[str, bytes, os.PathLike],
         pattern: Union[str, re.Pattern],
         follow_links: bool = False,
         case_sensitive: bool = True,
         ) -> Iterable[Union[str, bytes, os.PathLike]]:
    """
    Generator to find files and directories under base_path whose basename matches pattern.

    Each directory visited by os.walk (including base_path itself) is tested first,
    followed by the files it contains.

    :param base_path: The directory to start searching.
    :param pattern: The pattern to search against, as a glob string or regular expression object.
    :param follow_links: Whether or not to follow symlinks
    :param case_sensitive: Whether or not glob comparisons should be case-sensitive.
        Has no effect when pattern is already a compiled regular expression.
    :return: An iterator over the matching paths, as produced by os.walk / os.path.join.
    """
    # Translate a glob into a regex once, up front, instead of once per visited path.
    if not isinstance(pattern, re.Pattern):
        flags = 0 if case_sensitive else re.IGNORECASE
        pattern = re.compile(fnmatch.translate(pattern), flags=flags)

    for current_dir, _, file_names in os.walk(base_path, followlinks=follow_links):
        if __get_matches(current_dir, pattern, case_sensitive=case_sensitive):
            yield current_dir
        for file_name in file_names:
            full_path = os.path.join(current_dir, file_name)
            if __get_matches(full_path, pattern, case_sensitive=case_sensitive):
                yield full_path
if __name__ == '__main__':
    # Command-line entry point: parse arguments, then print one matching path per line.
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('path', help='The base path to search from.')
    parser.add_argument('-L', '--follow-links', action='store_true', help='Follow links')
    # At most one of the name options may be given.
    name_mutex = parser.add_mutually_exclusive_group()
    name_mutex.add_argument('--iname', help='Case-insensitive name search. Supports glob patterns.')
    name_mutex.add_argument('--name', help='Case-sensitive name search. Supports glob patterns.')
    name_mutex.add_argument('--rname', type=re.compile, help='Python regular expression for name search.')
    args = parser.parse_args()

    # Choose the search pattern; with no name option given, match everything.
    case_sensitive = True
    if args.iname:
        search_pattern = args.iname
        case_sensitive = False
    elif args.name:
        search_pattern = args.name
    elif args.rname:
        search_pattern = args.rname
    else:
        search_pattern = '*'

    # Print results as they are found rather than unpacking the whole generator into print():
    # output starts immediately and nothing (not even a blank line) is printed when there are no matches.
    for match in find(args.path, search_pattern,
                      follow_links=args.follow_links,
                      case_sensitive=case_sensitive):
        print(match)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage Examples
In a Python program (assuming the find module is on your Python path or in your virtualenv)
As a shell program (assuming the find module file is in your path and executable)