Created
July 9, 2015 14:24
-
-
Save wonzbak/3072c1ddf9fb1a224404 to your computer and use it in GitHub Desktop.
Fuzzy file finder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
"""Find a file in a list in a fuzzy way. | |
""" | |
from __future__ import division | |
import os | |
import sys | |
import time | |
import re | |
import readline # for history with up | |
import argparse | |
from pprint import pprint as pp | |
def benchIt(func): | |
"""Decorator, display execution time of a function. | |
""" | |
def wrapper(*args, **kwargs): | |
start = time.time() | |
ret = func(*args) | |
duration = time.time() - start | |
sys.stderr.write("{} executed in {:.5f}s\n".format(func.__name__, duration)) | |
return ret | |
return wrapper | |
class AnsiColor: | |
"""ANSI escape sequences | |
""" | |
BLUE = '\033[94m' | |
BROWN = '\033[94m' | |
PURPLE = '\033[95m' | |
CYAN = '\033[96m' | |
DARKCYAN = '\033[36m' | |
GREEN = '\033[92m' | |
GREEN = '\033[92m' | |
RED = '\033[91m' | |
YELLOW = '\033[93m' | |
BOLD = '\033[1m' | |
UNDERLINE = '\033[4m' | |
END = '\033[0m' | |
@staticmethod | |
def boldize(string): | |
return "%s%s%s" % (AnsiColor.BOLD, string, AnsiColor.END) | |
class Item (object): | |
"""Class representing an item which is a match | |
""" | |
def __init__(self, string, score, groups): | |
self.string = string | |
self.score = score | |
self.groups = groups | |
def __str__(self): | |
return self.string | |
def wrapItem(self, before, after): | |
"""Wrap match char with before and after. | |
""" | |
parts = [] | |
nb_group = len(self.groups) | |
# split item string | |
for i, group in enumerate(self.groups): | |
first = True if i == 0 else False | |
last = True if i == nb_group - 1 else False | |
if first: | |
# from start to begining of first group | |
startPos = 0 | |
endPos = self.groups[i]['start'] | |
parts.append(self.string[startPos:endPos]) | |
else: | |
# between 2 groups | |
startPos = self.groups[i - 1]['end'] | |
endPos = self.groups[i]['start'] | |
parts.append(self.string[startPos:endPos]) | |
if last: | |
# from last end group to end of string | |
endPos = self.groups[i]['end'] | |
parts.append(self.string[endPos:]) | |
# highlight char. | |
wrapped_string = "" | |
for i, part in enumerate(parts): | |
wrapped_string += part | |
if i < nb_group: | |
wrapped_string += "{}{}{}".format(before, self.groups[i]['car'], after) | |
return wrapped_string | |
def highlightToTerminal(self): | |
return self.wrapItem(AnsiColor.BOLD, AnsiColor.END) | |
class FuzzySearch(object): | |
def __init__(self, collection): | |
self.collection = collection | |
@staticmethod | |
def getNbCharBetweenGroup(groups): | |
"""Count character between groups | |
""" | |
nb_char = 0 | |
for i, group in enumerate(groups): | |
if i == 0: | |
continue | |
nb_char += groups[i]['start'] - groups[i - 1]['end'] | |
return nb_char | |
def searchReverse(self, search): | |
"""Filter items base on search filter, same as fuzzyFind | |
but with inverted search and items. | |
""" | |
filtered_items = [] | |
pattern_parts = [] | |
r_search = search[::-1] # reversed search | |
escape_car = ['.'] | |
for car in r_search: | |
if car in escape_car: | |
pattern_parts.append(r"(\%s)" % car) | |
else: | |
pattern_parts.append(r"(%s)" % car) | |
pattern = '.*?'.join(pattern_parts) | |
regex = re.compile(pattern, re.I) | |
for item in self.collection: | |
r_item = item[::-1] # reversed item | |
match = regex.search(r_item) | |
if match: | |
groups = [] | |
for i, group in enumerate(match.groups()): | |
groupData = { | |
'start': match.span(i + 1)[0], | |
'end': match.span(i + 1)[1], | |
'car': group | |
} | |
groups.append(groupData) | |
nb_chars = FuzzySearch.getNbCharBetweenGroup(groups) | |
# the less there is character between each group, the more the score is | |
# the less there is character between the last group and the end of line, | |
# the more th score is | |
score = (len(search) / len(item) * 100) + (1 / (nb_chars + match.start()) * 10000) | |
# reverse groups | |
item_length = len(item) | |
reversed_groups = list(reversed(groups)) | |
for group_ in reversed_groups: | |
start = item_length - group_['end'] | |
end = item_length - group_['start'] | |
group_['start'] = start | |
group_['end'] = end | |
filtered_items.append( | |
( | |
score, | |
item, | |
reversed_groups, | |
) | |
) | |
return [Item(item, score_, group) | |
for score_, item, group in sorted(filtered_items, reverse=True)] | |
def fuzzyFind(self, search): | |
"""Filter items base on search filter | |
""" | |
filtered_items = [] | |
pattern_parts = [] | |
for car in search: | |
pattern_parts.append("(%s)" % car) | |
pattern = '.*?'.join(pattern_parts) | |
print "DEBUG Pattern: {}".format(pattern) | |
regex = re.compile(pattern, re.I) | |
for item in self.collection: | |
match = regex.search(item) | |
if match: | |
groups = [] | |
for i, group in enumerate(match.groups()): | |
groupData = { | |
'start': match.span(i + 1)[0], | |
'end': match.span(i + 1)[1], | |
'car': group | |
} | |
groups.append(groupData) | |
nb_chars = FuzzySearch.getNbCharBetweenGroup(groups) | |
score = nb_chars + match.start() | |
filtered_items.append( | |
( | |
score, | |
item, | |
groups, | |
) | |
) | |
return [Item(item, score_, start_, group) | |
for score_, start_, item, group in sorted(filtered_items)] | |
class Application(object): | |
"""Load file of a directory and filter them. | |
""" | |
def __init__(self,): | |
self.args = self.cli() | |
self.files = self.loadFiles(self.args.directory, self.args.ext) | |
self.fuzzySearcher = FuzzySearch(self.files) | |
def cli(self): | |
parser = argparse.ArgumentParser(description='fuzzy search on file') | |
parser.add_argument('directory', help='root directory') | |
parser.add_argument('--ext', default=None, help='include only files with an extension') | |
parser.add_argument('-f', '--filter', default=None, help='filter to apply on the files list') | |
parser.add_argument('--with-score', default=False, action="store_true", help='Display score') | |
parser.add_argument('-v', '--verbosity', action="count", help='Verbosity') | |
return parser.parse_args() | |
def run(self): | |
"""Run the command. | |
""" | |
if self.args.filter: | |
self.filterFiles(self.args.filter) | |
else: | |
# interactive command line | |
try: | |
self.userInputloop() | |
except (KeyboardInterrupt): | |
pass | |
except (EOFError): | |
# exit | |
pass | |
def loadFiles(self, directory=None, extension=None): | |
"""Return list of all file in the directory with the extension. | |
""" | |
if directory is None: | |
directory = '.' | |
directory = os.path.abspath(os.path.expanduser(directory)) | |
if not os.path.isdir(directory): | |
raise Exception("Directory {} not exist".format(directory)) | |
files = [] | |
root_position = len(directory) + 1 | |
start_time = time.time() | |
for dirpath, dirnames, filenames in os.walk(directory): | |
for name in filenames: | |
projectPath = dirpath[root_position:] | |
if extension is not None: | |
if name.endswith(".{}".format(extension)): | |
files.append("{}/{}".format(projectPath, name)) | |
else: | |
files.append("{}/{}".format(projectPath, name)) | |
duration = time.time() - start_time | |
if self.args.verbosity: | |
if extension is not None: | |
sys.stderr.write( | |
"Got {} {} files from {} in {:.5f}s\n" | |
.format(len(files), extension, directory, duration)) | |
else: | |
sys.stderr.write("Got {} files from {} in {:.5f}s" .format(len(files), directory, duration)) | |
return files | |
def filterFiles(self, filter_): | |
"""Display filtered file. | |
""" | |
start_time = time.time() | |
filtered_files = self.fuzzySearcher.searchReverse(filter_) | |
duration = time.time() - start_time | |
for item in filtered_files: | |
if self.args.with_score: | |
print '{:.0f} {}'.format(item.score, item.highlightToTerminal()) | |
else: | |
print '{}'.format(item.highlightToTerminal()) | |
if (self.args.verbosity): | |
sys.stderr.write("Got {} item(s) in {:.5f}s\n".format(len(filtered_files), duration)) | |
def userInputloop(self): | |
while True: | |
try: | |
user_input = raw_input('filter: ') | |
if user_input: | |
self.filterFiles(user_input) | |
except KeyboardInterrupt as e: | |
print e | |
if __name__ == '__main__': | |
app = Application() | |
app.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment