wonzbak · July 9, 2015 14:24
diff --git a/fuzzy_file_finder.py b/fuzzy_file_finder.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 """Find a file in a list in a fuzzy way.
 """
 from __future__ import division
 import os
 import sys
 import time
 import re
 import readline  # for history with up
 import argparse

 from pprint import pprint as pp


 def benchIt(func):
    """Decorator, display execution time of a function.
    """
    def wrapper(*args, **kwargs):
        start = time.time()
        ret = func(*args)
        duration = time.time() - start
        sys.stderr.write("{} executed in {:.5f}s\n".format(func.__name__, duration))

        return ret

    return wrapper


 class AnsiColor:
    """ANSI escape sequences
    """
    BLUE = '\033[94m'
    BROWN = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    GREEN = '\033[92m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'

    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'

    @staticmethod
    def boldize(string):
        return "%s%s%s" % (AnsiColor.BOLD, string, AnsiColor.END)


 class Item (object):
    """Class representing an item which is a match
    """
    def __init__(self, string, score, groups):
        self.string = string
        self.score = score
        self.groups = groups

    def __str__(self):
        return self.string

    def wrapItem(self, before, after):
        """Wrap match char with before and after.
        """
        parts = []
        nb_group = len(self.groups)

        # split item string
        for i, group in enumerate(self.groups):
            first = True if i == 0 else False
            last = True if i == nb_group - 1 else False
            if first:
                # from start to begining of first group
                startPos = 0
                endPos = self.groups[i]['start']
                parts.append(self.string[startPos:endPos])

            else:
                # between 2 groups
                startPos = self.groups[i - 1]['end']
                endPos = self.groups[i]['start']
                parts.append(self.string[startPos:endPos])

                if last:
                    # from last end group to end of string
                    endPos = self.groups[i]['end']
                    parts.append(self.string[endPos:])

        # highlight char.
        wrapped_string = ""
        for i, part in enumerate(parts):
            wrapped_string += part
            if i < nb_group:
                wrapped_string += "{}{}{}".format(before, self.groups[i]['car'], after)

        return wrapped_string

    def highlightToTerminal(self):
        return self.wrapItem(AnsiColor.BOLD, AnsiColor.END)


 class FuzzySearch(object):

    def __init__(self, collection):
        self.collection = collection

    @staticmethod
    def getNbCharBetweenGroup(groups):
        """Count character between groups
        """
        nb_char = 0
        for i, group in enumerate(groups):
            if i == 0:
                continue
            nb_char += groups[i]['start'] - groups[i - 1]['end']

        return nb_char

    def searchReverse(self, search):
        """Filter items base on search filter, same as fuzzyFind
        but with inverted search and items.
        """
        filtered_items = []

        pattern_parts = []
        r_search = search[::-1]  # reversed search
        escape_car = ['.']
        for car in r_search:
            if car in escape_car:
                pattern_parts.append(r"(\%s)" % car)
            else:
                pattern_parts.append(r"(%s)" % car)

        pattern = '.*?'.join(pattern_parts)

        regex = re.compile(pattern, re.I)
        for item in self.collection:
            r_item = item[::-1]  # reversed item
            match = regex.search(r_item)
            if match:
                groups = []
                for i, group in enumerate(match.groups()):
                    groupData = {
                        'start': match.span(i + 1)[0],
                        'end': match.span(i + 1)[1],
                        'car': group
                    }
                    groups.append(groupData)
                nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)

                # the less there is character between each group, the more the score is
                # the less there is character between the last group and the end of line,
                # the more th score is
                score = (len(search) / len(item) * 100) + (1 / (nb_chars + match.start()) * 10000)

                # reverse groups
                item_length = len(item)
                reversed_groups = list(reversed(groups))
                for group_ in reversed_groups:
                    start = item_length - group_['end']
                    end = item_length - group_['start']
                    group_['start'] = start
                    group_['end'] = end

                filtered_items.append(
                    (
                        score,
                        item,
                        reversed_groups,
                    )
                )
        return [Item(item, score_, group)
                for score_, item, group in sorted(filtered_items, reverse=True)]

    def fuzzyFind(self, search):
        """Filter items base on search filter
        """
        filtered_items = []

        pattern_parts = []
        for car in search:
            pattern_parts.append("(%s)" % car)

        pattern = '.*?'.join(pattern_parts)
        print "DEBUG Pattern: {}".format(pattern)

        regex = re.compile(pattern, re.I)
        for item in self.collection:
            match = regex.search(item)
            if match:
                groups = []
                for i, group in enumerate(match.groups()):
                    groupData = {
                        'start': match.span(i + 1)[0],
                        'end': match.span(i + 1)[1],
                        'car': group
                    }
                    groups.append(groupData)
                nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)
                score = nb_chars + match.start()

                filtered_items.append(
                    (
                        score,
                        item,
                        groups,
                    )
                )

        return [Item(item, score_, start_, group)
                for score_, start_, item, group in sorted(filtered_items)]


 class Application(object):
    """Load file of a directory and filter them.
    """
    def __init__(self,):
        self.args = self.cli()

        self.files = self.loadFiles(self.args.directory, self.args.ext)
        self.fuzzySearcher = FuzzySearch(self.files)

    def cli(self):
        parser = argparse.ArgumentParser(description='fuzzy search on file')
        parser.add_argument('directory', help='root directory')
        parser.add_argument('--ext', default=None, help='include only files with an extension')
        parser.add_argument('-f', '--filter', default=None, help='filter to apply on the files list')
        parser.add_argument('--with-score', default=False, action="store_true", help='Display score')
        parser.add_argument('-v', '--verbosity', action="count", help='Verbosity')

        return parser.parse_args()

    def run(self):
        """Run the command.
        """
        if self.args.filter:
            self.filterFiles(self.args.filter)
        else:
            # interactive command line
            try:
                self.userInputloop()
            except (KeyboardInterrupt):
                pass
            except (EOFError):
                # exit
                pass

    def loadFiles(self, directory=None, extension=None):
        """Return list of all file in the directory with the extension.
        """
        if directory is None:
            directory = '.'
        directory = os.path.abspath(os.path.expanduser(directory))

        if not os.path.isdir(directory):
            raise Exception("Directory {} not exist".format(directory))

        files = []
        root_position = len(directory) + 1
        start_time = time.time()
        for dirpath, dirnames, filenames in os.walk(directory):
            for name in filenames:
                projectPath = dirpath[root_position:]
                if extension is not None:
                    if name.endswith(".{}".format(extension)):
                        files.append("{}/{}".format(projectPath, name))
                else:
                    files.append("{}/{}".format(projectPath, name))

        duration = time.time() - start_time
        if self.args.verbosity:
            if extension is not None:
                sys.stderr.write(
                    "Got {} {} files from {} in {:.5f}s\n"
                    .format(len(files), extension, directory, duration))
            else:
                sys.stderr.write("Got {} files from {} in {:.5f}s" .format(len(files), directory, duration))

        return files

    def filterFiles(self, filter_):
        """Display filtered file.
        """
        start_time = time.time()
        filtered_files = self.fuzzySearcher.searchReverse(filter_)
        duration = time.time() - start_time
        for item in filtered_files:
            if self.args.with_score:
                print '{:.0f} {}'.format(item.score, item.highlightToTerminal())
            else:
                print '{}'.format(item.highlightToTerminal())

        if (self.args.verbosity):
            sys.stderr.write("Got {} item(s) in {:.5f}s\n".format(len(filtered_files), duration))

    def userInputloop(self):
        while True:
            try:
                user_input = raw_input('filter: ')
                if user_input:
                    self.filterFiles(user_input)
            except KeyboardInterrupt as e:
                print e

 if __name__ == '__main__':
    app = Application()
    app.run()
	#!/usr/bin/env python
	# -- coding: utf-8 --
	#
	"""Find a file in a list in a fuzzy way.
	"""
	from __future__ import division
	import os
	import sys
	import time
	import re
	import readline # for history with up
	import argparse

	from pprint import pprint as pp


	def benchIt(func):
	"""Decorator, display execution time of a function.
	"""
	def wrapper(args, *kwargs):
	start = time.time()
	ret = func(*args)
	duration = time.time() - start
	sys.stderr.write("{} executed in {:.5f}s\n".format(func.__name__, duration))

	return ret

	return wrapper


	class AnsiColor:
	"""ANSI escape sequences
	"""
	BLUE = '\033[94m'
	BROWN = '\033[94m'
	PURPLE = '\033[95m'
	CYAN = '\033[96m'
	DARKCYAN = '\033[36m'
	GREEN = '\033[92m'
	GREEN = '\033[92m'
	RED = '\033[91m'
	YELLOW = '\033[93m'

	BOLD = '\033[1m'
	UNDERLINE = '\033[4m'
	END = '\033[0m'

	@staticmethod
	def boldize(string):
	return "%s%s%s" % (AnsiColor.BOLD, string, AnsiColor.END)


	class Item (object):
	"""Class representing an item which is a match
	"""
	def __init__(self, string, score, groups):
	self.string = string
	self.score = score
	self.groups = groups

	def __str__(self):
	return self.string

	def wrapItem(self, before, after):
	"""Wrap match char with before and after.
	"""
	parts = []
	nb_group = len(self.groups)

	# split item string
	for i, group in enumerate(self.groups):
	first = True if i == 0 else False
	last = True if i == nb_group - 1 else False
	if first:
	# from start to begining of first group
	startPos = 0
	endPos = self.groups[i]['start']
	parts.append(self.string[startPos:endPos])

	else:
	# between 2 groups
	startPos = self.groups[i - 1]['end']
	endPos = self.groups[i]['start']
	parts.append(self.string[startPos:endPos])

	if last:
	# from last end group to end of string
	endPos = self.groups[i]['end']
	parts.append(self.string[endPos:])

	# highlight char.
	wrapped_string = ""
	for i, part in enumerate(parts):
	wrapped_string += part
	if i < nb_group:
	wrapped_string += "{}{}{}".format(before, self.groups[i]['car'], after)

	return wrapped_string

	def highlightToTerminal(self):
	return self.wrapItem(AnsiColor.BOLD, AnsiColor.END)


	class FuzzySearch(object):

	def __init__(self, collection):
	self.collection = collection

	@staticmethod
	def getNbCharBetweenGroup(groups):
	"""Count character between groups
	"""
	nb_char = 0
	for i, group in enumerate(groups):
	if i == 0:
	continue
	nb_char += groups[i]['start'] - groups[i - 1]['end']

	return nb_char

	def searchReverse(self, search):
	"""Filter items base on search filter, same as fuzzyFind
	but with inverted search and items.
	"""
	filtered_items = []

	pattern_parts = []
	r_search = search[::-1] # reversed search
	escape_car = ['.']
	for car in r_search:
	if car in escape_car:
	pattern_parts.append(r"(\%s)" % car)
	else:
	pattern_parts.append(r"(%s)" % car)

	pattern = '.*?'.join(pattern_parts)

	regex = re.compile(pattern, re.I)
	for item in self.collection:
	r_item = item[::-1] # reversed item
	match = regex.search(r_item)
	if match:
	groups = []
	for i, group in enumerate(match.groups()):
	groupData = {
	'start': match.span(i + 1)[0],
	'end': match.span(i + 1)[1],
	'car': group
	}
	groups.append(groupData)
	nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)

	# the less there is character between each group, the more the score is
	# the less there is character between the last group and the end of line,
	# the more th score is
	score = (len(search) / len(item) * 100) + (1 / (nb_chars + match.start()) * 10000)

	# reverse groups
	item_length = len(item)
	reversed_groups = list(reversed(groups))
	for group_ in reversed_groups:
	start = item_length - group_['end']
	end = item_length - group_['start']
	group_['start'] = start
	group_['end'] = end

	filtered_items.append(
	(
	score,
	item,
	reversed_groups,
	)
	)
	return [Item(item, score_, group)
	for score_, item, group in sorted(filtered_items, reverse=True)]

	def fuzzyFind(self, search):
	"""Filter items base on search filter
	"""
	filtered_items = []

	pattern_parts = []
	for car in search:
	pattern_parts.append("(%s)" % car)

	pattern = '.*?'.join(pattern_parts)
	print "DEBUG Pattern: {}".format(pattern)

	regex = re.compile(pattern, re.I)
	for item in self.collection:
	match = regex.search(item)
	if match:
	groups = []
	for i, group in enumerate(match.groups()):
	groupData = {
	'start': match.span(i + 1)[0],
	'end': match.span(i + 1)[1],
	'car': group
	}
	groups.append(groupData)
	nb_chars = FuzzySearch.getNbCharBetweenGroup(groups)
	score = nb_chars + match.start()

	filtered_items.append(
	(
	score,
	item,
	groups,
	)
	)

	return [Item(item, score_, start_, group)
	for score_, start_, item, group in sorted(filtered_items)]


	class Application(object):
	"""Load file of a directory and filter them.
	"""
	def __init__(self,):
	self.args = self.cli()

	self.files = self.loadFiles(self.args.directory, self.args.ext)
	self.fuzzySearcher = FuzzySearch(self.files)

	def cli(self):
	parser = argparse.ArgumentParser(description='fuzzy search on file')
	parser.add_argument('directory', help='root directory')
	parser.add_argument('--ext', default=None, help='include only files with an extension')
	parser.add_argument('-f', '--filter', default=None, help='filter to apply on the files list')
	parser.add_argument('--with-score', default=False, action="store_true", help='Display score')
	parser.add_argument('-v', '--verbosity', action="count", help='Verbosity')

	return parser.parse_args()

	def run(self):
	"""Run the command.
	"""
	if self.args.filter:
	self.filterFiles(self.args.filter)
	else:
	# interactive command line
	try:
	self.userInputloop()
	except (KeyboardInterrupt):
	pass
	except (EOFError):
	# exit
	pass

	def loadFiles(self, directory=None, extension=None):
	"""Return list of all file in the directory with the extension.
	"""
	if directory is None:
	directory = '.'
	directory = os.path.abspath(os.path.expanduser(directory))

	if not os.path.isdir(directory):
	raise Exception("Directory {} not exist".format(directory))

	files = []
	root_position = len(directory) + 1
	start_time = time.time()
	for dirpath, dirnames, filenames in os.walk(directory):
	for name in filenames:
	projectPath = dirpath[root_position:]
	if extension is not None:
	if name.endswith(".{}".format(extension)):
	files.append("{}/{}".format(projectPath, name))
	else:
	files.append("{}/{}".format(projectPath, name))

	duration = time.time() - start_time
	if self.args.verbosity:
	if extension is not None:
	sys.stderr.write(
	"Got {} {} files from {} in {:.5f}s\n"
	.format(len(files), extension, directory, duration))
	else:
	sys.stderr.write("Got {} files from {} in {:.5f}s" .format(len(files), directory, duration))

	return files

	def filterFiles(self, filter_):
	"""Display filtered file.
	"""
	start_time = time.time()
	filtered_files = self.fuzzySearcher.searchReverse(filter_)
	duration = time.time() - start_time
	for item in filtered_files:
	if self.args.with_score:
	print '{:.0f} {}'.format(item.score, item.highlightToTerminal())
	else:
	print '{}'.format(item.highlightToTerminal())

	if (self.args.verbosity):
	sys.stderr.write("Got {} item(s) in {:.5f}s\n".format(len(filtered_files), duration))

	def userInputloop(self):
	while True:
	try:
	user_input = raw_input('filter: ')
	if user_input:
	self.filterFiles(user_input)
	except KeyboardInterrupt as e:
	print e

	if __name__ == '__main__':
	app = Application()
	app.run()