-
-
Save samuelcolvin/13641d3085e7ed4a880c to your computer and use it in GitHub Desktop.
#! /usr/bin/python | |
import argparse | |
import subprocess | |
import os | |
import re | |
import mmap | |
from termcolor import colored | |
import mimetypes | |
# Command-line interface: a directory, one or more search words, and
# optional path filters.
parser = argparse.ArgumentParser(
    description="""findin
Find strings in files in a directory and colour print them.
""",
    formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('directory',
                    action='store',
                    help='directory to search')
# The words given here are joined with single spaces to form the pattern.
parser.add_argument('search',
                    nargs='+',
                    help='string to search for.')
parser.add_argument('-x',
                    '--exclude-filter',
                    action='store',
                    help='paths not to search, processed as regex.')
parser.add_argument('-i',
                    '--include-filter',
                    action='store',
                    help='paths to include, processed as regex, if not provided all paths are searched.')
parser.add_argument('-e',
                    '--extension',
                    action='store',
                    help='required extension, (actually required ending of path), eg. ".py" or "el.py"')
# match_search holds the *name* of the compiled-pattern method ('search' or
# 'match') that the path filters look up with getattr.
parser.add_argument('-m', '--match',
                    dest='match_search',
                    action='store_const',
                    const='match',
                    default='search',
                    help='use re.match instead of default of re.search')

# Directory the script was started from; print_result joins result paths
# onto it to build file:// links.
WD = os.getcwd()
def print_result(path, results):
    """Print the matches found in one file, with colour highlighting.

    Reads two module-level names: ``WD`` (joined with ``path`` to build the
    ``file://`` header) and ``search`` (the regular expression whose matches
    are highlighted).

    :param path: path of the file the results belong to
    :param results: strings of the form ``'<line number>:<text>'``; each is
        cut to 500 characters and marked with ``...`` when longer
    """
    # A single parenthesised argument keeps this valid on Python 2 and 3.
    print('\nfile://%s' % os.path.join(WD, path))
    for result in results:
        curtail = len(result) > 500
        if curtail:
            result = result[:500]
        number, rest = result.split(':', 1)
        pieces = [colored('%8s:' % number, 'yellow')]
        # Results are built as '<number>: <text>'; set the separator space
        # aside so anchors such as ^ see the text the line was matched on.
        if rest.startswith(' '):
            pieces.append(colored(' ', 'cyan'))
            rest = rest[1:]
        # Lines are selected with a regex search, so highlight regex matches
        # rather than literal occurrences of the pattern text.
        pos = 0
        for found in re.finditer(search, rest):
            if found.start() == found.end():
                # zero-width match: nothing to highlight
                continue
            pieces.append(colored(rest[pos:found.start()], 'cyan'))
            pieces.append(colored(found.group(0), 'red', attrs=['bold']))
            pos = found.end()
        pieces.append(colored(rest[pos:], 'cyan'))
        if curtail:
            pieces.append(colored('...', 'yellow'))
        print(''.join(pieces))
args = parser.parse_args()

# Path filters: args.match_search names the compiled-pattern method to use
# ('search' by default, 'match' with -m/--match).
exclude = None
if args.exclude_filter:
    exclude = getattr(re.compile(args.exclude_filter), args.match_search)
include = None
if args.include_filter:
    include = getattr(re.compile(args.include_filter), args.match_search)

# Collect candidate files; fcount counts every file seen, paths only those
# that pass the extension / exclude / include filters.
paths = []
fcount = 0
for dirpath, _dirnames, filenames in os.walk(args.directory):
    for filename in filenames:
        path = os.path.join(dirpath, filename)
        fcount += 1
        if args.extension and not path.endswith(args.extension):
            continue
        if exclude and exclude(path):
            continue
        if include and not include(path):
            continue
        paths.append(path)

# print_result reads this module-level name, so it must stay global.
search = ' '.join(args.search)
# print is called with one parenthesised argument so the syntax is valid on
# both Python 2 and Python 3.
print('%d files filtered' % fcount)
print('%d matching files' % len(paths))
print('searching for "%s"' % search)

# Compile once instead of handing the pattern text to re.search per line.
line_matches = re.compile(search).search
results = []
for path in paths:
    if os.stat(path).st_size == 0:
        # file is empty
        continue
    ftype, _ = mimetypes.guess_type(path)
    if ftype is not None and not ftype.startswith('text/'):
        continue
    # Lines are matched directly: a whole-file mmap pre-check needs a bytes
    # pattern on Python 3, and the map was never closed.
    with open(path) as text_file:
        # Line numbers are 1-based, as editors and grep report them.
        file_results = ['%d: %s' % (number, line.rstrip('\n'))
                        for number, line in enumerate(text_file, 1)
                        if line_matches(line)]
    if file_results:
        results.append((path, file_results))

for show_path, result in results:
    print_result(show_path, result)
print('%d results found' % sum(len(r[1]) for r in results))
Better to use ripgrep; it is much faster and more powerful.
The syntax error occurs because this is very old Python 2 code.
Many thanks for your reply. I tried to install ripgrep but got the following error:
Could not find a version that satisfies the requirement ripgrep (from versions: )
No matching distribution found for ripgrep
Does ripgrep work on Python 2 or only 3?
It's written in Rust; search for it on Google.
Great, thanks!
Just installed Rust and ripgrep successfully. Now try to start using
#! /usr/bin/python3
import argparse
import subprocess
import os
import re
import mmap
from termcolor import colored
import mimetypes
# Build the findin command-line parser: a directory, the search words, and
# optional path filters.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description=(
        'findin\n'
        'Find strings in files in a directory and colour print them.\n'
    ),
)

# Positional arguments ('store' is argparse's default action).
parser.add_argument('directory', help='directory to search')
parser.add_argument('search', nargs='+', help='string to search for.')

# Optional path filters.
parser.add_argument(
    '-x', '--exclude-filter',
    help='paths not to search, processed as regex.',
)
parser.add_argument(
    '-i', '--include-filter',
    help='paths to include, processed as regex, if not provided all paths are searched.',
)
parser.add_argument(
    '-e', '--extension',
    help='required extension, (actually required ending of path), eg. ".py" or "el.py"',
)

# Stores the name of the compiled-pattern method the path filters will call.
parser.add_argument(
    '-m', '--match',
    action='store_const',
    const='match',
    default='search',
    dest='match_search',
    help='use re.match instead of default of re.search',
)

# Working directory at start-up, used when printing file:// links.
WD = os.getcwd()
def print_result(path, results):
    """Print the matches found in one file, with colour highlighting.

    Reads two module-level names: ``WD`` (joined with ``path`` to build the
    ``file://`` header) and ``search`` (the regular expression whose matches
    are highlighted).

    :param path: path of the file the results belong to
    :param results: strings of the form ``'<line number>:<text>'``; each is
        cut to 500 characters and marked with ``...`` when longer
    """
    print('\nfile://%s' % os.path.join(WD, path))
    for result in results:
        curtail = len(result) > 500
        if curtail:
            result = result[:500]
        number, rest = result.split(':', 1)
        pieces = [colored('%8s:' % number, 'yellow')]
        # Results are built as '<number>: <text>'; set the separator space
        # aside so anchors such as ^ see the text the line was matched on.
        if rest.startswith(' '):
            pieces.append(colored(' ', 'cyan'))
            rest = rest[1:]
        # Lines are selected with a regex search, so highlight regex matches
        # rather than literal occurrences of the pattern text.
        pos = 0
        for found in re.finditer(search, rest):
            if found.start() == found.end():
                # zero-width match: nothing to highlight
                continue
            pieces.append(colored(rest[pos:found.start()], 'cyan'))
            pieces.append(colored(found.group(0), 'red', attrs=['bold']))
            pos = found.end()
        pieces.append(colored(rest[pos:], 'cyan'))
        if curtail:
            pieces.append(colored('...', 'yellow'))
        print(''.join(pieces))
args = parser.parse_args()

# Path filters: args.match_search names the compiled-pattern method to use
# ('search' by default, 'match' with -m/--match).
exclude = None
if args.exclude_filter:
    exclude = getattr(re.compile(args.exclude_filter), args.match_search)
include = None
if args.include_filter:
    include = getattr(re.compile(args.include_filter), args.match_search)

# Collect candidate files; fcount counts every file seen, paths only those
# that pass the extension / exclude / include filters.
paths = []
fcount = 0
for dirpath, _dirnames, filenames in os.walk(args.directory):
    for filename in filenames:
        path = os.path.join(dirpath, filename)
        fcount += 1
        if args.extension and not path.endswith(args.extension):
            continue
        if exclude and exclude(path):
            continue
        if include and not include(path):
            continue
        paths.append(path)

# print_result reads this module-level name, so it must stay global.
search = ' '.join(args.search)
print('%d files filtered' % fcount)
print('%d matching files' % len(paths))
print('searching for "%s"' % search)
# A bare `print` is a no-op expression on Python 3; call it to emit the
# blank separator line.
print()

# Compile once instead of handing the pattern text to re.search per line.
line_matches = re.compile(search).search
results = []
for path in paths:
    if os.stat(path).st_size == 0:
        # file is empty
        continue
    ftype, _ = mimetypes.guess_type(path)
    if ftype is not None and not ftype.startswith('text/'):
        continue
    # Lines are matched directly as text: searching an mmap with a str
    # pattern raises TypeError on Python 3, and the map was never closed.
    # errors='replace' keeps one undecodable byte from aborting the run.
    with open(path, errors='replace') as text_file:
        # Line numbers are 1-based, as editors and grep report them.
        file_results = [
            '%d: %s' % (number, line.rstrip('\n'))
            for number, line in enumerate(text_file, 1)
            if line_matches(line)
        ]
    if file_results:
        results.append((path, file_results))

for show_path, result in results:
    print_result(show_path, result)
print('%d results found' % sum(len(r[1]) for r in results))
I would love to try this code; however, I get a syntax error on line 46: print '\nfile://%s' % os.path.join(WD, path). The apostrophe just before % os.path causes the error. Any suggestions on how to resolve this syntax error would be appreciated.