Skip to content

Instantly share code, notes, and snippets.

@samuelcolvin
Last active June 2, 2022 15:51
Show Gist options
  • Save samuelcolvin/13641d3085e7ed4a880c to your computer and use it in GitHub Desktop.
Save samuelcolvin/13641d3085e7ed4a880c to your computer and use it in GitHub Desktop.
python script to find strings in files in a directory
#! /usr/bin/python
import argparse
import subprocess
import os
import re
import mmap
from termcolor import colored
import mimetypes
def _build_parser():
    """Assemble the command-line interface of the findin script."""
    cli = argparse.ArgumentParser(
        description="""findin
Find strings in files in a directory and colour print them.
""",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Positionals: where to look, then the words to look for.
    cli.add_argument('directory', action='store', help='directory to search')
    cli.add_argument('search', nargs='+', help='string to search for.')

    # Optional path filters.
    cli.add_argument(
        '-x', '--exclude-filter',
        action='store',
        help='paths not to search, processed as regex.',
    )
    cli.add_argument(
        '-i', '--include-filter',
        action='store',
        help='paths to include, processed as regex, if not provided all paths are searched.',
    )
    cli.add_argument(
        '-e', '--extension',
        action='store',
        help='required extension, (actually required ending of path), eg. ".py" or "el.py"',
    )

    # Stores the name of the regex method ('search' or 'match') to use
    # for the include/exclude filters.
    cli.add_argument(
        '-m', '--match',
        dest='match_search',
        action='store_const',
        const='match',
        default='search',
        help='use re.match instead of default of re.search',
    )
    return cli


parser = _build_parser()

# Working directory at start-up; used to build the file:// links that are
# printed for each result.
WD = os.getcwd()
def print_result(path, results):
    """Print the matching lines of one file with colour highlighting.

    Args:
        path: path of the file; it is joined onto ``WD`` to print a
            ``file://`` link above the matches.
        results: list of ``'<line number>: <line text>'`` strings.

    Reads the module-level ``search`` pattern. Lines are selected with
    ``re.search``, so the highlighting is also regex based: the text that
    actually matched is shown in bold red rather than the literal pattern.
    """
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('\nfile://%s' % os.path.join(WD, path))
    pattern = re.compile(search)
    for result in results:
        # Over-long lines are cut to 500 characters and marked with '...'.
        curtail = len(result) > 500
        if curtail:
            result = result[:500]
        number, rest = result.split(':', 1)
        pieces = [colored('%8s:' % number, 'yellow')]
        pos = 0
        for found in pattern.finditer(rest):
            if found.start() == found.end():
                # Zero-width matches have nothing to highlight.
                continue
            pieces.append(colored(rest[pos:found.start()], 'cyan'))
            pieces.append(colored(found.group(0), 'red', attrs=['bold']))
            pos = found.end()
        pieces.append(colored(rest[pos:], 'cyan'))
        if curtail:
            pieces.append(colored('...', 'yellow'))
        print(''.join(pieces))
# Script body: collect candidate files, scan them for the pattern and
# print the matches. Single-argument print() calls are used so the code
# parses on both Python 2 and Python 3.
args = parser.parse_args()

# The path filters are bound regex methods (``search`` or ``match``,
# chosen by -m/--match), or None when the option was not given.
exclude = None
if args.exclude_filter:
    exclude = getattr(re.compile(args.exclude_filter), args.match_search)
include = None
if args.include_filter:
    include = getattr(re.compile(args.include_filter), args.match_search)

paths = []
fcount = 0  # every file seen by the walk, before filtering
for dp, _dirnames, fs in os.walk(args.directory):
    for f in fs:
        path = os.path.join(dp, f)
        fcount += 1
        if args.extension and not path.endswith(args.extension):
            continue
        if exclude and exclude(path):
            continue
        if include and not include(path):
            continue
        paths.append(path)

# Several search words are joined into one pattern separated by spaces.
search = ' '.join(args.search)
print('%d files filtered' % fcount)
print('%d matching files' % len(paths))
print('searching for "%s"' % search)
print('')

# Compile once instead of looking the pattern up for every line.
line_pattern = re.compile(search)
# mmap exposes bytes, so the whole-file prefilter needs a bytes pattern
# (on Python 2 ``str`` already is bytes). MULTILINE makes ^ and $ behave
# as they do in the per-line scan below, so anchored patterns do not
# wrongly skip a file.
needle = search if isinstance(search, bytes) else search.encode('utf-8')
file_pattern = re.compile(needle, re.MULTILINE)

results = []
for path in paths:
    if os.stat(path).st_size == 0:
        # file is empty (and an empty file cannot be memory-mapped)
        continue
    ftype, _ = mimetypes.guess_type(path)
    if ftype is not None and not ftype.startswith('text/'):
        continue
    file_results = []
    with open(path) as f:
        # Cheap whole-file check first; the map is always released.
        file_string = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            found = file_pattern.search(file_string)
        finally:
            file_string.close()
        if not found:
            continue
        # Line numbers are reported 1-based.
        for lineno, line in enumerate(f, 1):
            if line_pattern.search(line):
                file_results.append('%d: %s' % (lineno, line.rstrip('\n')))
    if file_results:
        results.append((path, file_results))

for show_path, result in results:
    print_result(show_path, result)
print('%d results found' % sum(len(r[1]) for r in results))
@Cor7169
Copy link

Cor7169 commented Apr 13, 2020

I would love to try this code, but I get a syntax error on line 46: print '\nfile://%s' % os.path.join(WD, path). The apostrophe just before % os.path causes the error. Any suggestions on how to resolve this syntax error would be appreciated.

@samuelcolvin
Copy link
Author

Better to use ripgrep; it is much faster and more powerful.

@samuelcolvin
Copy link
Author

The syntax error occurs because this is very old Python 2 code.

@Cor7169
Copy link

Cor7169 commented Apr 14, 2020

Many thanks for your reply. I tried to install ripgrep but got the following error:
Could not find a version that satisfies the requirement ripgrep (from versions: )
No matching distribution found for ripgrep
Does ripgrep work on Python 2 or only 3?

@samuelcolvin
Copy link
Author

It's written in rust, search on Google.

@Cor7169
Copy link

Cor7169 commented Apr 14, 2020

Great, thanks!
Just installed Rust and ripgrep successfully. Now I will try to start using it.

@nikkpap
Copy link

nikkpap commented Nov 30, 2020

#! /usr/bin/python3

import argparse
import subprocess
import os
import re
import mmap
from termcolor import colored
import mimetypes

def _make_cli():
    """Create the argument parser for the findin command line."""
    cli = argparse.ArgumentParser(
        description="""findin

Find strings in files in a directory and colour print them.

""",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Required positionals: the directory, then one or more search words.
    cli.add_argument('directory', action='store', help='directory to search')
    cli.add_argument('search', nargs='+', help='string to search for.')

    # Optional filters applied to each candidate path.
    cli.add_argument(
        '-x', '--exclude-filter',
        action='store',
        help='paths not to search, processed as regex.',
    )
    cli.add_argument(
        '-i', '--include-filter',
        action='store',
        help='paths to include, processed as regex, if not provided all paths are searched.',
    )
    cli.add_argument(
        '-e', '--extension',
        action='store',
        help='required extension, (actually required ending of path), eg. ".py" or "el.py"',
    )

    # Holds the regex method name ('search' by default, 'match' with -m)
    # that the include/exclude filters are evaluated with.
    cli.add_argument(
        '-m', '--match',
        dest='match_search',
        action='store_const',
        const='match',
        default='search',
        help='use re.match instead of default of re.search',
    )
    return cli


parser = _make_cli()

# Directory the script was started from; result paths are joined onto it
# when the file:// links are printed.
WD = os.getcwd()

def _colour_line(text, pattern):
    """Return ``text`` in cyan with every non-empty match of ``pattern`` in bold red."""
    pieces = []
    pos = 0
    for found in pattern.finditer(text):
        if found.start() == found.end():
            # Zero-width matches have nothing to highlight.
            continue
        pieces.append(colored(text[pos:found.start()], 'cyan'))
        pieces.append(colored(found.group(0), 'red', attrs=['bold']))
        pos = found.end()
    pieces.append(colored(text[pos:], 'cyan'))
    return ''.join(pieces)


def print_result(path, results):
    """Print the matching lines of one file with colour highlighting.

    Args:
        path: path of the file; it is joined onto ``WD`` to print a
            ``file://`` link above the matches.
        results: list of ``'<line number>: <line text>'`` strings.

    Reads the module-level ``search`` pattern. Lines are selected with
    ``re.search``, so highlighting is regex based as well: the text that
    actually matched is emphasised, not the literal pattern string.
    """
    print('\nfile://%s' % os.path.join(WD, path))
    pattern = re.compile(search)
    for result in results:
        # Over-long lines are cut to 500 characters and marked with '...'.
        curtail = len(result) > 500
        if curtail:
            result = result[:500]
        number, rest = result.split(':', 1)
        s = colored('%8s:' % number, 'yellow') + _colour_line(rest, pattern)
        if curtail:
            s += colored('...', 'yellow')
        print(s)

# Script body: collect candidate files, scan them for the pattern and
# print the matches.
args = parser.parse_args()

# The path filters are bound regex methods (``search`` or ``match``,
# chosen by -m/--match), or None when the option was not given.
exclude = None
if args.exclude_filter:
    exclude = getattr(re.compile(args.exclude_filter), args.match_search)

include = None
if args.include_filter:
    include = getattr(re.compile(args.include_filter), args.match_search)

paths = []
fcount = 0  # every file seen by the walk, before filtering
for dp, _dirnames, fs in os.walk(args.directory):
    for f in fs:
        path = os.path.join(dp, f)
        fcount += 1
        if args.extension and not path.endswith(args.extension):
            continue
        if exclude and exclude(path):
            continue
        if include and not include(path):
            continue
        paths.append(path)

# Several search words are joined into one pattern separated by spaces.
search = ' '.join(args.search)
print('%d files filtered' % fcount)
print('%d matching files' % len(paths))
print('searching for "%s"' % search)
# A bare ``print`` is a no-op expression in Python 3; call it to emit the
# blank separator line.
print()

# Compile once instead of looking the pattern up for every line.
line_pattern = re.compile(search)
# mmap exposes bytes, and a str pattern cannot be applied to bytes, so the
# whole-file prefilter uses an encoded copy of the pattern. MULTILINE makes
# ^ and $ behave as they do in the per-line scan, so anchored patterns do
# not wrongly skip a file. If the pattern is not valid as a bytes regex
# (e.g. it uses \u escapes) the prefilter is skipped.
try:
    file_pattern = re.compile(search.encode('utf-8'), re.MULTILINE)
except re.error:
    file_pattern = None

results = []
for path in paths:
    if os.stat(path).st_size == 0:
        # file is empty (and an empty file cannot be memory-mapped)
        continue
    ftype, _ = mimetypes.guess_type(path)
    if ftype is not None and not ftype.startswith('text/'):
        continue
    file_results = []
    # Files of unknown type are still opened; undecodable bytes are
    # replaced instead of aborting the whole search.
    with open(path, errors='replace') as f:
        if file_pattern is not None:
            # Cheap whole-file check first; the map is released on exit.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as file_bytes:
                if not file_pattern.search(file_bytes):
                    continue
        # Line numbers are reported 1-based.
        for lineno, line in enumerate(f, 1):
            if line_pattern.search(line):
                file_results.append('%d: %s' % (lineno, line.rstrip('\n')))
    if file_results:
        results.append((path, file_results))

for show_path, result in results:
    print_result(show_path, result)
print('%d results found' % sum(len(r[1]) for r in results))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment