Last active
March 27, 2022 19:01
-
-
Save KevOrr/7489d4a2a091e7e68fedf25df6fa4589 to your computer and use it in GitHub Desktop.
Grepair
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import re | |
import operator as op | |
import itertools as it | |
import sys | |
def get_positions(text, patterns_a, patterns_b):
    """Locate every match of two pattern groups inside ``text``.

    Every pattern is searched case-insensitively with ``re.finditer``.

    Returns a two-element list: the first entry holds the ``(start, end)``
    spans found for ``patterns_a``, the second those for ``patterns_b``.
    Each entry is a tuple whose spans are grouped by pattern, in the order
    the patterns were given.
    """
    def spans_for(group):
        found = []
        for pattern in group:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                # finditer only yields real matches, so nothing to filter.
                found.append(match.span())
        return tuple(found)

    return [spans_for(patterns_a), spans_for(patterns_b)]
def find_python(positions):
    """Rank every pairing of an A-span with a B-span by distance.

    ``positions`` holds two collections of ``(start, end)`` spans; one span
    is drawn from each via a Cartesian product.

    Returns a list of ``(distance, span_a, span_b)`` tuples, where the
    distance is the absolute difference between the two start offsets,
    sorted by ascending distance. The sort is stable, so equally distant
    pairs keep their product order.
    """
    ranked = [
        (abs(span_b[0] - span_a[0]), span_a, span_b)
        for span_a, span_b in it.product(*positions)
    ]
    ranked.sort(key=op.itemgetter(0))
    return ranked
def search(text, patterns_a, patterns_b):
    """Print the five closest A/B match pairs found in ``text``.

    Each output line shows the distance between the two match starts,
    left-aligned in a six-character column, followed by the stretch of
    text that covers both matches plus up to ten characters of context
    on either side.
    """
    ranked = find_python(get_positions(text, patterns_a, patterns_b))
    for gap, span_a, span_b in ranked[:5]:
        # Clamp the left edge at 0 so a match near the start of the text
        # does not turn into a negative (wrap-around) slice index.
        window_start = max(min(span_a[0], span_b[0]) - 10, 0)
        window_end = max(span_a[1], span_b[1]) + 10
        print('{:<6} {}'.format(gap, text[window_start:window_end]))
def main():
    """Command-line entry point: ``PROG PATTERNS_A PATTERNS_B FILE``.

    Each PATTERNS argument is a list of regexes framed by a delimiter of
    the caller's choosing: the first character of the argument is taken as
    the delimiter, so ``/foo/bar/`` yields the patterns ``foo`` and ``bar``.
    FILE is read as ASCII, with undecodable bytes replaced.

    Exits with a usage message (non-zero status) when an argument is
    missing or a pattern argument is empty.
    """
    if len(sys.argv) < 4 or not sys.argv[1] or not sys.argv[2]:
        prog = sys.argv[0] if sys.argv else 'grepair'
        sys.exit('usage: {} PATTERNS_A PATTERNS_B FILE'.format(prog))

    def split_patterns(arg):
        # The first character doubles as the delimiter; strip it from both
        # ends before splitting so no empty leading/trailing pattern remains.
        delimiter = arg[0]
        return arg.strip(delimiter).split(delimiter)

    patterns_a = split_patterns(sys.argv[1])
    patterns_b = split_patterns(sys.argv[2])
    # Context manager guarantees the file handle is closed after reading.
    with open(sys.argv[3], encoding='ascii', errors='replace') as handle:
        text = handle.read()
    search(text, patterns_a, patterns_b)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment