Skip to content

Instantly share code, notes, and snippets.

@iamliuzy
Last active January 22, 2024 13:52
Show Gist options
  • Save iamliuzy/59129658d39804dbc31104c4a158b79b to your computer and use it in GitHub Desktop.
Save iamliuzy/59129658d39804dbc31104c4a158b79b to your computer and use it in GitHub Desktop.
Find keywords in a text file. The keywords are specified by another text file.
"""
# Find Keywords
Find keywords in a text file.
The keywords are specified by another text file.
## Arguments
positional arguments:
file File to be searched for keywords.
keyword_file File containing keywords to search for.
options:
-h, --help show this help message and exit
--output-mode {list,highlight}, -m {list,highlight}
Output mode.
list - List all keywords found in the file.
highlight - Show full content of the file and highlight keywords found.
(default:list)
--lines-per-page LINES_PER_PAGE, -l LINES_PER_PAGE
Number of lines to display per page.
"""
from pathlib import Path
import argparse
from colorama import Fore
# Arguments
# Characters stripped from the keyword file before every remaining character
# is treated as a keyword.
_KEYWORD_FILE_NOISE = str.maketrans('', '', '\n\r\t ')


def build_parser():
    """Create the command-line parser for the keyword finder.

    ``RawTextHelpFormatter`` is used because the ``--output-mode`` help text
    contains explicit newlines; the default formatter would collapse them
    into a single paragraph.
    """
    parser = argparse.ArgumentParser(
        prog="Find Keywords",
        description="Find keywords in a text file. "
                    "The keywords are specificated by another text file.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument('file', help='File to be searched for keywords.', type=Path)
    parser.add_argument('keyword_file', help='File containing keywords to search for.',
                        type=Path)
    parser.add_argument(
        '--output-mode', '-m', choices=['list', 'highlight'], default='list',
        help='Output mode.\n'
             'list - List all keywords found in the file.\n'
             'highlight - Show full content of the file and highlight keywords found.\n'
             '(default:%(default)s)',
    )
    parser.add_argument('--lines-per-page', '-l', help='Number of lines to display per page.',
                        type=int, required=True)
    return parser


def page_position(linenum, lines_per_page):
    """Convert a 1-based line number into a 1-based ``(page, line_on_page)`` pair.

    With 10 lines per page, line 10 is ``(1, 10)`` and line 11 is ``(2, 1)``.
    """
    page, offset = divmod(linenum - 1, lines_per_page)
    return page + 1, offset + 1


def find_keyword_positions(lines, keywords, lines_per_page):
    """Locate every keyword character in ``lines``.

    Args:
        lines: Iterable of text lines. Lines starting with ``'# '`` are not
            searched, but they still count towards the line numbering.
        keywords: Iterable of single-character keywords. Duplicates are
            ignored and first-occurrence order is kept.
        lines_per_page: Number of lines on one page; must be at least 1.

    Returns:
        A dict mapping each keyword to the list of distinct
        ``(page, line_on_page)`` positions where it occurs, in file order.
        Keywords that never occur map to an empty list.

    Raises:
        ValueError: If ``lines_per_page`` is smaller than 1.
    """
    if lines_per_page < 1:
        raise ValueError("lines_per_page must be a positive integer")
    positions = {char: [] for char in keywords}
    for linenum, line in enumerate(lines, start=1):
        if line.startswith('# '):
            continue
        pos = page_position(linenum, lines_per_page)
        for char in line:
            hits = positions.get(char)
            # Positions are produced in increasing line order, so a repeat
            # within the same line can only equal the last recorded entry.
            if hits is not None and (not hits or hits[-1] != pos):
                hits.append(pos)
    return positions


def format_listing(positions):
    """Render the result of ``find_keyword_positions`` as output lines.

    One line per keyword that was found (``<char>:P<page>L<line>`` entries
    separated by ``'、'``), followed by a summary line with the number of
    keywords found. Keywords without any position are left out.
    """
    rows = [
        f"{char}:" + '、'.join(f"P{page}L{line}" for page, line in hits)
        for char, hits in positions.items()
        if hits
    ]
    rows.append(f"Found {len(rows)} words.")
    return rows


def highlight_line(line, keywords):
    """Return ``line`` with every keyword character wrapped in blue colour codes.

    Lines starting with ``'# '`` are returned unchanged. ``Fore`` is the
    colorama object imported at the top of the file.
    """
    if line.startswith('# '):
        return line
    return ''.join(
        Fore.BLUE + char + Fore.RESET if char in keywords else char
        for char in line
    )


def main(argv=None):
    """Run the keyword finder.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.output_mode == 'list' and args.lines_per_page < 1:
        # Page arithmetic divides by this value, so reject it up front
        # instead of failing later with ZeroDivisionError.
        parser.error("--lines-per-page must be a positive integer")

    with args.file.open('r', encoding='utf-8') as text_file:
        lines = text_file.readlines()
    with args.keyword_file.open('r', encoding='utf-8') as keyword_file:
        # dict.fromkeys keeps first-occurrence order and gives O(1) lookups.
        keywords = dict.fromkeys(keyword_file.read().translate(_KEYWORD_FILE_NOISE))

    if args.output_mode == 'list':
        found = find_keyword_positions(lines, keywords, args.lines_per_page)
        for row in format_listing(found):
            print(row)
    elif args.output_mode == 'highlight':
        for line in lines:
            print(highlight_line(line, keywords), end='')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment