Last active
January 22, 2024 13:52
-
-
Save iamliuzy/59129658d39804dbc31104c4a158b79b to your computer and use it in GitHub Desktop.
Find keywords in a text file. The keywords are specified by another text file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Find Keywords
Find keywords in a text file.
The keywords are specified by another text file.
## Arguments
positional arguments:
  file                  File to be searched for keywords.
  keyword_file          File containing keywords to search for.
options:
  -h, --help            show this help message and exit
  --output-mode {list,highlight}, -m {list,highlight}
                        Output mode.
                        list - List all keywords found in the file.
                        highlight - Show full content of the file and highlight keywords found.
                        (default:list)
  --lines-per-page LINES_PER_PAGE, -l LINES_PER_PAGE
                        Number of lines to display per page.
""" | |
from pathlib import Path | |
import argparse | |
from colorama import Fore | |
# Arguments | |
parser = argparse.ArgumentParser(prog="Find Keywords", description="Find keywords in a text file.\ | |
The keywords are specificated by another text file.") | |
parser.add_argument('file', help='File to be searched for keywords.', type=Path) | |
parser.add_argument('keyword_file', help='File containing keywords to search for.', type=Path) | |
parser.add_argument('--output-mode', '-m', choices=['list', 'highlight'], help='Output mode.\n\ | |
list - List all keywords found in the file.\n\ | |
highlight - Show full content of the file and highlight keywords found.\n\ | |
(default:%(default)s)', | |
default='list') | |
parser.add_argument('--lines-per-page', '-l', help='Number of lines to display per page.', | |
type=int, required=True) | |
args = parser.parse_args() | |
with open(args.file.resolve(), 'r', encoding='utf-8') as f1: | |
file1_lines = f1.readlines() | |
with open(args.keyword_file.resolve(), 'r', encoding='utf-8') as f2: | |
file2 = f2.read().replace('\n', '').replace('\r', '').replace('\t', '').replace(' ', '') | |
if args.output_mode == 'list': | |
linenum: int | |
words = {} | |
for char in file2: | |
words[char] = [] | |
for line in file1_lines: | |
linenum += 1 | |
if line.startswith('# '): | |
continue | |
for char in line: | |
if char in file2: | |
pos = (linenum // args.lines_per_page | |
+ (0 if linenum % args.lines_per_page == 0 else 1), | |
linenum % args.lines_per_page | |
+ (args.lines_per_page if linenum % args.lines_per_page == 0 else 0)) | |
if not pos in words[char]: | |
words[char].append(pos) | |
del linenum | |
found: int | |
for char, poses in words.items(): | |
if len(poses) == 0: | |
continue | |
found += 1 | |
print(f"{char}:", end='') | |
for pos in poses: | |
print(f"P{pos[0]}L{pos[1]}", end='' if poses.index(pos)==len(poses)-1 else '、') | |
print() | |
print(f"Found {found} words.") | |
elif args.output_mode == 'highlight': | |
for line in file1_lines: | |
if line.startswith('# '): | |
print(line, end='') | |
else: | |
for char in line: | |
if char in file2: | |
print(Fore.BLUE + char + Fore.RESET, end='') | |
else: | |
print(char, end='') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment