Created
November 8, 2023 09:34
-
-
Save Koushikphy/970d60d46ca051f1c297ef37209000bd to your computer and use it in GitHub Desktop.
List all acronyms and/or keywords in your documents to avoid duplicate acronym definitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import subprocess
import sys

from colorama import init as colorama_init, Fore, Style

colorama_init()

# Extra terms to search for, in addition to the automatically detected
# acronyms.  Leave empty to report acronyms only.
keywords = [
    # provide keywords
]

# The document to scan is given as the first command-line argument; exit with
# a usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <file>")

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding; undecodable bytes are replaced rather than aborting the
# scan (acronym detection below only looks at ASCII capitals anyway).
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    txt = f.read()

# List all acronyms: whole words made of two or more ASCII capitals with an
# optional plural "s".  dict.fromkeys() removes duplicates while keeping the
# order of first appearance.
allWords = list(dict.fromkeys(re.findall(r'\b[A-Z](?:[A-Z])+s?\b', txt)))

# From here on the document is handled line by line.
txt = txt.splitlines()
# specifically made for latex files
_OPEN_BRACE = re.compile(r'(?<!\\)\{')
_CLOSE_BRACE = re.compile(r'(?<!\\)\}')


def _outside_braces(line, pattern):
    """Return True if *pattern* matches in *line* outside any `{...}` group.

    Text inside braces is usually a command argument (a label, a citation
    key, a glossary entry, ...) and is not a use of the acronym in running
    text.  Each occurrence is judged on its own by counting the unescaped
    braces that precede it on the same line, so a line holding both a braced
    and a bare occurrence is still reported.  Groups that span several lines
    are not tracked.
    """
    for match in pattern.finditer(line):
        prefix = line[:match.start()]
        depth = len(_OPEN_BRACE.findall(prefix)) - len(_CLOSE_BRACE.findall(prefix))
        if depth <= 0:
            return True
    return False


for word in allWords:
    # Compile once per acronym; the same whole-word pattern is used for both
    # searching and highlighting so that longer words containing the acronym
    # are neither reported nor coloured.
    pattern = re.compile(rf'\b{re.escape(word)}\b')
    res = [
        (n, line)
        for n, line in enumerate(txt, start=1)
        # skip comment lines in latex, including indented ones
        if not line.lstrip().startswith('%') and _outside_braces(line, pattern)
    ]
    if res:  # print if found
        print(f"{word} {'='*50}")
        for n, line in res:
            highlighted = pattern.sub(
                lambda m: f'{Fore.RED}{m.group(0)}{Style.RESET_ALL}', line
            )
            print(f"{n}: {highlighted}")
        print('\n\n')
for word in keywords:  # search the provided keywords
    # The keyword is inserted into the pattern as-is (so it may itself be a
    # regular expression) and matched as a whole word, ignoring case.  The
    # same compiled pattern drives the highlighting below, so every reported
    # match is coloured regardless of its capitalisation.
    pattern = re.compile(rf'\b{word}\b', re.IGNORECASE)
    res = [
        (n, line)
        for n, line in enumerate(txt, start=1)
        # skip comment lines in latex, including indented ones
        if not line.lstrip().startswith('%') and pattern.search(line)
    ]
    if res:
        print(f"{word} {'='*50}")
        for n, line in res:
            # Colour the text that actually matched (keeping its original
            # case) and only within the line itself, never in the line-number
            # prefix.  A function replacement avoids interpreting backslashes
            # in the keyword as a substitution template.
            highlighted = pattern.sub(
                lambda m: f'{Fore.RED}{m.group(0)}{Style.RESET_ALL}', line
            )
            print(f"{n}: {highlighted}")
        print('\n\n')
# A shell alternative that lists every distinct all-caps word of two or more letters:
# cat file | grep -wo "[A-Z]\{2,\}" | sort | uniq
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment