Created
March 23, 2022 11:14
-
-
Save twsh/32b063f3dade22230a7ed2f27f70f040 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys

# Report citation keys that a bibliography file cites from within its own
# fields (e.g. "\citet{other2020}" inside a note) but that have no matching
# entry in the same file.
#
# Usage: python <this script> BIBLIOGRAPHY_FILE

# Matches a LaTeX citation command and captures the raw contents of its key
# argument (which may be a comma-separated list of keys):
#   \\[Cc]ite            "\cite" or "\Cite"
#   [A-Za-z]*            optional command suffix: t, p, year, author, yearpar, ...
#   \*?                  optional starred variant
#   (?:\[[^\]]*\]\s*)*   any number of optional [...] arguments
#   \{([^{}]*)\}         the braced key argument
# Only a star, whitespace and [...] arguments are allowed between the command
# name and the opening brace, so an unrelated "{...}" later on the same line
# is never mistaken for the key argument.
CITE_PATTERN = re.compile(
    r"\\[Cc]ite[A-Za-z]*\*?\s*(?:\[[^\]]*\]\s*)*\{([^{}]*)\}"
)

# Matches the start of an entry such as "@article{key," and captures the key.
# Requiring the key to be followed directly by a comma skips "@string{x = ...}"
# definitions; requiring letters then "{" after "@" skips e-mail addresses.
ENTRY_PATTERN = re.compile(r"@[A-Za-z]+\s*\{\s*([^\s,{}]+)\s*,")


def find_self_keys(text):
    """Return the set of citation keys used by citation commands in *text*.

    A comma-separated key list such as ``\\cite{a, b}`` contributes each key
    separately; surrounding whitespace is stripped and empty keys are dropped.
    """
    keys = set()
    for key_list in CITE_PATTERN.findall(text):
        keys.update(key.strip() for key in key_list.split(","))
    # "\cite{}" or a trailing comma would otherwise leave an empty key behind.
    keys.discard("")
    return keys


def find_entries(text):
    """Return the set of entry keys (the ``key`` in ``@type{key,``) in *text*."""
    return set(ENTRY_PATTERN.findall(text))


def main(argv=None):
    """Read the bibliography named by the first argument and print the report.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``. Returns 0 on success, or 2 when no file was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("usage: python <this script> BIBLIOGRAPHY_FILE", file=sys.stderr)
        return 2

    # Open and read the first argument
    with open(args[0], "r") as in_file:
        bibliography = in_file.read()

    self_keys = find_self_keys(bibliography)
    entries = find_entries(bibliography)

    # Print the information (sorted, so the output is stable between runs)
    print("I found these self citing keys:\n", "\n".join(sorted(self_keys)))
    print("I found these entries:\n", "\n".join(sorted(entries)))
    print(
        "These self citing keys are not in the entries:\n",
        "\n".join(sorted(self_keys - entries)),
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment