Created
August 6, 2024 23:09
-
-
Save gmelodie/0851232ddae68743d043066f11907d4f to your computer and use it in GitHub Desktop.
Remove entries without authors (entire publications) from a .bib file exported from the ACM Digital Library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pybtex.database.input import bibtex | |
from pybtex.database.output.bibtex import Writer | |
from pybtex.errors import set_strict_mode | |
import sys | |
def remove_authorless_entries(bib_data):
    """Delete, in place, every entry of *bib_data* that has no authors.

    An entry is dropped when its ``persons`` mapping has no ``'author'`` key
    or maps it to an empty list. One "Filtering out ..." line is printed per
    dropped entry, showing the part of its title before the first ``':'``.

    Args:
        bib_data: Object with an ``entries`` mapping of citation key to
            entry; each entry exposes ``persons`` and ``fields`` mappings
            (the shape returned by the pybtex parser used below).

    Returns:
        The citation keys that were removed, in their original order.
    """
    # Collect the keys first: deleting while iterating over the mapping
    # would invalidate the iteration.
    authorless = []
    for cite_key, entry in bib_data.entries.items():
        if entry.persons.get('author'):
            continue
        # An author-less entry may also lack a title; fall back to the
        # citation key instead of aborting the whole run with a KeyError.
        title = entry.fields.get('title', cite_key)
        print(f"Filtering out {title.split(':')[0]}")
        authorless.append(cite_key)

    for cite_key in authorless:
        del bib_data.entries[cite_key]
    return authorless


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('usage: python3 detrashify_acm.py /path/to/acm.bib')
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is absent when Python runs with -S.
        sys.exit(1)

    filename = sys.argv[1]
    output_path = 'acm-updated.bib'
    print(f'Detrashifying {filename}')

    # Non-strict mode; per the original author's note this allows the file
    # to contain duplicate bib entries.
    set_strict_mode(False)

    # parse acm.bib
    bib_data = bibtex.Parser().parse_file(filename)
    old_len = len(bib_data.entries)

    removed = remove_authorless_entries(bib_data)

    with open(output_path, 'w', encoding='utf-8') as file:
        Writer().write_stream(bib_data, file)

    new_len = len(bib_data.entries)
    print(f'excluded {len(removed)} entries without authors')
    print(f'Old len: {old_len}\t New len: {new_len}')
    print(f'Updated file: {output_path}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment