Created
May 24, 2025 01:11
-
-
Save Ghost---Shadow/2b7e424710f7bd86f8412f9395d0fd77 to your computer and use it in GitHub Desktop.
BibTeX journal name abbreviator - automatically converts full journal names to standard IEEE/ACM abbreviations to fix bibliography overflow issues in LaTeX documents.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
BibTeX Journal Abbreviator | |
This script reads a .bib file and replaces full journal names with their | |
standard abbreviations based on IEEE, ACM, and other common abbreviations. | |
""" | |
import re | |
import sys | |
import argparse | |
from pathlib import Path | |
# Comprehensive journal abbreviation dictionary | |
JOURNAL_ABBREVIATIONS = { | |
# IEEE Journals | |
"IEEE Transactions on Pattern Analysis and Machine Intelligence": "IEEE Trans. Pattern Anal. Mach. Intell.", | |
"IEEE Transactions on Neural Networks": "IEEE Trans. Neural Netw.", | |
"IEEE Transactions on Neural Networks and Learning Systems": "IEEE Trans. Neural Netw. Learn. Syst.", | |
"IEEE Transactions on Knowledge and Data Engineering": "IEEE Trans. Knowl. Data Eng.", | |
"IEEE Transactions on Software Engineering": "IEEE Trans. Softw. Eng.", | |
"IEEE Transactions on Computers": "IEEE Trans. Comput.", | |
"IEEE Transactions on Information Theory": "IEEE Trans. Inf. Theory", | |
"IEEE Transactions on Signal Processing": "IEEE Trans. Signal Process.", | |
"IEEE Transactions on Image Processing": "IEEE Trans. Image Process.", | |
"IEEE Transactions on Multimedia": "IEEE Trans. Multimedia", | |
"IEEE Transactions on Visualization and Computer Graphics": "IEEE Trans. Vis. Comput. Graph.", | |
"IEEE Computer Graphics and Applications": "IEEE Comput. Graph. Appl.", | |
"IEEE Internet Computing": "IEEE Internet Comput.", | |
"IEEE Software": "IEEE Softw.", | |
"IEEE Computer": "IEEE Computer", | |
"Proceedings of the IEEE": "Proc. IEEE", | |
# IEEE Conferences | |
"2019 IEEE Winter Conference on Applications of Computer Vision (WACV)": "WACV", | |
"2023 38th IEEE/ACM International Conference on Automated Software Engineering (ASE)": "ASE", | |
"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)": "ICASSP", | |
"IEEE Games Entertainment Media Conference": "IEEE GEM", | |
# Nature/Science | |
"Nature": "Nature", | |
"Nature Machine Intelligence": "Nat. Mach. Intell.", | |
"Nature Communications": "Nat. Commun.", | |
"Nature Methods": "Nat. Methods", | |
"Science": "Science", | |
"Science Advances": "Sci. Adv.", | |
# ACM Journals | |
"ACM Transactions on Graphics": "ACM Trans. Graph.", | |
"ACM Transactions on Computer Systems": "ACM Trans. Comput. Syst.", | |
"ACM Transactions on Database Systems": "ACM Trans. Database Syst.", | |
"ACM Transactions on Information Systems": "ACM Trans. Inf. Syst.", | |
"ACM Computing Surveys": "ACM Comput. Surv.", | |
"Communications of the ACM": "Commun. ACM", | |
"Annual International ACM SIGIR Conference on Research and Development in Information Retrieval": "SIGIR", | |
# Machine Learning Journals | |
"Journal of Machine Learning Research": "J. Mach. Learn. Res.", | |
"J. Mach. Learn. Res.": "J. Mach. Learn. Res.", # Already abbreviated | |
"Machine Learning": "Mach. Learn.", | |
"Neural Networks": "Neural Netw.", | |
"Neural Computation": "Neural Comput.", | |
"Artificial Intelligence": "Artif. Intell.", | |
"Journal of Artificial Intelligence Research": "J. Artif. Intell. Res.", | |
"Found. Trends Mach. Learn.": "Found. Trends Mach. Learn.", # Already abbreviated | |
# Computer Vision | |
"International Journal of Computer Vision": "Int. J. Comput. Vis.", | |
"Computer Vision and Image Understanding": "Comput. Vis. Image Underst.", | |
"Pattern Recognition": "Pattern Recognit.", | |
"Pattern Recognition Letters": "Pattern Recognit. Lett.", | |
"CVPR 2011": "CVPR", | |
# NLP/Computational Linguistics | |
"Computational Linguistics": "Comput. Linguist.", | |
"Natural Language Engineering": "Nat. Lang. Eng.", | |
"Language Resources and Evaluation": "Lang. Resour. Eval.", | |
"Annual Meeting of the Association for Computational Linguistics": "ACL", | |
"Conference of the European Chapter of the Association for Computational Linguistics": "EACL", | |
"North American Chapter of the Association for Computational Linguistics": "NAACL", | |
"NAACL-HLT": "NAACL-HLT", | |
"Transactions of the Association for Computational Linguistics": "Trans. ACL", | |
"International Conference on Computational Linguistics": "COLING", | |
"International Conference on Language Resources and Evaluation": "LREC", | |
"Findings": "Findings", # Keep as is (usually "Findings of ACL/EMNLP") | |
# Conferences (often kept as proceedings) | |
"International Conference on Machine Learning": "ICML", | |
"Conference on Neural Information Processing Systems": "NeurIPS", | |
"Neural Information Processing Systems": "NeurIPS", | |
"International Conference on Learning Representations": "ICLR", | |
"Association for Computational Linguistics": "ACL", | |
"Conference on Empirical Methods in Natural Language Processing": "EMNLP", | |
"Conference on Computer Vision and Pattern Recognition": "CVPR", | |
"International Conference on Computer Vision": "ICCV", | |
"European Conference on Computer Vision": "ECCV", | |
"AAAI Conference on Artificial Intelligence": "AAAI", | |
"AAAI/IAAI, Vol. 2": "AAAI", | |
"International Joint Conference on Artificial Intelligence": "IJCAI", | |
"ACM SIGMOD International Conference on Management of Data": "SIGMOD", | |
"ACM SIGKDD International Conference on Knowledge Discovery and Data Mining": "KDD", | |
"International World Wide Web Conference": "WWW", | |
"ACM Conference on Human Factors in Computing Systems": "CHI", | |
"International Conference on Artificial Intelligence and Statistics": "AISTATS", | |
"International Conference on Algorithmic Learning Theory": "ALT", | |
"Conference on Uncertainty in Artificial Intelligence": "UAI", | |
"The International FLAIRS Conference Proceedings": "FLAIRS", | |
# Workshops and Special Venues | |
"International Workshop on Similarity-Based Pattern Recognition": "S+SSPR", | |
"Machine Learning Challenges Workshop": "ML Challenges Workshop", | |
"Proceedings of the Workshop on Structured and Unstructured Knowledge Integration (SUKI)": "SUKI Workshop", | |
"Workshop on Document-grounded Dialogue and Conversational Question Answering": "Doc2Dial Workshop", | |
# arXiv and preprints | |
"arXiv preprint": "arXiv", | |
"ArXiv": "arXiv", | |
"arXiv: Computation and Language": "arXiv", | |
# Other Journals | |
"Complex.": "Complex.", # Already abbreviated (Complexity journal) | |
"Mathematical Programming": "Math. Program.", | |
"Semantic Web": "Semant. Web", | |
"The Scientific World Journal": "Sci. World J.", | |
"Frontiers Robotics AI": "Front. Robot. AI", | |
# Other common journals | |
"Journal of the American Statistical Association": "J. Am. Stat. Assoc.", | |
"Bioinformatics": "Bioinformatics", | |
"PLOS ONE": "PLOS ONE", | |
"Scientific Reports": "Sci. Rep.", | |
"Applied Mathematics and Computation": "Appl. Math. Comput.", | |
"Information Sciences": "Inf. Sci.", | |
"Knowledge-Based Systems": "Knowl.-Based Syst.", | |
"Expert Systems with Applications": "Expert Syst. Appl.", | |
} | |
def parse_bib_file(file_path):
    """Parse a BibTeX file into a list of entry records.

    Args:
        file_path: Path of the .bib file to read (decoded as UTF-8).

    Returns:
        A list of dicts in file order, one per ``@type{`` header, with keys:
          * ``"type"``    - the word following ``@`` for that entry,
          * ``"key"``     - the citation key taken from the header line,
          * ``"content"`` - the raw text from just after the opening brace
            up to the next entry header (or the end of the file).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Iterate over the header matches themselves instead of splitting on them:
    # each entry then keeps its own type rather than inheriting the type of
    # the first entry in the file.
    headers = list(re.finditer(r"@(\w+)\s*\{", content))
    body_ends = [header.start() for header in headers[1:]] + [len(content)]

    parsed_entries = []
    for header, body_end in zip(headers, body_ends):
        body = content[header.end():body_end]
        # The citation key sits on the header line, before the first comma.
        key_line = body.split("\n", 1)[0].strip()
        key = key_line.split(",")[0] if "," in key_line else key_line.rstrip("}")
        parsed_entries.append(
            {
                "type": header.group(1),
                "key": key,
                "content": body,
            }
        )
    return parsed_entries
def extract_journal_field(entry_content):
    """Return the venue name of a BibTeX entry, or ``None`` if it has none.

    The ``journal`` field is preferred; ``booktitle`` (used by conference
    proceedings) is the fallback. Field names are matched case-insensitively.

    Args:
        entry_content: Raw text of a single BibTeX entry.

    Returns:
        The text between the field's outer braces, or ``None`` when neither
        field is present with a non-empty braced value.
    """
    for field in ("journal", "booktitle"):
        match = re.search(
            # \b keeps fields such as "shortjournal" from being mistaken for
            # "journal". The value may contain one level of nested braces
            # (e.g. "{IEEE} Software"), which a plain [^}]+ would cut short.
            r"\b" + field + r"\s*=\s*\{((?:[^{}]|\{[^{}]*\})+)\}",
            entry_content,
            re.IGNORECASE,
        )
        if match:
            return match.group(1)
    return None
def find_abbreviation(journal_name):
    """Look up the abbreviation for a journal or venue name.

    Matching is attempted in three stages, stopping at the first hit:
      1. exact key lookup in ``JOURNAL_ABBREVIATIONS``;
      2. case-insensitive equality with a key;
      3. partial match - one name contains the other AND the shared words
         make up more than 70% of the longer name's word count.

    Before stages 2 and 3 the name is normalised: protective BibTeX braces are
    dropped and runs of whitespace (including line breaks from wrapped field
    values) are collapsed to single spaces.

    Args:
        journal_name: Venue name as written in the .bib file.

    Returns:
        The abbreviation string, or ``None`` when nothing matches (including
        when ``journal_name`` is ``None`` or blank).
    """
    if not journal_name:
        return None

    # Stage 1: exact key, tried on the raw text first so that entries which
    # already match are resolved with a single dict lookup.
    if journal_name in JOURNAL_ABBREVIATIONS:
        return JOURNAL_ABBREVIATIONS[journal_name]

    # Strip brace protection and collapse whitespace so "{IEEE} Software" and
    # names wrapped across lines can still be recognised.
    cleaned = " ".join(journal_name.replace("{", "").replace("}", "").split())
    if not cleaned:
        return None
    if cleaned in JOURNAL_ABBREVIATIONS:
        return JOURNAL_ABBREVIATIONS[cleaned]

    needle = cleaned.lower()
    # Lower-case every key once instead of on each comparison.
    candidates = [
        (full_name.lower(), abbrev)
        for full_name, abbrev in JOURNAL_ABBREVIATIONS.items()
    ]

    # Stage 2: case-insensitive equality.
    for lowered_name, abbrev in candidates:
        if needle == lowered_name:
            return abbrev

    # Stage 3: containment plus a word-overlap threshold, so that a short key
    # such as "Nature" does not swallow "Nature Methods".
    needle_words = needle.split()
    needle_word_set = set(needle_words)
    for lowered_name, abbrev in candidates:
        if needle in lowered_name or lowered_name in needle:
            candidate_words = lowered_name.split()
            shared = len(needle_word_set & set(candidate_words))
            if shared / max(len(needle_words), len(candidate_words)) > 0.7:
                return abbrev
    return None
def abbreviate_bib_file(input_file, output_file=None, dry_run=False):
    """Abbreviate journal/booktitle names in a BibTeX file.

    Every ``journal = {...}`` and ``booktitle = {...}`` value is looked up with
    ``find_abbreviation``; values with a known, different abbreviation are
    replaced. A summary of the replacements is printed.

    Args:
        input_file: Path of the .bib file to read (UTF-8).
        output_file: Path to write the result to. Defaults to the input name
            with ``_abbreviated`` inserted before the extension, in the same
            directory (``refs.bib`` -> ``refs_abbreviated.bib``).
        dry_run: When true, only print the replacements that would be made
            and write nothing.

    Returns:
        None.
    """
    if output_file is None:
        # Build the name from the path components rather than with
        # str.replace(".bib", ...): that left the name unchanged (and so
        # overwrote the input) when the file had another extension, and also
        # rewrote any ".bib" appearing in a directory name.
        source_path = Path(input_file)
        output_file = str(
            source_path.with_name(
                f"{source_path.stem}_abbreviated{source_path.suffix}"
            )
        )

    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    changes_made = []

    def replace_journal(match):
        prefix, journal_name, suffix = match.groups()
        abbrev = find_abbreviation(journal_name)
        if abbrev and abbrev != journal_name:
            changes_made.append(f"'{journal_name}' -> '{abbrev}'")
            return prefix + abbrev + suffix
        return match.group(0)

    modified_content = content
    # Journal fields are processed first, then booktitle fields, so the
    # change log lists journals before proceedings.
    for field in ("journal", "booktitle"):
        # \b stops fields like "shortjournal" from matching; the value may
        # hold one level of nested braces such as "{IEEE} Software".
        pattern = r"(\b" + field + r"\s*=\s*\{)((?:[^{}]|\{[^{}]*\})+)(\})"
        modified_content = re.sub(
            pattern, replace_journal, modified_content, flags=re.IGNORECASE
        )

    if dry_run:
        print("DRY RUN - Changes that would be made:")
        if changes_made:
            for change in changes_made:
                print(f" {change}")
        else:
            print(" No abbreviations found")
        return

    # Write the modified content
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(modified_content)

    print(f"Processed {input_file} -> {output_file}")
    if changes_made:
        print(f"Made {len(changes_made)} abbreviations:")
        for change in changes_made:
            print(f" {change}")
    else:
        print("No journal names were abbreviated (none found in dictionary)")
def list_unrecognized_journals(input_file):
    """Print the journal/booktitle names that have no known abbreviation.

    Args:
        input_file: Path of the .bib file to scan (UTF-8).

    Returns:
        None. The distinct unrecognised names are printed in sorted order, or
        a confirmation line when every name could be abbreviated.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # \b prevents fields such as "shortjournal" from being counted as
    # "journal"; the value pattern tolerates one level of nested braces so
    # names like "{IEEE} Software" are reported whole rather than truncated.
    venue_names = re.findall(
        r"\b(?:journal|booktitle)\s*=\s*\{((?:[^{}]|\{[^{}]*\})+)\}",
        content,
        re.IGNORECASE,
    )

    unrecognized = {name for name in venue_names if not find_abbreviation(name)}

    if unrecognized:
        print("Unrecognized journal names:")
        for journal in sorted(unrecognized):
            print(f" '{journal}'")
    else:
        print("All journal names have abbreviations available")
def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv``, as before.

    Exits with status 1 when the input path is not an existing regular file.
    """
    parser = argparse.ArgumentParser(
        description="Abbreviate journal names in BibTeX files"
    )
    parser.add_argument("input_file", help="Input .bib file")
    parser.add_argument(
        "-o", "--output", help="Output .bib file (default: input_abbreviated.bib)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )
    parser.add_argument(
        "--list-unrecognized",
        action="store_true",
        help="List journal names without abbreviations",
    )
    args = parser.parse_args(argv)

    # is_file() rather than exists(): a directory path would pass exists()
    # and then fail with an unhandled error when opened for reading.
    if not Path(args.input_file).is_file():
        # Diagnostics go to stderr so they do not mix with the normal report.
        print(f"Error: File '{args.input_file}' not found", file=sys.stderr)
        sys.exit(1)

    if args.list_unrecognized:
        list_unrecognized_journals(args.input_file)
    else:
        abbreviate_bib_file(args.input_file, args.output, args.dry_run)
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment