Skip to content

Instantly share code, notes, and snippets.

@Ghost---Shadow
Created May 24, 2025 01:11
Show Gist options
  • Save Ghost---Shadow/2b7e424710f7bd86f8412f9395d0fd77 to your computer and use it in GitHub Desktop.
Save Ghost---Shadow/2b7e424710f7bd86f8412f9395d0fd77 to your computer and use it in GitHub Desktop.
BibTeX journal name abbreviator - automatically converts full journal names to standard IEEE/ACM abbreviations to fix bibliography overflow issues in LaTeX documents.
#!/usr/bin/env python3
"""
BibTeX Journal Abbreviator
This script reads a .bib file and replaces full journal names with their
standard abbreviations based on IEEE, ACM, and other common abbreviations.
"""
import re
import sys
import argparse
from pathlib import Path
# Comprehensive journal abbreviation dictionary.
#
# Maps a venue name, as it appears inside a BibTeX `journal = {...}` or
# `booktitle = {...}` field, to the abbreviation that should replace it.
# find_abbreviation() consults this table with an exact lookup first, then a
# case-insensitive comparison, then a partial word-overlap match.
#
# Entries whose value equals their key are intentional: the name counts as
# recognised (so it is not listed by --list-unrecognized) but the field is left
# untouched because the abbreviation does not differ from the original text.
JOURNAL_ABBREVIATIONS = {
# IEEE Journals
"IEEE Transactions on Pattern Analysis and Machine Intelligence": "IEEE Trans. Pattern Anal. Mach. Intell.",
"IEEE Transactions on Neural Networks": "IEEE Trans. Neural Netw.",
"IEEE Transactions on Neural Networks and Learning Systems": "IEEE Trans. Neural Netw. Learn. Syst.",
"IEEE Transactions on Knowledge and Data Engineering": "IEEE Trans. Knowl. Data Eng.",
"IEEE Transactions on Software Engineering": "IEEE Trans. Softw. Eng.",
"IEEE Transactions on Computers": "IEEE Trans. Comput.",
"IEEE Transactions on Information Theory": "IEEE Trans. Inf. Theory",
"IEEE Transactions on Signal Processing": "IEEE Trans. Signal Process.",
"IEEE Transactions on Image Processing": "IEEE Trans. Image Process.",
"IEEE Transactions on Multimedia": "IEEE Trans. Multimedia",
"IEEE Transactions on Visualization and Computer Graphics": "IEEE Trans. Vis. Comput. Graph.",
"IEEE Computer Graphics and Applications": "IEEE Comput. Graph. Appl.",
"IEEE Internet Computing": "IEEE Internet Comput.",
"IEEE Software": "IEEE Softw.",
"IEEE Computer": "IEEE Computer",
"Proceedings of the IEEE": "Proc. IEEE",
# IEEE Conferences (keys are the literal, year-specific strings found in exported entries)
"2019 IEEE Winter Conference on Applications of Computer Vision (WACV)": "WACV",
"2023 38th IEEE/ACM International Conference on Automated Software Engineering (ASE)": "ASE",
"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)": "ICASSP",
"IEEE Games Entertainment Media Conference": "IEEE GEM",
# Nature/Science
"Nature": "Nature",
"Nature Machine Intelligence": "Nat. Mach. Intell.",
"Nature Communications": "Nat. Commun.",
"Nature Methods": "Nat. Methods",
"Science": "Science",
"Science Advances": "Sci. Adv.",
# ACM Journals
"ACM Transactions on Graphics": "ACM Trans. Graph.",
"ACM Transactions on Computer Systems": "ACM Trans. Comput. Syst.",
"ACM Transactions on Database Systems": "ACM Trans. Database Syst.",
"ACM Transactions on Information Systems": "ACM Trans. Inf. Syst.",
"ACM Computing Surveys": "ACM Comput. Surv.",
"Communications of the ACM": "Commun. ACM",
"Annual International ACM SIGIR Conference on Research and Development in Information Retrieval": "SIGIR",
# Machine Learning Journals
"Journal of Machine Learning Research": "J. Mach. Learn. Res.",
"J. Mach. Learn. Res.": "J. Mach. Learn. Res.", # Already abbreviated
"Machine Learning": "Mach. Learn.",
"Neural Networks": "Neural Netw.",
"Neural Computation": "Neural Comput.",
"Artificial Intelligence": "Artif. Intell.",
"Journal of Artificial Intelligence Research": "J. Artif. Intell. Res.",
"Found. Trends Mach. Learn.": "Found. Trends Mach. Learn.", # Already abbreviated
# Computer Vision
"International Journal of Computer Vision": "Int. J. Comput. Vis.",
"Computer Vision and Image Understanding": "Comput. Vis. Image Underst.",
"Pattern Recognition": "Pattern Recognit.",
"Pattern Recognition Letters": "Pattern Recognit. Lett.",
"CVPR 2011": "CVPR",
# NLP/Computational Linguistics
"Computational Linguistics": "Comput. Linguist.",
"Natural Language Engineering": "Nat. Lang. Eng.",
"Language Resources and Evaluation": "Lang. Resour. Eval.",
"Annual Meeting of the Association for Computational Linguistics": "ACL",
"Conference of the European Chapter of the Association for Computational Linguistics": "EACL",
"North American Chapter of the Association for Computational Linguistics": "NAACL",
"NAACL-HLT": "NAACL-HLT",
"Transactions of the Association for Computational Linguistics": "Trans. ACL",
"International Conference on Computational Linguistics": "COLING",
"International Conference on Language Resources and Evaluation": "LREC",
"Findings": "Findings", # Keep as is (usually "Findings of ACL/EMNLP")
# Conferences (often kept as proceedings)
"International Conference on Machine Learning": "ICML",
"Conference on Neural Information Processing Systems": "NeurIPS",
"Neural Information Processing Systems": "NeurIPS",
"International Conference on Learning Representations": "ICLR",
"Association for Computational Linguistics": "ACL",
"Conference on Empirical Methods in Natural Language Processing": "EMNLP",
"Conference on Computer Vision and Pattern Recognition": "CVPR",
"International Conference on Computer Vision": "ICCV",
"European Conference on Computer Vision": "ECCV",
"AAAI Conference on Artificial Intelligence": "AAAI",
"AAAI/IAAI, Vol. 2": "AAAI",
"International Joint Conference on Artificial Intelligence": "IJCAI",
"ACM SIGMOD International Conference on Management of Data": "SIGMOD",
"ACM SIGKDD International Conference on Knowledge Discovery and Data Mining": "KDD",
"International World Wide Web Conference": "WWW",
"ACM Conference on Human Factors in Computing Systems": "CHI",
"International Conference on Artificial Intelligence and Statistics": "AISTATS",
"International Conference on Algorithmic Learning Theory": "ALT",
"Conference on Uncertainty in Artificial Intelligence": "UAI",
"The International FLAIRS Conference Proceedings": "FLAIRS",
# Workshops and Special Venues
"International Workshop on Similarity-Based Pattern Recognition": "S+SSPR",
"Machine Learning Challenges Workshop": "ML Challenges Workshop",
"Proceedings of the Workshop on Structured and Unstructured Knowledge Integration (SUKI)": "SUKI Workshop",
"Workshop on Document-grounded Dialogue and Conversational Question Answering": "Doc2Dial Workshop",
# arXiv and preprints (several spellings collapse to the same label)
"arXiv preprint": "arXiv",
"ArXiv": "arXiv",
"arXiv: Computation and Language": "arXiv",
# Other Journals
"Complex.": "Complex.", # Already abbreviated (Complexity journal)
"Mathematical Programming": "Math. Program.",
"Semantic Web": "Semant. Web",
"The Scientific World Journal": "Sci. World J.",
"Frontiers Robotics AI": "Front. Robot. AI",
# Other common journals
"Journal of the American Statistical Association": "J. Am. Stat. Assoc.",
"Bioinformatics": "Bioinformatics",
"PLOS ONE": "PLOS ONE",
"Scientific Reports": "Sci. Rep.",
"Applied Mathematics and Computation": "Appl. Math. Comput.",
"Information Sciences": "Inf. Sci.",
"Knowledge-Based Systems": "Knowl.-Based Syst.",
"Expert Systems with Applications": "Expert Syst. Appl.",
}
def parse_bib_file(file_path):
    """Parse a BibTeX file and return its entries.

    The file is scanned for entry headers of the form ``@type{``. Each entry's
    body is the text between the end of its header and the start of the next
    header (or the end of the file).

    Args:
        file_path: Path of the ``.bib`` file to read (decoded as UTF-8).

    Returns:
        A list of dicts, one per entry in file order, with the keys:
        ``"type"`` (the entry type exactly as written after ``@``),
        ``"key"`` (the citation key taken from the entry's first line) and
        ``"content"`` (the raw text following the opening brace).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Iterate over the header matches themselves so each entry keeps its own
    # type; searching the whole file again would always find the first header.
    headers = list(re.finditer(r"@(\w+)\s*\{", content))

    parsed_entries = []
    for index, header in enumerate(headers):
        # The entry body runs up to the next header, or to end of file.
        if index + 1 < len(headers):
            body_end = headers[index + 1].start()
        else:
            body_end = len(content)
        entry = content[header.end():body_end]

        # The citation key is whatever precedes the first comma on the first
        # line; an entry with no comma there is treated as "key}".
        key_line = entry.split("\n")[0].strip()
        key = key_line.split(",")[0] if "," in key_line else key_line.rstrip("}")

        parsed_entries.append(
            {
                "type": header.group(1),
                "key": key,
                "content": entry,
            }
        )
    return parsed_entries
def extract_journal_field(entry_content):
    """Extract the venue name from a single BibTeX entry.

    The ``journal`` field is preferred; ``booktitle`` (used by conference
    proceedings) is the fallback. Field names are matched case-insensitively
    and only brace-delimited values without nested braces are recognised.

    Args:
        entry_content: Raw text of one BibTeX entry.

    Returns:
        The text between the braces of the first matching field, or ``None``
        when the entry has neither field.
    """
    # The leading \b keeps the pattern from matching the tail of a longer
    # field name such as "fjournal" or "shortjournal".
    field_patterns = (
        r"\bjournal\s*=\s*\{([^}]+)\}",
        r"\bbooktitle\s*=\s*\{([^}]+)\}",
    )
    for pattern in field_patterns:
        match = re.search(pattern, entry_content, re.IGNORECASE)
        if match:
            return match.group(1)
    return None
def find_abbreviation(journal_name, abbreviations=None):
    """Find the abbreviation for a journal or conference name.

    Lookup proceeds in three stages, returning at the first success:

    1. exact match against the table keys;
    2. case-insensitive match;
    3. partial match: one name must contain the other as a substring, and the
       fraction of shared words (relative to the longer name) must exceed 70%.
       When several keys qualify, the one with the highest overlap wins.

    Runs of whitespace (including line breaks from wrapped field values) are
    collapsed to single spaces before matching.

    Args:
        journal_name: Name as found in the ``journal``/``booktitle`` field.
        abbreviations: Optional mapping of full name to abbreviation. Defaults
            to the module-level ``JOURNAL_ABBREVIATIONS`` table.

    Returns:
        The abbreviation string, or ``None`` when nothing matches or the name
        is empty.
    """
    table = JOURNAL_ABBREVIATIONS if abbreviations is None else abbreviations

    if not journal_name:
        return None

    # Exact match on the text as given.
    if journal_name in table:
        return table[journal_name]

    # BibTeX values are often wrapped over several lines or padded; normalise
    # the whitespace so such values can still match.
    name = " ".join(journal_name.split())
    if not name:
        return None
    if name in table:
        return table[name]

    name_lower = name.lower()
    lowered = [(full_name.lower(), abbrev) for full_name, abbrev in table.items()]

    # Case-insensitive match.
    for full_lower, abbrev in lowered:
        if full_lower == name_lower:
            return abbrev

    # Partial match (for slight variations): keep the best-scoring candidate
    # above the 70% threshold; ties keep the earliest table entry.
    name_words = name_lower.split()
    name_word_set = set(name_words)
    best_abbrev = None
    best_score = 0.7
    for full_lower, abbrev in lowered:
        if name_lower not in full_lower and full_lower not in name_lower:
            continue
        full_words = full_lower.split()
        score = len(name_word_set & set(full_words)) / max(
            len(name_words), len(full_words)
        )
        if score > best_score:
            best_score = score
            best_abbrev = abbrev
    return best_abbrev
def abbreviate_bib_file(input_file, output_file=None, dry_run=False):
    """Process a BibTeX file and abbreviate its journal names.

    Every brace-delimited ``journal`` and ``booktitle`` field is looked up
    with ``find_abbreviation``; when an abbreviation exists and differs from
    the current value, the value is replaced. A summary is printed to stdout.

    Args:
        input_file: Path of the ``.bib`` file to read (UTF-8).
        output_file: Path to write the result to. When ``None``, the output
            is placed next to the input as ``<name>_abbreviated.bib``.
        dry_run: When true, only print the changes that would be made and
            write nothing.

    Returns:
        None.
    """
    if output_file is None:
        # Only rewrite a trailing ".bib" extension. A blanket str.replace
        # would also touch ".bib" inside directory names, and would leave a
        # path without that extension unchanged, overwriting the input.
        source = str(input_file)
        if source.lower().endswith(".bib"):
            output_file = source[: -len(".bib")] + "_abbreviated.bib"
        else:
            output_file = source + "_abbreviated.bib"

    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    changes_made = []

    # Groups: (field prefix up to the opening brace)(value)(closing brace).
    # The \b stops the patterns from matching inside longer field names such
    # as "fjournal" or "shortjournal".
    journal_pattern = r"(\bjournal\s*=\s*\{)([^}]+)(\})"
    booktitle_pattern = r"(\bbooktitle\s*=\s*\{)([^}]+)(\})"

    def replace_journal(match):
        prefix, journal_name, suffix = match.groups()
        abbrev = find_abbreviation(journal_name)
        if abbrev and abbrev != journal_name:
            changes_made.append(f"'{journal_name}' -> '{abbrev}'")
            return prefix + abbrev + suffix
        # Unknown or already-abbreviated names are left exactly as written.
        return match.group(0)

    # Journal fields are rewritten first, then booktitle fields, so the
    # reported changes are grouped in that order.
    modified_content = re.sub(
        journal_pattern, replace_journal, content, flags=re.IGNORECASE
    )
    modified_content = re.sub(
        booktitle_pattern, replace_journal, modified_content, flags=re.IGNORECASE
    )

    if dry_run:
        print("DRY RUN - Changes that would be made:")
        if changes_made:
            for change in changes_made:
                print(f" {change}")
        else:
            print(" No abbreviations found")
        return

    # Write the modified content
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(modified_content)

    print(f"Processed {input_file} -> {output_file}")
    if changes_made:
        print(f"Made {len(changes_made)} abbreviations:")
        for change in changes_made:
            print(f" {change}")
    else:
        print("No journal names were abbreviated (none found in dictionary)")
def list_unrecognized_journals(input_file):
    """List journal names that don't have abbreviations.

    Collects the values of every brace-delimited ``journal`` and
    ``booktitle`` field in the file, and prints (sorted, de-duplicated) those
    for which ``find_abbreviation`` returns nothing.

    Args:
        input_file: Path of the ``.bib`` file to read (UTF-8).

    Returns:
        None. The report is printed to stdout.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # The \b keeps longer field names ("fjournal", "shortjournal") from being
    # mistaken for the journal field.
    journal_matches = re.findall(
        r"\bjournal\s*=\s*\{([^}]+)\}", content, re.IGNORECASE
    )
    booktitle_matches = re.findall(
        r"\bbooktitle\s*=\s*\{([^}]+)\}", content, re.IGNORECASE
    )

    # A set removes names that occur in several entries.
    unrecognized = {
        journal
        for journal in journal_matches + booktitle_matches
        if not find_abbreviation(journal)
    }

    if unrecognized:
        print("Unrecognized journal names:")
        for journal in sorted(unrecognized):
            print(f" '{journal}'")
    else:
        print("All journal names have abbreviations available")
def main(argv=None):
    """Command-line entry point.

    Parses the arguments, validates the input path and dispatches to either
    ``list_unrecognized_journals`` (``--list-unrecognized``) or
    ``abbreviate_bib_file`` (the default).

    Args:
        argv: Optional list of argument strings. When ``None``, argparse reads
            the arguments from ``sys.argv``.

    Raises:
        SystemExit: With status 1 when the input path is missing or is not a
            regular file; argparse also exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Abbreviate journal names in BibTeX files"
    )
    parser.add_argument("input_file", help="Input .bib file")
    parser.add_argument(
        "-o", "--output", help="Output .bib file (default: input_abbreviated.bib)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )
    parser.add_argument(
        "--list-unrecognized",
        action="store_true",
        help="List journal names without abbreviations",
    )
    args = parser.parse_args(argv)

    input_path = Path(args.input_file)
    if not input_path.exists():
        print(f"Error: File '{args.input_file}' not found", file=sys.stderr)
        sys.exit(1)
    # exists() is also true for directories, which would otherwise fail later
    # with an unhandled error when the path is opened for reading.
    if not input_path.is_file():
        print(f"Error: '{args.input_file}' is not a file", file=sys.stderr)
        sys.exit(1)

    if args.list_unrecognized:
        list_unrecognized_journals(args.input_file)
    else:
        abbreviate_bib_file(args.input_file, args.output, args.dry_run)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment