from pylatexenc.latex2text import LatexNodes2Text
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import argparse
import nltk
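
# NOTE: nltk.word_tokenize and the English stopword list rely on NLTK data
# packages that are not bundled with the library itself. If they are missing,
# a one-time download along these lines should fetch them (newer NLTK
# releases may additionally ask for "punkt_tab"):
#     nltk.download("punkt")
#     nltk.download("stopwords")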


def analyze_word_frequency(filename, words_to_check):
    """Analyzes word frequency in a LaTeX document.

    Args:
        filename (str): Path to the LaTeX file.
        words_to_check (list): List of words to check frequency for.

    Returns:
        dict: A dictionary containing the frequency of the words_to_check.
    """
    with open(filename, "r", encoding="utf-8") as f:
        latex_code = f.read()

    # Convert LaTeX to plain text
    converter = LatexNodes2Text()
    plain_text = converter.latex_to_text(latex_code)

    # Tokenize and lowercase the text
    tokens = nltk.word_tokenize(plain_text.lower())

    # Remove stopwords and non-alphanumeric tokens
    stop_words = set(stopwords.words("english"))
    filtered_tokens = [w for w in tokens if w not in stop_words and w.isalnum()]

    # Calculate word frequencies
    word_dist = FreqDist(filtered_tokens)

    # Extract frequencies for words_to_check; the tokens were lowercased above,
    # so look each requested word up in lowercase as well
    word_counts = {word: word_dist[word.lower()] for word in words_to_check}
    return word_counts


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Analyze word frequency in LaTeX documents."
    )
    parser.add_argument("filename", help="Path to the LaTeX file.")
    parser.add_argument(
        "-w",
        "--words",
        nargs="+",
        required=True,  # without this, omitting -w leaves args.words as None and the lookup fails
        help="Words to check frequency for.",
    )
    args = parser.parse_args()

    word_counts = analyze_word_frequency(args.filename, args.words)
    print(word_counts)
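
A minimal usage sketch, assuming the script is saved locally as analyze_latex_words.py (an illustrative filename) and the NLTK data noted above is installed:

    python analyze_latex_words.py thesis.tex -w energy entropy equilibrium

This prints a plain dictionary mapping each requested word to its count in the lowercased, stopword-filtered text of the input file (thesis.tex and the example words are likewise only placeholders).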