sunderee · May 4, 2025 04:46
diff --git a/word_counter.py b/word_counter.py
 """
 Markdown Word Counter
 ---------------------
 A simple script to count words in a Markdown file, ignoring Markdown syntax.
 """

 import re
 import sys
 import argparse
 from pathlib import Path


 def clean_markdown(text):
    """
    Strip Markdown syntax from ``text`` so that only prose words remain.

    The substitutions run in a deliberate order:

    * Images and links are resolved before bare URLs are stripped.
      Otherwise the URL pattern swallows the closing ``)`` of
      ``[text](https://...)``, which leaves image alt text behind and lets
      the link pattern consume unrelated text up to the next ``)``.
    * Horizontal rules are removed before emphasis markers. Otherwise
      ``***`` / ``___`` are partly consumed as empty emphasis and a stray
      marker survives.

    Args:
        text: Raw Markdown source.

    Returns:
        The text with fenced and inline code, HTML tags, images, bare URLs,
        heading markers, horizontal rules, emphasis markers and footnote
        references removed. Link text is kept. Whitespace is not normalised.
    """
    # Fenced code blocks go first so their contents are never inspected.
    text = re.sub(r'```[\s\S]*?```', '', text)

    # Inline code spans.
    text = re.sub(r'`[^`]*`', '', text)

    # HTML tags (this also removes <https://...> autolinks).
    text = re.sub(r'<[^>]*>', '', text)

    # Images are dropped entirely, alt text included.
    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)

    # Inline links: keep the visible text, drop the target.
    text = re.sub(r'\[([^\]]*)\]\(.*?\)', r'\1', text)

    # Bare URLs that were not part of a link or image.
    text = re.sub(r'https?://\S+', '', text)

    # Heading markers (leading # symbols).
    text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)

    # Horizontal rules, while their marker characters are still intact.
    text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)

    # Emphasis markers (* and _): strong first, then regular; keep the text.
    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)

    # Footnote references such as [^1].
    text = re.sub(r'\[\^[^\]]*\]', '', text)

    return text


 def count_words(text):
    """
    Return the number of words in ``text`` once Markdown syntax is stripped.

    A word is any maximal run of non-whitespace characters that remains
    after ``clean_markdown`` has processed the input.
    """
    stripped = clean_markdown(text)

    # Each run of non-whitespace characters is exactly one non-empty token
    # of a whitespace split, so counting the runs gives the same total.
    tokens = re.findall(r'\S+', stripped)
    return len(tokens)


 def main():
    """
    Command-line entry point: report the word count of one Markdown file.

    Parses ``file`` (required path) and ``--details`` (optional flag) from
    the command line, reads the file as UTF-8 and prints the word count.
    With ``--details`` it also prints the line count, the character count
    and an estimated reading time.

    Exits with status 1, after printing a message to stderr, when the file
    is missing, cannot be read, or is not valid UTF-8.
    """
    parser = argparse.ArgumentParser(description='Count words in a Markdown file.')
    parser.add_argument('file', help='Path to the Markdown file')
    parser.add_argument('--details', action='store_true', help='Show detailed statistics')
    args = parser.parse_args()

    file_path = Path(args.file)

    # Open the file directly instead of checking exists() first: this avoids
    # the check-then-open race and keeps the try body to what can fail.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    word_count = count_words(content)

    print(f"\nFile: {file_path}")
    print(f"Word count: {word_count}")

    if args.details:
        line_count = len(content.splitlines())
        char_count = len(content)

        # Estimate reading time at an average of 250 words per minute.
        # Integer arithmetic avoids float truncation errors: with floats,
        # 575 words came out as 2 min 17 sec instead of 2 min 18 sec.
        words_per_minute = 250
        total_seconds = word_count * 60 // words_per_minute
        reading_minutes, reading_seconds = divmod(total_seconds, 60)

        print(f"Line count: {line_count}")
        print(f"Character count: {char_count}")
        print(f"Estimated reading time: {reading_minutes} min {reading_seconds} sec")


 # Run the command-line interface only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == "__main__":
    main()
	"""
	Markdown Word Counter
	---------------------
	A simple script to count words in a Markdown file, ignoring Markdown syntax.
	"""

	import re
	import sys
	import argparse
	from pathlib import Path


	def clean_markdown(text):
	    """
	    Strip Markdown syntax from ``text`` so that only prose words remain.

	    The substitutions run in a deliberate order:

	    * Images and links are resolved before bare URLs are stripped.
	      Otherwise the URL pattern swallows the closing ``)`` of
	      ``[text](https://...)``, which leaves image alt text behind and lets
	      the link pattern consume unrelated text up to the next ``)``.
	    * Horizontal rules are removed before emphasis markers. Otherwise
	      ``***`` / ``___`` are partly consumed as empty emphasis and a stray
	      marker survives.

	    Args:
	        text: Raw Markdown source.

	    Returns:
	        The text with fenced and inline code, HTML tags, images, bare URLs,
	        heading markers, horizontal rules, emphasis markers and footnote
	        references removed. Link text is kept. Whitespace is not normalised.
	    """
	    # Fenced code blocks go first so their contents are never inspected.
	    text = re.sub(r'```[\s\S]*?```', '', text)

	    # Inline code spans.
	    text = re.sub(r'`[^`]*`', '', text)

	    # HTML tags (this also removes <https://...> autolinks).
	    text = re.sub(r'<[^>]*>', '', text)

	    # Images are dropped entirely, alt text included.
	    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)

	    # Inline links: keep the visible text, drop the target.
	    text = re.sub(r'\[([^\]]*)\]\(.*?\)', r'\1', text)

	    # Bare URLs that were not part of a link or image.
	    text = re.sub(r'https?://\S+', '', text)

	    # Heading markers (leading # symbols).
	    text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)

	    # Horizontal rules, while their marker characters are still intact.
	    text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)

	    # Emphasis markers (* and _): strong first, then regular; keep the text.
	    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
	    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)

	    # Footnote references such as [^1].
	    text = re.sub(r'\[\^[^\]]*\]', '', text)

	    return text


	def count_words(text):
	    """
	    Return the number of words in ``text`` once Markdown syntax is stripped.

	    A word is any non-empty token left after splitting the output of
	    ``clean_markdown`` on runs of whitespace.
	    """
	    # Strip the Markdown syntax first.
	    cleaned_text = clean_markdown(text)

	    # Split on whitespace; leading or trailing whitespace yields empty
	    # strings, which are filtered out before counting.
	    words = [word for word in re.split(r'\s+', cleaned_text) if word.strip()]
	    return len(words)


	def main():
	    """
	    Command-line entry point: report the word count of one Markdown file.

	    Parses ``file`` (required path) and ``--details`` (optional flag) from
	    the command line, reads the file as UTF-8 and prints the word count.
	    With ``--details`` it also prints the line count, the character count
	    and an estimated reading time.

	    Exits with status 1, after printing a message to stderr, when the file
	    is missing, cannot be read, or is not valid UTF-8.
	    """
	    parser = argparse.ArgumentParser(description='Count words in a Markdown file.')
	    parser.add_argument('file', help='Path to the Markdown file')
	    parser.add_argument('--details', action='store_true', help='Show detailed statistics')
	    args = parser.parse_args()

	    file_path = Path(args.file)

	    # Open the file directly instead of checking exists() first: this avoids
	    # the check-then-open race and keeps the try body to what can fail.
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            content = file.read()
	    except FileNotFoundError:
	        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
	        sys.exit(1)
	    except (OSError, UnicodeDecodeError) as e:
	        print(f"Error: {e}", file=sys.stderr)
	        sys.exit(1)

	    word_count = count_words(content)

	    print(f"\nFile: {file_path}")
	    print(f"Word count: {word_count}")

	    if args.details:
	        line_count = len(content.splitlines())
	        char_count = len(content)

	        # Estimate reading time at an average of 250 words per minute.
	        # Integer arithmetic avoids float truncation errors: with floats,
	        # 575 words came out as 2 min 17 sec instead of 2 min 18 sec.
	        words_per_minute = 250
	        total_seconds = word_count * 60 // words_per_minute
	        reading_minutes, reading_seconds = divmod(total_seconds, 60)

	        print(f"Line count: {line_count}")
	        print(f"Character count: {char_count}")
	        print(f"Estimated reading time: {reading_minutes} min {reading_seconds} sec")


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()