Skip to content

Instantly share code, notes, and snippets.

@sunderee
Created May 4, 2025 04:46
Show Gist options
  • Save sunderee/4ac2167ec8740dbcee28afee120e6f48 to your computer and use it in GitHub Desktop.
Save sunderee/4ac2167ec8740dbcee28afee120e6f48 to your computer and use it in GitHub Desktop.
Markdown file word counter
"""
Markdown Word Counter
---------------------
A simple script to count words in a Markdown file, ignoring Markdown syntax.
"""
import re
import sys
import argparse
from pathlib import Path
def clean_markdown(text):
    """
    Remove Markdown syntax elements that shouldn't be counted as words.

    The passes run in a deliberate order, because several patterns would
    otherwise destroy the syntax a later pattern needs to recognise:

    * images and links are resolved *before* bare URLs are stripped, so the
      URL pattern cannot swallow the closing ``)`` of ``[text](https://...)``;
    * horizontal rules and list markers are removed *before* emphasis, so
      ``***`` or a leading ``* `` is not mistaken for an emphasis delimiter.

    Args:
        text: Raw Markdown source.

    Returns:
        The text with code, HTML tags, images, URLs and structural markers
        removed, and with link text and emphasised text kept.
    """
    # Remove fenced code blocks first so their content is never parsed as markup
    text = re.sub(r'```[\s\S]*?```', '', text)
    # Remove inline code
    text = re.sub(r'`[^`]*`', '', text)
    # Remove blockquote markers (before HTML handling, so a leading '>' can
    # never be taken as the end of a tag opened by a stray '<' earlier on)
    text = re.sub(r'^[ \t]*(?:>[ \t]*)+', '', text, flags=re.MULTILINE)
    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', text)
    # Remove image references (must precede link handling: same syntax plus '!')
    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)
    # Remove link references but keep the link text
    text = re.sub(r'\[([^\]]*)\]\(.*?\)', r'\1', text)
    # Remove bare URLs (after links/images, so their destinations are intact
    # when those patterns run)
    text = re.sub(r'https?://\S+', '', text)
    # Remove headers (# symbols)
    text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)
    # Remove horizontal rules, including spaced forms such as "* * *"
    text = re.sub(r'^[ \t]*(?:[-*_][ \t]*){3,}$', '', text, flags=re.MULTILINE)
    # Remove list markers: bullets, ordered-list numbers, and an optional
    # task-list checkbox ("- [ ] " / "- [x] ")
    text = re.sub(
        r'^[ \t]*(?:[-*+]|\d+[.)])[ \t]+(?:\[[ xX]\][ \t]+)?',
        '',
        text,
        flags=re.MULTILINE,
    )
    # Remove emphasis markers (* and _) but keep the text
    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)
    # Remove footnote references
    text = re.sub(r'\[\^[^\]]*\]', '', text)
    return text
def count_words(text):
    """
    Return how many words ``text`` contains once Markdown syntax is stripped.

    The input is passed through ``clean_markdown`` and the result is broken
    on runs of whitespace; every non-empty piece counts as one word.
    """
    stripped = clean_markdown(text)
    # Splitting on whitespace can yield empty strings at either end of the
    # text; those are skipped rather than counted.
    return sum(1 for piece in re.split(r'\s+', stripped) if piece.strip())
def main():
    """
    Command-line entry point: parse arguments, read the file, print counts.

    Prints the word count for the given Markdown file and, when ``--details``
    is passed, the line count, character count and an estimated reading time.
    Exits with status 1 if the file is missing or any error occurs while
    reading or processing it.
    """
    cli = argparse.ArgumentParser(description='Count words in a Markdown file.')
    cli.add_argument('file', help='Path to the Markdown file')
    cli.add_argument('--details', action='store_true', help='Show detailed statistics')
    options = cli.parse_args()
    target = Path(options.file)

    try:
        if not target.exists():
            print(f"Error: File '{target}' not found.")
            sys.exit(1)

        with open(target, 'r', encoding='utf-8') as handle:
            markdown = handle.read()

        total_words = count_words(markdown)
        print(f"\nFile: {target}")
        print(f"Word count: {total_words}")

        # Nothing more to report unless detailed statistics were requested.
        if not options.details:
            return

        total_lines = len(markdown.splitlines())
        total_chars = len(markdown)

        # Estimate reading time (average reading speed: 250 words per minute)
        minutes_exact = total_words / 250
        whole_minutes = int(minutes_exact)
        leftover_seconds = int((minutes_exact - whole_minutes) * 60)

        print(f"Line count: {total_lines}")
        print(f"Character count: {total_chars}")
        print(f"Estimated reading time: {whole_minutes} min {leftover_seconds} sec")
    except Exception as err:
        print(f"Error: {err}")
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment