@maaduukaar
Last active August 25, 2025 15:33
Script to check HTML pages for specific keywords and export results to Excel.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import argparse
import re

# CLI arguments
parser = argparse.ArgumentParser(description="Check WordPress posts for keywords.")
parser.add_argument("-w", "--whole", action="store_true", help="Match whole words only")
args = parser.parse_args()

# read links from links.txt
with open("links.txt", "r", encoding="utf-8") as f:
    links = [line.strip() for line in f if line.strip()]

# read keywords from keywords.txt and convert to lowercase
with open("keywords.txt", "r", encoding="utf-8") as f:
    keywords = [line.strip().lower() for line in f if line.strip()]

results = []

for i, url in enumerate(links, start=1):
    print(f"\n[{i}/{len(links)}] Checking: {url}")
    try:
        response = requests.get(url, timeout=10)
        status_code = response.status_code
        content_length = len(response.content)
        print(f" → Status code: {status_code}")
        print(f" → Content size: {content_length} bytes")
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # get plain text from the article
        text = soup.get_text(separator=" ").lower()

        # keyword search
        found = []
        for word in keywords:
            if args.whole:
                # match whole word only
                if re.search(rf"\b{re.escape(word)}\b", text):
                    found.append(word)
            else:
                # substring match
                if word in text:
                    found.append(word)

        if found:
            print(f" → Found: {', '.join(found)}")
        else:
            print(" → No keywords found")

        results.append({
            "URL": url,
            "Status code": status_code,
            "Content size (bytes)": content_length,
            "Found keywords": ", ".join(found) if found else "—"
        })
    except Exception as e:
        print(f" → Error: {e}")
        results.append({
            "URL": url,
            "Status code": "Error",
            "Content size (bytes)": "—",
            "Found keywords": f"Error: {e}"
        })

# check if result.xlsx already exists, if yes -> create a new one with suffix
output_file = "result.xlsx"
if os.path.exists(output_file):
    base, ext = os.path.splitext(output_file)
    counter = 1
    while os.path.exists(f"{base}_{counter}{ext}"):
        counter += 1
    output_file = f"{base}_{counter}{ext}"

# save results to Excel
df = pd.DataFrame(results)
df.to_excel(output_file, index=False)

print(f"\nDone ✅ Results saved to {output_file}")
maaduukaar commented Aug 25, 2025

HTML Page Keyword Checker

Short description

Script to check HTML pages for specific keywords and export results to Excel.

Full description

This Python script automates the process of checking web pages for specific keywords.

Features

  • Reads a list of URLs from links.txt (one per line).
  • Reads a list of keywords from keywords.txt (one per line, case-insensitive).
  • Fetches each URL and extracts plain text from the HTML.
  • Supports two search modes (see the sketch after this list):
    • Substring match (default) – keywords are found even inside other words (e.g. "hop" will match "hopper").
    • Whole word match (-w or --whole) – matches only exact words (e.g. "hop" will not match "hopper").
  • Logs detailed information to the console:
    • Current URL being checked
    • HTTP status code
    • Page content size in bytes
    • Keywords found (if any)
  • Saves results to an Excel file (result.xlsx).
    • If the file already exists, a new file with a numeric suffix will be created automatically (e.g. result_1.xlsx, result_2.xlsx).
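
The two search modes reduce to a plain substring test versus a word-boundary regex, mirroring the logic in the script above. A minimal, standalone sketch of the comparison (the sample text is illustrative, not output from the script):

import re

text = "the grasshopper hopped over the fence".lower()
word = "hop"

# Substring match: "hop" is found inside "grasshopper" and "hopped"
print(word in text)  # True

# Whole-word match: \b boundaries require "hop" to stand alone, so nothing matches
print(bool(re.search(rf"\b{re.escape(word)}\b", text)))  # False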

Output (Excel columns)

  • URL – the page address
  • Status code – HTTP response code
  • Content size (bytes) – size of the page body
  • Found keywords – list of found keywords or "—" if none were found
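
Because the column names are fixed, the spreadsheet is easy to post-process with pandas. A small sketch that keeps only the rows where at least one keyword was found, assuming the default result.xlsx filename and that openpyxl is installed for reading .xlsx files:

import pandas as pd

df = pd.read_excel("result.xlsx")

# Drop rows that failed to download and rows with no keyword hits
hits = df[(df["Status code"] != "Error") & (df["Found keywords"] != "—")]
print(hits[["URL", "Found keywords"]])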

Usage

# Default (substring search)
python html-keyword-checker.py  

# Whole word search
python html-keyword-checker.py -w
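
Both input files are plain text, one entry per line; blank lines are skipped. The entries below are illustrative placeholders, and the pip command assumes openpyxl as the Excel writer used by pandas:

links.txt
https://example.com/sample-post
https://example.com/another-post

keywords.txt
discount
free shipping

# Install dependencies
pip install requests beautifulsoup4 pandas openpyxl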
