Skip to content

Instantly share code, notes, and snippets.

@DerfJagged
Created June 25, 2025 05:35
Show Gist options
  • Save DerfJagged/780de6c45040e77d8fc3b672cf8a2eb4 to your computer and use it in GitHub Desktop.
Save DerfJagged/780de6c45040e77d8fc3b672cf8a2eb4 to your computer and use it in GitHub Desktop.
Python script to tag MediaWiki pages with a category matching the page's namespace.
#!/usr/bin/python3
import requests
import mwclient
import re
import os
# Clear the terminal so the prompts start on a clean screen.
# 'cls' only exists on Windows; POSIX shells use 'clear'.
os.system('cls' if os.name == 'nt' else 'clear')

# Variables
# Namespace names that are filtered out when the map of taggable namespaces
# is built further down in this script.
excluded_namespaces = {"MediaWiki", "Media", "Template", "Module", "Topic", "Talk", "User", "File", "Category", "Special", "Translations", "Hidden"}

# Set up your MediaWiki API connection
site = mwclient.Site('EXAMPLE.org', path='/wiki/')  # Replace with your wiki's URL

# Authenticate if required
# NOTE(review): the credentials below are placeholders written directly into
# the source; avoid committing real bot passwords to a shared copy of this file.
site.login(username='BOT_USERNAME_HERE', password='PASSWORD_HERE')
######################################
# Ask for what namespace to target.
# 'ALL' switches the script into batch mode (target_all), 'q' quits right away.
# Surrounding whitespace is trimmed so that ' q' or 'ALL ' are still recognised.
target_namespace = input("\nWhat namespace do you want to change? ['ALL' to update all] ('q' to quit): ").strip()
if target_namespace == 'q':
    # exit() is a helper the site module injects for interactive sessions;
    # raising SystemExit has the same effect without depending on it.
    raise SystemExit
target_all = target_namespace == 'ALL'
# Get list of namespaces
# Map each namespace name to its numeric ID. Skipped entries:
#   - namespaces with an empty name (presumably the main namespace - confirm),
#   - anything listed in excluded_namespaces,
#   - any namespace whose name contains "talk" (case-sensitive).
namespace_map = {
    name: ns_id
    for ns_id, name in site.namespaces.items()
    if name and name not in excluded_namespaces and "talk" not in name
}

# Ordered (name, id) pairs walked one per iteration when 'ALL' was requested
namespace_list = list(namespace_map.items())
namespace_index = 0

# True until the main loop has consumed the namespace typed at the first prompt
first_run = True
def _is_redirect(wikitext):
    """Return True when *wikitext* starts with a ``#REDIRECT`` directive.

    Only the beginning of the page (ignoring leading whitespace) is checked,
    so a page that merely mentions the keyword further down is not skipped.
    """
    return wikitext.lstrip().lower().startswith('#redirect')


# Main loop: each iteration picks one namespace/category pair and appends the
# category link to every page of the namespace that does not carry it yet.
while True:
    if target_all:
        # Batch mode: walk the namespace list, one namespace per iteration
        if namespace_index >= len(namespace_list):
            print("No more namespaces to target")
            raise SystemExit
        target_namespace, _ = namespace_list[namespace_index]
        target_category = target_namespace
        namespace_index += 1
    else:
        # Avoid duplicate question for the first loop: the namespace was
        # already requested before the loop started
        if first_run:
            first_run = False
        else:
            target_namespace = input("\nWhat namespace do you want to change? ('q' to quit): ").strip()
            if target_namespace == 'q':
                raise SystemExit
        target_category = input(f"\nWhat category do you want to change? [{target_namespace}]: ").strip()
        # 'q' at the category prompt quits as well
        if target_category == 'q':
            raise SystemExit
        # An empty answer means "use the namespace name as the category"
        if not target_category:
            target_category = target_namespace

    print(f"\nTagging pages in namespace '{target_namespace}' with category '{target_category}'")

    # Convert namespace name to ID
    namespace_id = namespace_map.get(target_namespace)
    if namespace_id is None:
        print(f"Unknown namespace: {target_namespace}")
        continue

    print(f"Getting all pages in namespace '{target_namespace}'...")
    namespace_pages = {page.name for page in site.allpages(namespace=namespace_id)}
    print(f"Getting all pages in category 'Category:{target_category}'...")
    category_pages = {page.name for page in site.categories[target_category]}

    # Find delta (pages in namespace that are not in the category)
    delta_pages = namespace_pages - category_pages
    print(f"{len(delta_pages)} pages are missing the category.")

    # Both values depend only on the category, so build them once per
    # namespace instead of once per page
    category_link = f"[[Category:{target_category}]]"
    category_pattern = re.compile(
        rf'\[\[\s*category\s*:\s*{re.escape(target_category)}\s*\]\]',
        re.IGNORECASE,
    )

    # Append category to each delta page
    for page_name in sorted(delta_pages):
        # Skip translation pages (any page name containing a slash)
        if '/' in page_name:
            print(f"Skipping (translation) {page_name}")
            continue

        # Get page text
        page = site.pages[page_name]
        text = page.text()

        # Skip redirect pages
        if _is_redirect(text):
            print(f"Skipping (redirect) {page_name}")
            continue

        # Skip if the wikitext already contains the category link, even
        # though the page was not listed as a member of the category
        if category_pattern.search(text):
            print(f"Skipping (already tagged) {page_name}")
            continue

        # Add category tag, separated from the content by one blank line
        new_text = f"{text.strip()}\n\n{category_link}"
        try:
            page.save(new_text, summary=f"Adding {category_link}")
        except mwclient.errors.MwClientError as err:
            # One page that cannot be saved should not abort the whole batch
            print(f"Failed to update {page_name}: {err}")
            continue
        print(f"Updated {page_name}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment