Created
June 25, 2025 05:35
-
-
Save DerfJagged/780de6c45040e77d8fc3b672cf8a2eb4 to your computer and use it in GitHub Desktop.
Python script to tag MediaWiki pages with a category matching the page's namespace.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import requests | |
import mwclient | |
import re | |
import os | |
# Clear the terminal before prompting. 'cls' only exists on Windows, so fall
# back to 'clear' everywhere else instead of printing a shell error.
os.system('cls' if os.name == 'nt' else 'clear')

# Variables
# Namespaces that are never offered for tagging.
excluded_namespaces = {"MediaWiki", "Media", "Template", "Module", "Topic", "Talk", "User", "File", "Category", "Special", "Translations", "Hidden"}

# Set up your MediaWiki API connection
site = mwclient.Site('EXAMPLE.org', path='/wiki/')  # Replace with your wiki's URL

# Authenticate if required
# NOTE: replace the placeholders with real bot credentials; avoid committing
# a real password to version control.
site.login(username='BOT_USERNAME_HERE', password='PASSWORD_HERE')

######################################

# Ask for what namespace to target
target_namespace = input("\nWhat namespace do you want to change? ['ALL' to update all] ('q' to quit): ")
if target_namespace == 'q':
    # Raise SystemExit directly: exit() is a helper injected by the `site`
    # module for interactive use and is not guaranteed to exist in scripts.
    raise SystemExit

# 'ALL' switches the main loop into walking every eligible namespace in turn.
target_all = target_namespace == 'ALL'

# Get list of namespaces
# Maps namespace name -> namespace ID, dropping unnamed entries, the
# explicitly excluded namespaces, and any namespace whose name contains "talk".
namespace_map = {
    name: ns_id
    for ns_id, name in site.namespaces.items()
    if name and name not in excluded_namespaces and "talk" not in name
}

# Ordered (name, ID) pairs plus a cursor, consumed by the main loop in 'ALL' mode.
namespace_list = list(namespace_map.items())
namespace_index = 0

# Lets the main loop skip re-asking the namespace question on its first pass.
first_run = True
# Main loop: pick a namespace/category pair (automatically in 'ALL' mode,
# interactively otherwise), then append the category to every page of the
# namespace that is not yet in it.
while True:
    if target_all:
        # Walk the namespace list one entry per iteration until exhausted.
        if namespace_index >= len(namespace_list):
            print("No more namespaces to target")
            raise SystemExit
        target_namespace, _ = namespace_list[namespace_index]
        target_category = target_namespace
        namespace_index += 1
    else:
        # Avoid duplicate question for the first loop
        if first_run:
            first_run = False
        else:
            target_namespace = input("\nWhat namespace do you want to change? ('q' to quit): ")
            if target_namespace == 'q':
                raise SystemExit

        target_category = input(f"\nWhat category do you want to change? [{target_namespace}]: ")
        # Check the answer just given; testing target_namespace here made 'q'
        # fall through and be used as a category name.
        if target_category == 'q':
            raise SystemExit
        # An empty answer means "use the category named after the namespace".
        if target_category == '':
            target_category = target_namespace

    print(f"\nTagging pages in namespace '{target_namespace}' with category '{target_category}'")

    # Convert namespace name to ID
    namespace_id = namespace_map.get(target_namespace)
    if namespace_id is None:
        print(f"Unknown namespace: {target_namespace}")
        continue

    print(f"Getting all pages in namespace '{target_namespace}'...")
    namespace_pages = {page.name for page in site.allpages(namespace=namespace_id)}

    print(f"Getting all pages in category 'Category:{target_category}'...")
    category_pages = {page.name for page in site.categories[target_category]}

    # Find delta (pages in namespace that are not in the category)
    delta_pages = namespace_pages - category_pages
    print(f"{len(delta_pages)} pages are missing the category.")

    # Matches an existing [[Category:<target>]] tag, with or without a
    # "|sort key" suffix. It only depends on the category, so build it once
    # per namespace rather than once per page.
    category_pattern = re.compile(
        rf'\[\[\s*category\s*:\s*{re.escape(target_category)}\s*(?:\|[^\]]*)?\]\]',
        re.IGNORECASE,
    )

    # Append category to each delta page
    for page_name in sorted(delta_pages):
        # Skip translation pages
        if '/' in page_name:
            print(f"Skipping (translation) {page_name}")
            continue

        # Get page text
        page = site.pages[page_name]
        text = page.text()

        # Skip redirect pages. A redirect directive must open the page, so a
        # page that merely mentions "#redirect" in its body is still tagged.
        if text.lstrip().lower().startswith('#redirect'):
            print(f"Skipping (redirect) {page_name}")
            continue

        # Skip if already tagged with the category (the category listing can
        # lag behind the page text).
        if category_pattern.search(text):
            print(f"Skipping (already tagged) {page_name}")
            continue

        # Add category tag, separated from the content by one blank line.
        new_text = text.strip() + f"\n\n[[Category:{target_category}]]"
        page.save(new_text, summary=f"Adding [[Category:{target_category}]]")
        print(f"Updated {page_name}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment