Last active
October 13, 2021 13:06
-
-
Save Ladsgroup/ae2aa172e8c5561e2857885d5c4e3a76 to your computer and use it in GitHub Desktop.
Deleting non-notable items.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pywikibot | |
import re | |
import random | |
import sys | |
from pywikibot.data.api import Request | |
# Title of the on-wiki report whose rows name the candidate items.
REPORT_PAGE_TITLE = 'User:Pasleim/Items for deletion/Page deleted'

# Log in to Wikidata and obtain the data repository used to build ItemPages.
wikidata = pywikibot.Site(code='wikidata', fam='wikidata')
wikidata.login()
repo = wikidata.data_repository()

# The report page is only constructed here; its text is fetched where it is used.
report_page = pywikibot.Page(wikidata, REPORT_PAGE_TITLE)
def check_history(item):
    """Return True when no page ever sitelinked from *item* can still be loaded.

    The revision history is scanned for edit summaries of the form
    ``/* wbsetsitelink-add:1|<dbname> */ <title>``. For each one, the named
    page is fetched from the wiki identified by ``<dbname>``.

    Args:
        item: Object whose ``revisions()`` yields entries with a ``'comment'``
            key (an ItemPage in this script).

    Returns:
        False as soon as a summary carries no title or one of the pages loads
        successfully; True otherwise, including when the history contains no
        sitelink-add summary at all.
    """
    prefix = '/* wbsetsitelink-add:1|'
    for rev in item.revisions():
        comment = rev['comment']
        if not comment.startswith(prefix):
            continue

        # Everything before the first ' */' is the wiki's database name,
        # everything after it is the linked page title.
        db_name, _, article_title = comment[len(prefix):].partition(' */')
        db_name = db_name.strip()
        article_title = article_title.strip()
        if not article_title:
            # Cannot tell which page was linked, so do not vouch for the item.
            return False

        client_site = wikidata.fromDBName(db_name)
        page = pywikibot.Page(client_site, article_title)
        try:
            page.get()
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # stop the bot, matching the main loop's handling.
            # NOTE(review): any failure to load is treated as "page is gone",
            # which presumably includes transient network errors - consider
            # narrowing to pywikibot's missing-page/redirect exceptions.
            continue
        else:
            # The page still exists on the client wiki.
            return False
    return True
# One report row: {{Q|<id>}} (...) || <digit> || 0 || 0 || [[<title>]]
# Group 1 is the numeric item id, group 2 the title inside the wiki link.
REPORT_ROW = re.compile(
    r'\{\{Q\|Q?(\d+)\}\}\s*\(.+?\)\s*\|\|\s*\d\s*\|\|\s*0\s*\|\|\s*0\s*\|\|\s*\[\[(.+?)\]\]'
)
# An item whose claims use any property outside this set is left alone.
ALLOWED_PROPERTIES = ('P31',)
# Backlinks from pages in these namespaces block deletion.
BLOCKING_NAMESPACES = (0, 120)

for qid, last_title in REPORT_ROW.findall(report_page.get()):
    wikipedia_title = '[[{}]]'.format(last_title)
    item = pywikibot.ItemPage(repo, 'Q' + qid)
    try:
        item.get()
    except Exception:
        # Item could not be loaded: skip it. KeyboardInterrupt and SystemExit
        # are not subclasses of Exception, so they still stop the run.
        continue

    # Items that still have a sitelink are out of scope.
    if item.sitelinks:
        continue

    # Skip items carrying any statement other than the allowed properties.
    if any(prop not in ALLOWED_PROPERTIES for prop in item.claims):
        continue

    # Stop at the first blocking backlink instead of walking all of them.
    if any(backlink.namespace() in BLOCKING_NAMESPACES
           for backlink in item.backlinks()):
        continue

    # Only proceed when none of the pages once linked can still be loaded.
    if not check_history(item):
        continue

    # NOTE: a check against the API's list=wbsubscribers (skipping items that
    # reported subscribers) used to sit here but was disabled.

    summary = (
        "Bot: Automatic deletion of an item that used to have a sitelink that is deleted "
        "and doesn't have any good statement, no backlink in main namespace and no sitelinks."
    )
    if item.labels:
        # Quote one arbitrary label so the deletion log entry is recognisable.
        one_label = random.choice(list(item.labels.values()))
        summary += ' Label: "' + one_label + '"'
    summary += ' The last deleted sitelink: ' + wikipedia_title
    item.delete(summary, prompt=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment