Created
January 5, 2024 19:48
-
-
Save Shiroizu/b571ea87cd508cc050338d8fe654e596 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' Output (sorted by date):
λ "VocaDB Wiki revision history.py"
Found 77 pages
2024-01-03 10:22:51+00:00: wiki/59 'Content policy' (version 28) edit by Shiroizu
2024-01-01 13:54:17+00:00: wiki/86 'Album entry editing' (version 11) edit by Shiroizu
2024-01-01 13:54:02+00:00: wiki/83 'Artist entry editing' (version 19) edit by Shiroizu
2024-01-01 13:53:30+00:00: wiki/89 'Song entry editing ' (version 19) edit by Shiroizu
...
2023-02-16 03:32:05+00:00: wiki/53 'Management guidelines / golden rules' (version 24) edit by andreoda
2023-02-16 03:31:50+00:00: wiki/29 'License' (version 14) edit by andreoda
2023-02-16 01:56:24+00:00: wiki/22 'VocaDB domains & login process' (version 6) edit by andreoda
'''
import requests | |
import time | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
# Index page whose table links to the individual wiki pages.
all_wiki_pages_url = "https://wiki.vocadb.net/pages/allpages"

# One entry per wiki page:
# [edit_date, wiki_page_title, wiki_page_id, version, editor]
page_edits = []

# Without a timeout, requests waits forever on an unresponsive server.
r = requests.get(all_wiki_pages_url, timeout=30)
# Fail loudly on an HTTP error instead of parsing the error page and
# silently reporting "Found 0 pages".
r.raise_for_status()

# Every <a> inside a "pagename" table cell is a link to one wiki page.
all_wiki_pages = BeautifulSoup(r.text, "html.parser").select("table td.pagename a")
print(f"Found {len(all_wiki_pages)} pages\n")
# Visit the history page of every wiki page and record its top history row.
for wiki_page_link in all_wiki_pages:
    # Example link element:
    # <a href="/wiki/62/artist-merging-and-splitting-guidelines"> Artist merging and splitting guidelines</a>
    wiki_page_title = wiki_page_link.text

    # Take the id from the href attribute rather than from the serialized
    # tag: for an href without a trailing slug (e.g. "/wiki/62") splitting
    # str(tag) would return the id glued to the rest of the markup.
    wiki_page_id = wiki_page_link["href"].split("/wiki/")[1].split("/")[0]

    wiki_page_history_url = f"https://wiki.vocadb.net/pages/history/{wiki_page_id}"
    # Without a timeout, requests waits forever on an unresponsive server.
    r = requests.get(wiki_page_history_url, timeout=30)
    # Surface HTTP errors directly instead of failing later while parsing
    # an error page.
    r.raise_for_status()

    # Cells of the first row of the history table; the script treats this
    # row as the most recent edit (assumes newest-first ordering).
    most_recent_edit = BeautifulSoup(r.text, "html.parser").select(
        "tbody tr:first-child td"
    )
    # The row is expected to hold exactly four cells: version, edit date,
    # editor and one unused trailing cell. Report anything else with
    # context instead of an opaque unpacking error (still a ValueError).
    if len(most_recent_edit) != 4:
        raise ValueError(
            f"Unexpected history table layout at {wiki_page_history_url}: "
            f"expected 4 cells in the first row, found {len(most_recent_edit)}"
        )
    version, edit_date, editor, _ = most_recent_edit
    edit_date = datetime.fromisoformat(edit_date.text)

    page_edits.append(
        [edit_date, wiki_page_title, wiki_page_id, version.text, editor.text]
    )

    # Pause between requests to avoid hammering the wiki server.
    time.sleep(0.5)
# Order the collected entries newest-first by edit timestamp (first field).
page_edits.sort(reverse=True, key=lambda entry: entry[0])

# Print one summary line per wiki page.
for edit_date, wiki_page_title, wiki_page_id, version, editor in page_edits:
    summary = f"{edit_date}: wiki/{wiki_page_id} '{wiki_page_title}' (version {version}) edit by {editor}"
    print(summary)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment