Skip to content

Instantly share code, notes, and snippets.

@traverseda
Last active January 26, 2019 21:47
Show Gist options
  • Save traverseda/ab48ca36f734f935302457f79d68a51c to your computer and use it in GitHub Desktop.
Save traverseda/ab48ca36f734f935302457f79d68a51c to your computer and use it in GitHub Desktop.
Archive the BBCode of posts from Sufficient Velocity.
import os

import requests, lxml
from lxml.etree import tostring
import lxml.html
import html2text
# Endpoint that returns a post's source as quotable BBCode (JSON payload).
bbUrl = "https://forums.sufficientvelocity.com/posts/{postId}/quote"

# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole archive run.
REQUEST_TIMEOUT = 60


def extract_post_id(element_id):
    """Return the post id encoded in a message element's ``id`` attribute.

    Args:
        element_id: The raw ``id`` attribute value (e.g. ``"post-12345"``),
            or ``None`` when the element has no ``id`` attribute.

    Returns:
        The text after the ``"post-"`` prefix, or ``None`` when the
        attribute is missing/empty or does not start with ``"post-"``.
    """
    prefix = "post-"
    if not element_id or not element_id.startswith(prefix):
        return None
    # Slice the prefix off instead of using str.lstrip('post-'): lstrip
    # removes any leading run of the characters p/o/s/t/-, not the prefix,
    # and would also eat the start of an id made of those characters.
    return element_id[len(prefix):]


def main(first_page=1, last_page=89, out_dir="story"):
    """Download the BBCode of every post in the thread, one file per post.

    Walks thread pages ``first_page``..``last_page`` (inclusive), collects
    the post ids listed under the ``messageList`` element of each page, asks
    the quote endpoint for each post's BBCode and writes it to
    ``<out_dir>/<postId>.bbcode``.

    Args:
        first_page: First thread page to fetch.
        last_page: Last thread page to fetch (inclusive).
        out_dir: Directory the ``.bbcode`` files are written to; created
            if it does not exist.

    Raises:
        requests.HTTPError: If a page or quote request returns an HTTP
            error status.
        KeyError: If a quote response has no ``'quote'`` field.
    """
    session = requests.Session()
    session.headers.update({'User-Agent': 'WithThisRing downloader'})
    os.makedirs(out_dir, exist_ok=True)

    for i in range(first_page, last_page + 1):
        source = f"https://forums.sufficientvelocity.com/threads/with-this-ring-young-justice-si-story-only.25076/page-{i}"
        print("--------- starting page", i)
        page = session.get(source, timeout=REQUEST_TIMEOUT)
        # Fail loudly rather than parse an error page and silently archive
        # nothing for this page.
        page.raise_for_status()
        root = lxml.html.document_fromstring(page.text)
        for a in root.xpath("//ol[@id='messageList']/li"):
            postId = extract_post_id(a.get('id'))
            if postId is None:
                # List item without a post id: nothing to request for it.
                continue
            response = session.post(
                bbUrl.format(postId=postId),
                data={'_xfResponseType': "json"},
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            # Explicit UTF-8 so non-ASCII story text is written the same way
            # on every platform; the context manager closes the file.
            target = os.path.join(out_dir, f'{postId}.bbcode')
            with open(target, 'w', encoding='utf-8') as out:
                out.write(response.json()['quote'])


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment