Last active
March 30, 2021 06:30
-
-
Save bmispelon/2863a2c71a25765ddd03 to your computer and use it in GitHub Desktop.
Download monthly patron reports from Patreon (CSV).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Download your monthly patron reports from Patreon (CSV). | |
| Requires requests and lxml. | |
| """ | |
| import argparse | |
| from datetime import datetime | |
| from getpass import getpass | |
| import logging | |
| from os import makedirs, path | |
| import re | |
| from urllib.parse import parse_qs, urlparse | |
| from lxml import html | |
| import requests | |
# Endpoints used by this script.
BASE_URL = 'https://www.patreon.com'
LOGIN_URL = 'https://api.patreon.com/login'
MANAGER_URL = '{}/manageRewards'.format(BASE_URL)
MONTH_CSV_DOWNLOAD_URL = '{}/downloadCsv'.format(BASE_URL)

# XPath selectors applied to the patron manager page: the first selects one
# container per month, the second is evaluated relative to a container and
# selects the anchor carrying the month's title and report link.
MONTH_CONTAINER_XPATH = '//div[@id="boxGrid"]/div[@class="box mylink"]'
MONTH_LINK_XPATH = './/div[@class="pledge"]/a'

# A month title looks like "<month> <year> Patreon supported"; the leading part
# is captured as ``month_year``.
MONTH_TITLE_RE = re.compile(r'^(?P<month_year>.+) Patreon supported$')

# str.format template applied to a datetime to name each saved report.
FILENAME_FORMAT = '{:%Y-%m}-Patreon.csv'
def login(email, password):
    """
    Log in to patreon.com using the given email/password.

    The credentials are posted as JSON to LOGIN_URL through a new requests
    session. That session is returned so that whatever cookies the login
    response set are reused by later requests.

    Raises requests.HTTPError if the server answers with anything other than
    HTTP 200 (presumably the case for a bad email/password -- not verified
    against the live service).
    """
    logging.info("Attempting login with email %r...", email)
    session = requests.session()
    payload = {"data": {"email": email, "password": password}}
    response = session.post(LOGIN_URL, json=payload)

    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let a failed login go unnoticed.
    if response.status_code != 200:
        raise requests.HTTPError(
            "Login failed with HTTP status {}".format(response.status_code),
            response=response,
        )

    logging.info("Login successful!")
    return session
def gen_monthly_report_links(session):
    """
    Generate download links to the current session's monthly CSV reports.

    Fetches MANAGER_URL with the given (logged-in) session, finds one
    container per month in the returned HTML and yields a
    ``(filename, url)`` tuple for each of them:

    * ``filename`` is FILENAME_FORMAT applied to the month parsed from the
      link's title;
    * ``url`` is MONTH_CSV_DOWNLOAD_URL with the month's ``hid`` as query
      parameter.

    This is a generator, so the page is only requested once iteration starts.

    Raises requests.HTTPError if the manager page does not come back with
    HTTP 200, and IndexError if a month container has no report link.
    """
    logging.info("Opening patron manager page...")
    response = session.get(MANAGER_URL)

    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O.
    if response.status_code != 200:
        raise requests.HTTPError(
            "Could not open patron manager page (HTTP status {})".format(
                response.status_code
            ),
            response=response,
        )
    logging.info("Done.")

    tree = html.fromstring(response.text)
    containers = tree.xpath(MONTH_CONTAINER_XPATH)
    logging.info("Found %d result(s)", len(containers))

    for container in containers:
        # Resolve relative hrefs against the site root before reading them.
        container.make_links_absolute(BASE_URL)
        anchor_node = container.xpath(MONTH_LINK_XPATH)[0]

        hid = _get_hid_from_url(anchor_node.attrib['href'])
        month_datetime = _get_datetime_from_title(anchor_node.text_content())

        filename = FILENAME_FORMAT.format(month_datetime)
        url = _get_full_url(MONTH_CSV_DOWNLOAD_URL, params={'hid': hid})
        yield filename, url
def _get_full_url(url, params):
    """
    Combine the given URL and query parameters into a final URL string.

    The work is delegated to requests: a GET request is built (never sent)
    and the URL of its prepared form is returned.
    """
    prepared = requests.Request('GET', url, params=params).prepare()
    return prepared.url
| def _get_hid_from_url(url): | |
| """ | |
| Return the hid (Patreon's month report ID) parameter of the given URL. | |
| """ | |
| parsed_url = urlparse(url) | |
| parsed_query = parse_qs(parsed_url.query) | |
| return parsed_query['hid'] | |
def _get_datetime_from_title(title):
    """
    Parse the title of a month link into a datetime.

    After stripping surrounding whitespace the title must match
    MONTH_TITLE_RE; the captured ``month_year`` text is then parsed with the
    '%B %Y' format (full month name, as spelled in the current locale,
    followed by the year).

    Raises ValueError if the title does not match the expected pattern or if
    its month/year part cannot be parsed.
    """
    match = MONTH_TITLE_RE.search(title.strip())
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would turn a bad title into an obscure
    # AttributeError on ``None``.
    if match is None:
        raise ValueError("Unrecognized month title: {!r}".format(title))

    date_format = '%B %Y'
    return datetime.strptime(match.group('month_year'), date_format)
def _download(session, url, filename):
    """
    Download the file at the given URL and save it to the given filename.

    To save to a different folder, simply include the path in the filename.
    The request is made with the given session, and the response body is
    written in binary mode, replacing any existing file.

    This reads the whole file into memory so it's not suitable for really big
    files.

    Raises requests.HTTPError if the server does not answer with HTTP 200; in
    that case nothing is written to disk.
    """
    logging.info("Downloading file at %r...", url)
    response = session.get(url)

    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would save an error page as if it were a
    # report.
    if response.status_code != 200:
        raise requests.HTTPError(
            "Download of {!r} failed with HTTP status {}".format(
                url, response.status_code
            ),
            response=response,
        )
    logging.info("Downloaded!")

    logging.info("Writing downloaded file to %r...", filename)
    with open(filename, 'wb') as f:
        f.write(response.content)
    logging.info("File written!")
def get_parser():
    """
    Build the command-line argument parser for this script.

    Supported options: -u/--username, -p/--password, -d/--directory
    (defaults to the current directory) and the -v/--verbose flag.
    """
    cli = argparse.ArgumentParser(
        description="Download Patreon's monthly CSV reports.",
    )

    # (flags, keyword arguments) for each option, in registration order.
    option_specs = (
        (('-u', '--username'), {'help': "Patreon username"}),
        (('-p', '--password'), {'help': "Patreon password"}),
        (('-d', '--directory'),
         {'help': "save CSV reports to DIRECTORY", 'default': '.'}),
        (('-v', '--verbose'),
         {'help': "show debug messages", 'action': 'store_true'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli
def get_options():
    """
    Parse the command line and return the script's options as a dict.

    The dict has the keys 'username', 'password', 'directory' and 'verbose'.
    A username or password that was not given (or was given empty) on the
    command line is asked for interactively, the password without echo.
    """
    namespace = get_parser().parse_args()

    username = namespace.username
    if not username:
        username = input("Patreon username: ")

    password = namespace.password
    if not password:
        password = getpass("Patreon password: ")

    return dict(
        username=username,
        password=password,
        directory=namespace.directory,
        verbose=namespace.verbose,
    )
if __name__ == '__main__':
    # Script entry point: read the options, log in, then save every monthly
    # report found on the patron manager page into the chosen directory.
    options = get_options()
    if options['verbose']:
        logging.basicConfig(level=logging.INFO)

    # Make sure the target directory exists before logging in, so that a
    # not-yet-created --directory does not make the first download fail when
    # the file is opened. An empty directory means "current directory".
    if options['directory']:
        makedirs(options['directory'], exist_ok=True)

    session = login(options['username'], options['password'])
    for filename, url in gen_monthly_report_links(session):
        _download(session, url, path.join(options['directory'], filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment