Last active
July 9, 2018 13:58
-
-
Save Xowap/27979b779d971e8109f52997886434c4 to your computer and use it in GitHub Desktop.
Fix pyppeteer download location
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This is a monkey patch of the requests-html module so it can download chrome | |
and store its data in a custom location and not in the home directory (which | |
is not writable on servers and also we do not want chrome stored there). | |
""" | |
from os.path import ( | |
join, | |
) | |
from pathlib import ( | |
Path, | |
) | |
from sys import ( | |
modules, | |
) | |
from django.conf import ( | |
settings, | |
) | |
def clean_modules():
    """
    Unload every ``requests_html`` / ``pyppeteer`` module from ``sys.modules``.

    Both the top-level packages and all of their dotted sub-modules are
    dropped, so that the next import of either library executes its module
    code again. This is called before patching (to start from a clean state)
    and after patching (so that code importing the libraries directly gets an
    unpatched copy).

    Modules whose name merely shares a prefix (e.g. ``pyppeteer_extra``) are
    left alone.
    """

    packages = ('requests_html', 'pyppeteer')
    sub_module_prefixes = ('requests_html.', 'pyppeteer.')

    # Iterate over a snapshot: ``sys.modules`` may change size while we scan
    # it (e.g. an import running in another thread), which would make
    # iterating over the live dictionary raise RuntimeError.
    to_delete = [
        name
        for name in tuple(modules)
        if name in packages or name.startswith(sub_module_prefixes)
    ]

    for name in to_delete:
        # ``pop`` with a default tolerates an entry that vanished in between.
        modules.pop(name, None)
def set_download_paths(m):
    """
    Redirect the download paths of a Chromium downloader module.

    ``m`` is the module object to patch (``make_html_session`` passes
    ``pyppeteer.chromium_downloader``). Its ``DOWNLOADS_FOLDER`` is replaced
    by ``<settings.PYPPETEER_DIR>/local-chromium`` and its
    ``chromiumExecutable`` mapping is rebuilt so that every platform entry
    lives below that folder, inside a directory named after ``m.REVISION``.
    """

    downloads = Path(settings.PYPPETEER_DIR) / 'local-chromium'
    m.DOWNLOADS_FOLDER = downloads

    revision_dir = downloads / m.REVISION
    # Both Windows flavours point at the same ``chrome-win32`` directory.
    windows_binary = revision_dir / 'chrome-win32' / 'chrome.exe'
    mac_binary = revision_dir.joinpath(
        'chrome-mac', 'Chromium.app', 'Contents', 'MacOS', 'Chromium',
    )

    m.chromiumExecutable = {
        'linux': revision_dir / 'chrome-linux' / 'chrome',
        'mac': mac_binary,
        'win32': windows_binary,
        'win64': windows_binary,
    }
def make_html_session():
    """
    Build an ``HTMLSession`` class backed by a patched pyppeteer.

    Steps, in order:

    1. Drop any already-loaded requests_html/pyppeteer modules.
    2. Import pyppeteer and redirect its Chromium download paths to
       ``settings.PYPPETEER_DIR``.
    3. Wrap ``pyppeteer.launch`` so that ``userDataDir`` is always forced to
       ``<settings.PYPPETEER_DIR>/data``.
    4. Import ``HTMLSession`` from requests_html while the patched pyppeteer
       is the one registered in ``sys.modules``.
    5. Drop the modules again, so that importing the libraries directly
       (not through this module) loads fresh, unpatched copies.

    Returns the ``HTMLSession`` class imported in step 4.
    """

    clean_modules()

    import pyppeteer
    from pyppeteer import chromium_downloader

    set_download_paths(chromium_downloader)

    upstream_launch = pyppeteer.launch

    def launch_in_custom_dir(*args, **kwargs):
        # The setting is read on every call and overrides any ``userDataDir``
        # supplied by the caller.
        kwargs.update(userDataDir=join(settings.PYPPETEER_DIR, 'data'))
        return upstream_launch(*args, **kwargs)

    pyppeteer.launch = launch_in_custom_dir

    # Must happen after the patching above and before the final clean-up.
    from requests_html import HTMLSession

    clean_modules()

    return HTMLSession
# Built once, when this module is imported: callers import ``HTMLSession``
# from here to get the class returned by ``make_html_session()``.
HTMLSession = make_html_session()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment