Instantly share code, notes, and snippets.
Created
April 22, 2021 03:41
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save kakarukeys/bbfabee1298bf0bc5ffc15b1cb36feb9 to your computer and use it in GitHub Desktop.
borrow_cookies.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import json | |
import logging | |
import os.path | |
import subprocess | |
import time | |
from http.cookiejar import LWPCookieJar | |
from urllib.parse import urlparse | |
from selenium import webdriver | |
from selenium.webdriver.common.action_chains import ActionChains | |
from selenium.webdriver.common.by import By | |
import browser_cookie3 | |
import click | |
from miners.logging import configure_logging | |
# module-level logger; main() hands it to configure_logging() before use
logger = logging.getLogger(__name__)

# AliExpress sign-in page; the URL-encoded return_url query parameter points
# at the dropshipper-center product finder page
LOGIN_URL = "https://login.aliexpress.com/?flag=1&return_url=http%3A%2F%2Fhome.aliexpress.com%2Fdropshippercenter%2Fproduct_find.htm"

# page to visit after login, keyed by the value of the --target CLI option;
# None means no follow-up page is defined for that key
VISIT_URLS = {
    "hot-selling": None,
    "rev-img-search": "https://home.aliexpress.com/dropshippercenter/product_find.htm#/search",
}
def login_aliexpress(driver, username, password, url=None):
    """ sign in to aliexpress through the web login form on <driver>

        Opens LOGIN_URL, types <username> and <password> into the form and
        submits it. The pauses between steps are fixed sleeps; they are not
        tied to any page event.

        driver: selenium WebDriver to operate
        username: text typed into the "fm-login-id" field
        password: text typed into the "fm-login-password" field
        url: page to open after the form has been submitted; skipped if falsy
    """
    submit_button = (By.CSS_SELECTOR, ".fm-button")
    username_field = (By.ID, "fm-login-id")
    password_field = (By.ID, "fm-login-password")

    # load the login page and give it a fixed window size
    driver.get(LOGIN_URL)
    time.sleep(3)
    driver.set_window_size(1248, 692)
    time.sleep(1)

    # hover over the button, then click it
    hover_target = driver.find_element(*submit_button)
    ActionChains(driver).move_to_element(hover_target).perform()
    time.sleep(1)
    driver.find_element(*submit_button).click()
    time.sleep(2)

    # fill in the credentials; each element is looked up again right before use
    driver.find_element(*username_field).click()
    time.sleep(1)
    driver.find_element(*username_field).send_keys(username)
    time.sleep(1)
    driver.find_element(*password_field).send_keys(password)
    time.sleep(1)

    # submit and wait for the login to go through
    driver.find_element(*submit_button).click()
    time.sleep(7)

    if not url:
        return
    driver.get(url)
def get_cookies_path(driver):
    """ get the path to the cookies sqlite database

        Navigates <driver> to Firefox's about:support page, reads the text of
        the "profile-dir-box" element and returns it joined with
        "cookies.sqlite".

        driver: selenium Firefox WebDriver; it is left on about:support
        returns: path string of the cookies database of the driver's profile
    """
    driver.get("about:support")
    # find_element(By.ID, ...) replaces find_element_by_id(), which was
    # removed in Selenium 4.3; it is also the form used elsewhere in this file
    profile_dir = driver.find_element(By.ID, "profile-dir-box").text
    return os.path.join(profile_dir, "cookies.sqlite")
def extract_cookies(domain, output_filename, cookie_file=None):
    """ extract cookies for <domain> from Firefox, return a cookie jar

        A cookie is kept when its domain equals <domain>, "." + <domain>, or
        "." + the parent of <domain> (everything after the first label), e.g.
        for "home.aliexpress.com": "home.aliexpress.com",
        ".home.aliexpress.com" and ".aliexpress.com".

        domain: host name whose cookies are wanted
        output_filename: cookie jar output filename; the jar is only bound to
                         this file here, saving is left to the caller
        cookie_file: path to the cookies sqlite database, if omitted,
                     default to desktop browser cookie file
        returns: LWPCookieJar holding the matching cookies
    """
    allowed_domains = {domain, '.' + domain}

    # partition() instead of split()[1]: a domain without any dot
    # (e.g. "localhost") has no parent and must not raise IndexError
    _, _, parent_domain = domain.partition('.')
    if parent_domain:
        allowed_domains.add('.' + parent_domain)

    jar = LWPCookieJar(output_filename)
    cookies = browser_cookie3.firefox(cookie_file=cookie_file)

    # domain_name argument does not work in browser_cookie3.firefox
    # has to do the filtering manually
    for c in cookies:
        if c.domain in allowed_domains:
            jar.set_cookie(c)

    return jar
def extract_save_cookies(domain, output_filename, cookie_file=None, max_attempts=30, retry_delay=3):
    """ extract and save cookies for <domain> to <output_filename>
        attempt multiple times before giving up

        Each attempt extracts the cookies and writes them to
        <output_filename>; the attempt counts as successful once a cookie
        named "JSESSIONID" is among them.

        domain: host name whose cookies are wanted
        output_filename: cookie jar output filename, rewritten on every attempt
        cookie_file: path to the cookies sqlite database, if omitted,
                     default to desktop browser cookie file
        max_attempts: number of extraction attempts before giving up
        retry_delay: seconds to wait between two attempts

        raises:
            KeyError -- if missing httpOnly cookies
    """
    for attempt in range(1, max_attempts + 1):
        logger.debug(f"extracting cookies from {cookie_file}")

        cookie_jar = extract_cookies(domain, output_filename, cookie_file)
        cookie_jar.save(ignore_discard=True, ignore_expires=True)

        if any(c.name == "JSESSIONID" for c in cookie_jar):
            break

        # no point in waiting after the final attempt, fail right away
        if attempt < max_attempts:
            time.sleep(retry_delay)
    else:
        # loop ran out without a break: the expected cookie never showed up
        raise KeyError(f"missing httpOnly cookies for {domain}")

    logger.debug(f"saved to {output_filename}")
def borrow_cookies(url, account, output_filename, desktop):
    """ borrow cookies on <url> and output to <output_filename>

        url: page to open; its host name selects which cookies are extracted
        account: user account credentials, unpacked as keyword arguments into
                 login_aliexpress (so it needs "username" and "password" keys);
                 unused when <desktop> is True
        output_filename: cookie jar output filename
        desktop: if True, you need to input usernames and passwords yourself
                 if False, selenium will do that for you

        raises:
            ValueError -- if no host name can be read from <url>
            KeyError -- if missing httpOnly cookies
    """
    # hostname rather than netloc: cookie domains never carry a port or
    # user info, so those parts must not end up in the domain filter
    domain = urlparse(url).hostname if url else None

    if not domain:
        # fail before any browser is started instead of crashing half-way
        raise ValueError(f"cannot determine the cookie domain from url: {url!r}")

    if desktop:
        # open a tab on desktop firefox; the process is not waited for, and
        # its output is discarded because nothing ever reads it (an unread
        # pipe could fill up and block the browser)
        subprocess.Popen(["firefox", "-new-tab", url], stdout=subprocess.DEVNULL)

        # fixed pause during which the user is expected to log in manually
        time.sleep(40)
        extract_save_cookies(domain, output_filename)
    else:
        # open selenium firefox
        with webdriver.Firefox() as driver:
            # resolve the profile's cookie database first, login navigates away
            cookie_file = get_cookies_path(driver)
            login_aliexpress(driver, url=url, **account)     # auto-login
            extract_save_cookies(domain, output_filename, cookie_file)
            time.sleep(5)
@click.command()
@click.option('-a', '--acc-filename', default="aliexpress-acc.json", type=click.File('r'), help="use login credentials from this file, default: aliexpress-acc.json")
@click.option('-o', '--output-filename', default="cookies.txt", help="output filename to use, default: cookies.txt")
@click.option('-t', '--target', default="rev-img-search", type=click.Choice(list(VISIT_URLS)), help="which URL to visit after login? (keys and URLs are defined in the script)")
@click.option('--desktop/--no-desktop', default=False, help="use a real desktop browser to get the cookies? DEFAULT: no-desktop")
def main(acc_filename, output_filename, target, desktop):
    """ Borrow session cookies at AliExpress from Firefox browser, save them to a cookie file.
    """
    # acc_filename: despite its name, an already opened file object (click.File)
    # output_filename: path of the cookie file to write
    # target: key into VISIT_URLS; click.Choice rejects unknown keys with a
    #         usage error instead of a KeyError on the lookup below
    # desktop: use the desktop browser instead of a selenium-driven one
    configure_logging(logger, "logs/borrow_cookies.log")

    acc = json.load(acc_filename)
    url = VISIT_URLS[target]

    if url:
        logger.info(f"borrowing cookies at {url}")

    if not acc:
        logger.error("missing user account credentials.")
        return

    try:
        borrow_cookies(url, acc, output_filename, desktop)
    except Exception as e:
        # record the failure with its traceback in the log, then let it propagate
        logger.exception(e)
        raise

    logger.info("done.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment