Created
May 3, 2023 14:05
-
-
Save nvllsvm/37a7393729aa45ab11ebeb46a1d1e2a1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import asyncio | |
import hashlib | |
import json | |
import logging | |
import pathlib | |
from playwright.async_api import async_playwright | |
import bs4 # beautifulsoup4 | |
# Module-wide logger; a fixed name (rather than __name__) keeps log records
# labelled 'scrape' whether the file is run as a script or imported.
LOGGER = logging.getLogger('scrape')
def sha256sum(data: bytes) -> str:
    """Return the SHA-256 digest of ``data`` as a hexadecimal string.

    Args:
        data: The bytes (or other bytes-like object) to hash.

    Returns:
        The digest rendered as 64 lowercase hexadecimal characters.
    """
    # hashlib constructors accept the initial data directly, so a separate
    # update() call is unnecessary for a one-shot hash.
    return hashlib.sha256(data).hexdigest()
def _data_layer_push_argument(line):
    """Return the text between ``dataLayer.push(`` and the final ``)``, or None.

    ``line`` is stripped of surrounding whitespace first. None is returned
    when the stripped line is not a single ``dataLayer.push(...)`` call.
    """
    prefix = 'dataLayer.push('
    suffix = ')'
    line = line.strip()
    if not (line.startswith(prefix) and line.endswith(suffix)):
        return None
    # Slice off exactly one prefix and one suffix. str.strip(chars) must not
    # be used here: it removes any run of the given *characters* from both
    # ends, which corrupts payloads such as `true` or ones ending in ')'.
    return line[len(prefix):-len(suffix)]


def get_status(content):
    """Extract the first ``dataLayer.push(...)`` payload from an HTML page.

    Every ``<script type="text/javascript">`` element is scanned line by
    line; the argument of the first line that consists solely of a
    ``dataLayer.push(...)`` call is decoded as JSON and returned.

    Args:
        content: The HTML document as a string.

    Returns:
        The decoded JSON value of the first matching line, or None when no
        line matches.

    Raises:
        json.JSONDecodeError: If the first matching line's argument is not
            valid JSON.
    """
    soup = bs4.BeautifulSoup(content, features='html.parser')
    for script in soup.find_all('script'):
        if script.get('type') != 'text/javascript':
            continue
        for line in script.get_text().splitlines():
            argument = _data_layer_push_argument(line)
            if argument is not None:
                return json.loads(argument)
    return None
async def main(tracking_num='9405508205497576264031', poll_interval=300):
    """Poll the USPS tracking page until its screenshot changes.

    On every cycle the tracking page is opened in headless Firefox, any
    impressions found in the page's ``dataLayer.push`` payload are logged,
    the HTML is saved to ``content.html`` and a screenshot is saved to
    ``screenshot.png``. The hash of the first screenshot is remembered; as
    soon as a later screenshot hashes differently, 'Delivered?!' is logged
    and the coroutine returns.

    Args:
        tracking_num: Tracking number inserted into the USPS tracking URL.
        poll_interval: Seconds to sleep between two checks.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    url = f'https://tools.usps.com/go/TrackConfirmAction?qtc_tLabels1={tracking_num}'
    previous_hash = None
    while True:
        LOGGER.info('Checking')
        async with async_playwright() as p:
            browser = await p.firefox.launch()
            try:
                page = await browser.new_page()
                await page.set_viewport_size({"width": 1600, "height": 1200})
                await page.goto(url)
                # Fixed pause before reading the page, as in the original
                # script (presumably to let client-side rendering finish).
                await asyncio.sleep(5)
                # Fetch the markup once so the saved file is exactly what
                # was parsed.
                content = await page.content()
                status = get_status(content)
                if isinstance(status, dict):
                    # Logging is best-effort: tolerate payloads that lack
                    # the expected keys instead of aborting the monitor.
                    ecommerce = status.get('ecommerce') or {}
                    for impression in ecommerce.get('impressions') or []:
                        LOGGER.info('%s (%s)',
                                    impression.get('eventCode'),
                                    impression.get('category'))
                pathlib.Path('content.html').write_text(content, encoding='utf-8')
                await page.screenshot(path='screenshot.png')
            finally:
                # Always release the browser, even when navigation or
                # scraping raised.
                await browser.close()
        current_hash = sha256sum(pathlib.Path('screenshot.png').read_bytes())
        if previous_hash is None:
            previous_hash = current_hash
        elif previous_hash != current_hash:
            LOGGER.info('Delivered?!')
            # Returning ends asyncio.run(); the `exit()` helper comes from
            # the `site` module and is not guaranteed to be available.
            return
        LOGGER.info('Sleeping')
        await asyncio.sleep(poll_interval)
# Run the polling loop only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment