Skip to content

Instantly share code, notes, and snippets.

@zyocum
Last active February 5, 2019 00:55
Show Gist options
  • Select an option

  • Save zyocum/393d681d8fb1a98af32630832cf77e7c to your computer and use it in GitHub Desktop.

Select an option

Save zyocum/393d681d8fb1a98af32630832cf77e7c to your computer and use it in GitHub Desktop.
Scrape Limecrime
#!/usr/bin/env python3
"""Collect data from limecrime.com"""
import json
from requests_html import HTMLSession
# Site root; main() passes it to categories() as the starting page.
LIMECRIME = 'https://www.limecrime.com/'
# One HTMLSession shared by categories() and products() for every request.
session = HTMLSession()
def categories(url):
    """Generate per-category URLs from the main page.

    Fetches ``url`` with the module-level ``session``, looks up the first
    ``div.primary-nav`` element and yields every absolute link inside it
    that contains ``'/categories'``.

    Links are yielded in sorted order so repeated runs visit the categories
    in the same sequence. If the page has no ``div.primary-nav`` element,
    nothing is yielded.

    Args:
        url: URL of the page holding the primary navigation.

    Yields:
        Absolute category URLs.
    """
    response = session.get(url)
    primary_nav = response.html.find('div.primary-nav', first=True)
    if primary_nav is None:
        # find(..., first=True) gives None when nothing matches; stop here
        # instead of failing with an AttributeError on the next line.
        return
    # absolute_links is unordered, so sort it for reproducible output.
    for link in sorted(primary_nav.absolute_links):
        if '/categories' in link:
            yield link
def products(product_url):
    """Generate product dictionaries scraped from one listing page.

    Fetches ``product_url`` with the module-level ``session`` and inspects
    every ``div.product-summary__info`` element. For each one, the first
    ``<a>`` element's ``data-analytics`` attribute is parsed as JSON and its
    ``'payload'`` entry is yielded with the link's absolute URL stored under
    the ``'url'`` key.

    Tiles that have no anchor, no link, or no ``data-analytics`` attribute
    are skipped so that one malformed tile does not abort the whole scrape.
    A ``data-analytics`` value that is not valid JSON, or that lacks a
    ``'payload'`` entry, still raises.

    Args:
        product_url: URL of the page to fetch.

    Yields:
        The decoded ``'payload'`` of each product, extended with ``'url'``.
    """
    response = session.get(product_url)
    # Locals are named so that nothing shadows this function's own name.
    for tile in response.html.find('div.product-summary__info'):
        anchor = tile.find('a', first=True)
        if anchor is None:
            continue
        # Take one member of the anchor's link collection, if there is any.
        url = next(iter(anchor.absolute_links), None)
        analytics = anchor.attrs.get('data-analytics')
        if url is None or analytics is None:
            continue
        payload = json.loads(analytics)['payload']
        payload['url'] = url
        yield payload
def main():
    """Write every product of every category to stdout, one JSON object per line."""
    # Flatten categories -> products lazily; each record is printed as soon
    # as it is produced.
    records = (
        record
        for category_url in categories(LIMECRIME)
        for record in products(category_url)
    )
    for record in records:
        line = json.dumps(record, ensure_ascii=False)
        print(line)
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment