Skip to content

Instantly share code, notes, and snippets.

@Granitosaurus
Last active April 21, 2017 06:45
Show Gist options
  • Save Granitosaurus/83364ecb325844c89f9d4a573de06854 to your computer and use it in GitHub Desktop.
Save Granitosaurus/83364ecb325844c89f9d4a573de06854 to your computer and use it in GitHub Desktop.
Scraper for bundlestars bundles, outputs links and steam reviews in reddit comment format.
import json
import re
import requests
from parsel import Selector
def scrape():
    """Yield one summary dict per product of the Bundle Stars promotion.

    Fetches the promotion's product list from the Bundle Stars API, then for
    every product fetches its Bundle Stars detail record (to obtain the Steam
    app id) and the matching Steam store page.

    Yields:
        dict with the keys:
            'name'      -- product name as listed in the promotion,
            'rating'    -- last ``NN%`` match found in the Steam review
                           description, or '~' when none is found,
            'tags'      -- up to three Steam tags joined with ',',
            'steam_url' -- URL of the Steam store page that was scraped.
    """
    data = requests.get('https://www.bundlestars.com/api/promotions/mega-pick-mix-bundle-2')
    products = json.loads(data.text)[0]['products']
    for p in products:
        page = requests.get(f'https://www.bundlestars.com/api/products/{p["slug"]}').text
        product = json.loads(page)
        # get data from steam
        steam_url = f"http://store.steampowered.com/app/{product['steam']['id']}"
        steam_page = requests.get(steam_url).text
        # Parse the page once and reuse the selector for every query.
        sel = Selector(text=steam_page)
        rating = sel.css("span.responsive_reviewdesc").re(r'\d+%')
        raw_tags = sel.css("#game_highlights .app_tag::text").extract()
        # Keep real tag names only: drop blank entries and the literal '+'
        # (presumably the "add tag" button text). The previous `== '+'` test
        # was inverted and discarded every real tag. Filtering happens before
        # the three-tag limit so a '+' entry cannot use up a slot.
        cleaned = (t.strip() for t in raw_tags)
        tags = [t for t in cleaned if t and t != '+'][:3]
        yield {
            'name': p['name'],
            'rating': rating[-1] if rating else '~',
            'tags': ','.join(tags),
            'steam_url': steam_url,
        }
def render_table(items):
    """Return the reddit-markdown table for the scraped *items*.

    Each item is a mapping with the keys 'name', 'steam_url', 'rating' and
    'tags'. Rows are ordered by rating, highest first; a rating that is not
    of the form ``NN%`` (such as the '~' placeholder) counts as 0. Items with
    equal ratings keep their input order.

    The rating is read from the item's own 'rating' field rather than from
    the formatted row, so digits followed by '%' inside a product name
    cannot influence the ordering.
    """
    header = [
        '| Name | Rating | Tags |',
        '|:-------|:----------:|:-------|'
    ]

    def rating_value(item):
        match = re.fullmatch(r'(\d+)%', item['rating'])
        return int(match.group(1)) if match else 0

    ranked = sorted(items, key=rating_value, reverse=True)
    rows = [
        '|[{name}]({steam_url}) | {rating} | {tags} |'.format(**item)
        for item in ranked
    ]
    return ' \n'.join(header + rows)


if __name__ == '__main__':
    print(render_table(scrape()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment