Last active
April 21, 2017 06:45
-
-
Save Granitosaurus/83364ecb325844c89f9d4a573de06854 to your computer and use it in GitHub Desktop.
Scraper for Bundle Stars bundles; outputs links and Steam review ratings as a Reddit comment (Markdown table).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import re | |
import requests | |
from parsel import Selector | |
def scrape():
    """Yield one summary dict per product in the Bundle Stars promotion.

    Fetches the hard-coded ``mega-pick-mix-bundle-2`` promotion from the
    Bundle Stars API, then for every product fetches its product record and
    the matching Steam store page.

    Yields:
        dict with the keys:
            ``name``      -- product name as reported by the promotion API.
            ``rating``    -- last ``NN%`` string found in the Steam page's
                             ``span.responsive_reviewdesc`` elements, or
                             ``'~'`` when none is found.
            ``tags``      -- comma-joined tag texts from the first three
                             ``.app_tag`` entries, excluding ``'+'``.
            ``steam_url`` -- Steam store URL built from the product's
                             Steam id.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError / IndexError / ValueError: if an API response does not have
            the expected JSON shape.
    """
    # Without a timeout a stalled connection would block the script forever.
    timeout = 30  # seconds, applied to every HTTP request below

    data = requests.get(
        'https://www.bundlestars.com/api/promotions/mega-pick-mix-bundle-2',
        timeout=timeout,
    )
    products = json.loads(data.text)[0]['products']
    for p in products:
        page = requests.get(
            f'https://www.bundlestars.com/api/products/{p["slug"]}',
            timeout=timeout,
        ).text
        product = json.loads(page)

        # get data from steam
        steam_url = f"http://store.steampowered.com/app/{product['steam']['id']}"
        steam_page = requests.get(steam_url, timeout=timeout).text

        # Parse the page once and reuse the selector for both queries.
        sel = Selector(text=steam_page)
        rating = sel.css("span.responsive_reviewdesc").re(r'\d+%')
        tags = sel.css("#game_highlights .app_tag::text").extract()[:3]

        # Drop the '+' entry (presumably the "add tag" button -- confirm
        # against the Steam markup) and keep the real tag names.
        tag_names = [t.strip() for t in tags if t.strip() != '+']

        yield {
            'name': p['name'],
            # Several percentages may match; the last one is used.
            'rating': rating[-1] if rating else '~',
            'tags': ','.join(tag_names),
            'steam_url': steam_url,
        }
if __name__ == '__main__':
    # Print the scraped products as a Markdown table, highest rating first.
    header = [
        '| Name | Rating | Tags |',
        '|:-------|:----------:|:-------|'
    ]

    def _rating_value(item):
        """Return the item's rating as an int; 0 when it is not 'NN%'."""
        match = re.fullmatch(r'(\d+)%', item['rating'])
        return int(match.group(1)) if match else 0

    # Sort on the rating field itself, not on the rendered row: a product
    # whose name contains a percentage (e.g. "100% ...") would otherwise be
    # ordered by its name. sorted() is stable, so ties keep scrape order.
    items = sorted(scrape(), key=_rating_value, reverse=True)
    lines = [
        '|[{name}]({steam_url}) | {rating} | {tags} |'.format(**item)
        for item in items
    ]
    print(' \n'.join(header + lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment