Skip to content

Instantly share code, notes, and snippets.

@thinrhino
Forked from thekindlyone/amirocalypse.py
Created November 27, 2015 06:31
Show Gist options
  • Save thinrhino/3c95e4e40ddb455dbe25 to your computer and use it in GitHub Desktop.
Save thinrhino/3c95e4e40ddb455dbe25 to your computer and use it in GitHub Desktop.
from __future__ import division
from bs4 import BeautifulSoup as bs
import requests
import re
import time
from pymongo import MongoClient
from time import mktime
from datetime import datetime
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from itertools import product
def get_time(struct):
    """Convert a ``time.struct_time`` (or 9-tuple) into a naive ``datetime``.

    The fields are copied directly instead of round-tripping through
    ``mktime``/``fromtimestamp``: that round trip goes through local-time
    epoch seconds, which can shift the wall-clock value around DST
    transitions and can fail for dates the platform's ``mktime`` rejects.

    :param struct: time tuple, e.g. the result of ``time.strptime``.
    :returns: ``datetime`` carrying the same year..second fields.
    """
    year, month, day, hour, minute, second = struct[:6]
    # strptime accepts leap seconds (60/61) but datetime does not; clamp them.
    return datetime(year, month, day, hour, minute, min(second, 59))
def get_reviews(app_id='com.snapdeal.main',
                token='YEB6Kq1c9CQVYEF3hs_yi9_r9Bc:1448513248663',
                pages=None):
    """Yield one dict per review scraped from the Play Store review endpoint.

    Each requested page is POSTed to the ``getreviews`` URL, the embedded
    HTML fragment is cut out of the response and parsed, and every
    ``div.single-review`` element becomes a dict with the keys ``date``
    (``datetime``), ``rating`` (``int``), ``title`` and ``review``.

    :param app_id: package id sent as the ``id`` form field.
    :param token: value sent as the ``token`` form field.
    :param pages: iterable of page numbers; defaults to ``range(1, 200)``.
    :returns: generator of review dicts.

    Paging stops at the first page that contains no reviews, so the
    remaining requests are not sent once the listing is exhausted.
    """
    url = 'https://play.google.com/store/getreviews?authuser=0'
    # Escape residues left once the backslashes are stripped from the payload.
    table = {'u0026': '&', 'u003c': '<', 'u003d': '=', 'u003e': '>'}
    pattern = re.compile('|'.join(table))
    digit = re.compile(r'\d')
    if pages is None:
        pages = range(1, 200)

    for pno in pages:
        payload = {
            'reviewType': '0',
            'pageNum': str(pno),
            'id': app_id,
            'reviewSortOrder': '4',
            'xhr': '1',
            'token': token,
        }
        # Certificate verification is left at the requests default (enabled);
        # the timeout keeps a stalled connection from hanging the scrape.
        r = requests.post(url, data=payload, timeout=30)
        # Use decoded text so the str-based slicing works on Python 2 and 3.
        s = r.text
        # The HTML fragment is taken as everything between the first `,"`
        # and the last `"` of the response body.
        html = s[s.find(',"') + 2:s.rfind('"')]
        # NOTE(review): this strips *every* backslash, so escapes such as
        # `\n` are reduced to a bare letter — confirm that is acceptable.
        html = html.replace('\\', '')
        html = pattern.sub(lambda match: table[match.group()], html)
        soup = bs(html, 'lxml')

        review_divs = soup.findAll('div', 'single-review')
        if not review_divs:
            break

        for div in review_divs:
            title_span = div.find('span', 'review-title')
            stars_label = div.find('div', 'tiny-star').get('aria-label')
            posted = div.find('span', 'review-date').text
            yield dict(
                date=get_time(time.strptime(posted, "%d %B %Y")),
                # First digit of the star widget's aria-label is the rating.
                rating=int(digit.search(stars_label).group()),
                title=title_span.text,
                # The review body is the node right after the title span.
                review=title_span.next_sibling,
            )
# Store every scraped review in the `reviews` collection of the `snapdeal`
# database, using a MongoClient created with no arguments.
client = MongoClient()
db = client.snapdeal
collection = db.reviews
for review in get_reviews():
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(review)
    collection.insert_one(review)
# Per-date statistics keyed by review date:
#   avg   - mean rating for the day
#   total - number of reviews
#   high  - reviews rated above 3
#   low   - reviews rated below 2
# Built in a single pass over the collection instead of issuing several
# queries (and Cursor.count() calls) for every distinct date.
d = {}
rating_sums = {}
for item in collection.find({}, {'date': 1, 'rating': 1}):
    if 'date' not in item:
        # Documents without a date were never reported by distinct('date').
        continue
    date = item['date']
    rating = int(item['rating'])
    stats = d.setdefault(date, dict(avg=0, total=0, high=0, low=0))
    rating_sums[date] = rating_sums.get(date, 0) + rating
    stats['total'] += 1
    if rating > 3:
        stats['high'] += 1
    if rating < 2:
        stats['low'] += 1
for date, stats in d.items():
    # True division: the file imports `division` from __future__.
    stats['avg'] = rating_sums[date] / stats['total']
# Plot the four per-date series on a 2x2 grid of subplots and upload the
# figure through plotly.
dates = sorted(d.keys())
series_keys = 'avg total high low'.split()
legend = dict(
    avg='Average Ratings Received',
    total='Total Ratings Received',
    high='Ratings >= 4',
    low='Ratings <= 1',
)
# Axis titles per subplot, indexed in the same order as `series_keys`.
axes = dict(
    x=['Date'] * len(series_keys),
    y=[legend[key] for key in series_keys],
)
traces = [
    go.Scatter(
        x=dates,
        y=[d[date][key] for date in dates],
        name=legend[key],
    )
    for key in series_keys
]
# Only the figure title is kept here; axis titles are set per subplot below.
layout = dict(title='Award Wapasi')

fig = tools.make_subplots(rows=2, cols=2, subplot_titles=axes['y'])
# Fill the grid row by row: (1,1), (1,2), (2,1), (2,2).
for (row, col), trace in zip(product(range(1, 3), range(1, 3)), traces):
    fig.append_trace(trace, row, col)
# Previously `layout` was built but never applied, so the title was lost.
fig['layout'].update(layout)

template = '{}axis{}'.format
for axis, num in product(['x', 'y'], range(1, 5)):
    fig['layout'][template(axis, num)].update(title=axes[axis][num - 1])
plot_url = py.plot(fig, filename='award wapasi')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment