Created
November 25, 2019 08:52
-
-
Save mzaksana/64c8ff54a144b46a77ebb45b42788fd0 to your computer and use it in GitHub Desktop.
Virtualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas | |
import json | |
import re | |
from collections import OrderedDict | |
import plotly.graph_objects as go | |
import chart_studio.plotly as py | |
from bs4 import BeautifulSoup | |
import yaml | |
PRODUCT_DATA={} | |
SHOP_DATA={} | |
def orderDict(data):
    """Return an OrderedDict of ``data``'s entries, highest 'avg_rating' first.

    Each value of ``data`` must be a mapping with an 'avg_rating' entry.
    ``data`` itself is not modified; callers must use the returned mapping.
    """
    def rating_of(entry):
        _, record = entry
        return record['avg_rating']

    ranked = sorted(data.items(), key=rating_of, reverse=True)
    return OrderedDict(ranked)
def ascii_encode_dict(data):
    """Return a copy of ``data`` with text keys and values as native strings.

    On Python 2, every ``unicode`` key or value is encoded to an ASCII byte
    string (``str``); non-ASCII text raises ``UnicodeEncodeError``. On
    Python 3 the ``unicode`` type does not exist and ``str`` is already the
    native text type, so keys and values are returned unchanged. Non-text
    values (numbers, lists, nested dicts, ...) are never touched.
    """
    try:
        text_type = unicode  # noqa: F821 -- only defined on Python 2
    except NameError:
        # Python 3: encoding would produce bytes keys, which would break
        # lookups such as jsonObj['asin'], so leave strings alone.
        text_type = None

    def to_native(item):
        if text_type is not None and isinstance(item, text_type):
            return item.encode('ascii')
        return item

    return {to_native(key): to_native(value) for key, value in data.items()}
def readData(text):
    """Parse one JSON document from ``text`` and normalise its strings.

    The parsed object is passed through ``ascii_encode_dict``, so ``text``
    must decode to a JSON object (dict) at the top level.
    """
    parsed = json.loads(text)
    return ascii_encode_dict(parsed)
def makeData(jsonObj):
    """Record one review under its product in the module-level PRODUCT_DATA.

    ``jsonObj`` must provide 'asin' (the product key) and 'overall' (the
    rating). The first review for a product creates its record with an
    empty star histogram and a placeholder 'avg_rating' of 1; every review,
    including that first one, is then appended to 'data' and tallied in
    'stars'.

    Raises:
        KeyError: if 'asin' or 'overall' is missing, or if the rating is
            not one of the five histogram keys 1.0 through 5.0.
    """
    asin = jsonObj['asin']
    product = PRODUCT_DATA.get(asin)
    if product is None:
        product = {
            'data': [],
            'avg_rating': 1,
            'stars': {1.0: 0, 2.0: 0, 3.0: 0, 4.0: 0, 5.0: 0},
        }
        PRODUCT_DATA[asin] = product

    # Done for every review (not only repeat ones) so the histogram total
    # always equals the number of stored reviews.
    product['data'].append(jsonObj)
    product['stars'][jsonObj['overall']] += 1
def makeDataShop(jsonObj):
    """Append ``jsonObj`` to the list kept for its 'brand' in SHOP_DATA.

    A new list is started the first time a brand is seen.
    """
    brand = jsonObj['brand']
    SHOP_DATA.setdefault(brand, []).append(jsonObj)
def _sunburst_series(products, top):
    """Build the parallel labels/parents/values lists for the sunburst chart.

    The root node is "review"; each selected product is a child of the root
    and has five children, one per star level. A non-positive ``top``
    selects every product.
    """
    star_levels = (("1.0", 1.0), ("2.0", 2.0), ("3.0", 3.0),
                   ("4.0", 4.0), ("5.0", 5.0))
    labels = ["review"]
    parents = [""]
    values = []
    grand_total = 0

    keys = list(products)
    if top > 0:
        keys = keys[:top]

    # The numeric suffix (starting at 2) is appended to each star label,
    # presumably so labels stay unique across products.
    for suffix, key in enumerate(keys, start=2):
        stars = products[key]['stars']
        counts = [stars[level] for _, level in star_levels]
        product_total = sum(counts)
        grand_total += product_total

        labels.append(key)
        labels.extend(prefix + str(suffix) for prefix, _ in star_levels)
        parents.append("review")
        parents.extend([key] * len(star_levels))
        values.append(product_total)
        values.extend(counts)

    # With branchvalues="total" each parent carries the sum of its children,
    # so the root value is the grand total and goes first.
    values.insert(0, grand_total)
    return labels, parents, values


def makeView(top):
    """Show a sunburst chart of star-rating counts for the first ``top`` products.

    Products are taken in the iteration order of the module-level
    PRODUCT_DATA, so sort it beforehand if a ranking is wanted. Exactly
    ``top`` products are plotted (fewer if not enough exist); a
    non-positive ``top`` plots all of them.
    """
    labels_data, parents_data, values_data = _sunburst_series(PRODUCT_DATA, top)
    fig = go.Figure(go.Sunburst(
        labels=labels_data,
        parents=parents_data,
        values=values_data,
        branchvalues="total",
    ))
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    fig.show()
def sumGroupCount(dictData):
    """Return the sum of all values in ``dictData`` (0 when it is empty)."""
    return sum(dictData.values())
def to_utf8(loader, node):
    """Return the scalar built from ``node`` by ``loader``, UTF-8 encoded.

    ``loader`` only needs a ``construct_scalar(node)`` method; the signature
    matches that of a YAML constructor callback.
    """
    scalar = loader.construct_scalar(node)
    return scalar.encode('utf-8')
def calculateAvgRating():
    """Recompute 'avg_rating' for every product in PRODUCT_DATA in place.

    The average is the sum of the 'overall' ratings of a product's stored
    reviews divided by the number of those reviews.
    """
    for product in PRODUCT_DATA.values():
        reviews = product['data']
        rating_sum = sum(review['overall'] for review in reviews)
        product['avg_rating'] = rating_sum / len(reviews)
# Load the review dump, aggregate it per product, rank by rating and plot.
filepath = '../data/500.json'
cnt = 0  # number of records ingested
with open(filepath) as fp:
    # Iterate the file directly so the first line is processed as well;
    # a priming readline() followed by another read at the top of the loop
    # would silently drop it.
    for line in fp:
        # Lines shorter than 5 characters (blank lines etc.) are skipped
        # rather than parsed as JSON.
        if len(line) < 5:
            continue
        makeData(readData(line))
        cnt += 1
calculateAvgRating()
# orderDict returns a new mapping instead of sorting in place, so rebind the
# global for makeView to iterate products from highest to lowest rating.
PRODUCT_DATA = orderDict(PRODUCT_DATA)
makeView(10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment