Last active
April 1, 2019 05:30
-
-
Save Havoc24k/b7fe5c3f5589045212540c588c7ed28b to your computer and use it in GitHub Desktop.
Get some basic statistical data to detect any irregularities on the ratings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Efood ML.
Phase 1:
Collect data from efood comments
Future Phase 2:
Detect patterns on comments.
- Most common names
- Most common times the comments are added
- Most common words in comments
Future Phase 3:
Try and deduce which shops have false stats due to false reviews.
"""
import json
from collections import Counter, defaultdict
from itertools import groupby

import requests
from scipy import stats
# Minimum number of occurrences of a first name for it to be considered
# statistically significant and included in the per-name score report.
MIN_NAME_FREQUENCY = 20

# Base URL of the e-food API; endpoint paths are appended to it.
API_URL = "https://api.e-food.gr/api/v1"

# Value sent as the ``limit`` query parameter of the ratings request.
MAX_RESULTS = 20000

# Id of the shop to analyse. Get the shop id from the browser dev tools.
# NOTE(review): the all-zero value looks like a placeholder -- replace it
# with a real shop id before running.
SHOP_ID = 000000
# Seconds to wait for the ratings API before giving up; without a timeout
# ``requests`` may block indefinitely on an unresponsive server.
_REQUEST_TIMEOUT = 30


def _fetch_ratings(shop_id, limit):
    """Download the ratings of one shop and return the decoded JSON body.

    Sends a single GET to ``<API_URL>/restaurants/<shop_id>/ratings/`` asking
    for up to ``limit`` ratings starting at offset 0.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    url = API_URL + "/restaurants/{}/ratings/".format(shop_id)
    querystring = {
        "limit": limit,
        "offset": "0",
        "mode": "extended",
        "comment_only": "false",
    }
    headers = {
        'Accept': "*/*",
        'Origin': "https://www.e-food.gr",
        'DNT': "1",
        'Content-Type': "application/json",
        'cache-control': "no-cache",
    }
    response = requests.get(
        url, headers=headers, params=querystring, timeout=_REQUEST_TIMEOUT
    )
    # Fail here with a clear HTTP error instead of a confusing KeyError later
    # when an error payload lacks the expected "data" key.
    response.raise_for_status()
    return response.json()


def _print_summary(summary):
    """Print the shop-wide totals, the average and the per-star counts."""
    print("Shop Summary")
    print("--------------")
    # NOTE(review): earlier, disabled prints referenced further summary keys
    # (avg_quality, avg_service, avg_delivery_time) -- confirm against the API
    # before relying on them.
    print("Total Ratings: ",
          summary["ratings_without_comments"] + summary["ratings_comments"])
    print("Average: ", summary["average"])
    print("*****: ", summary["five_star_ratings"])
    print("****: ".ljust(7), summary["four_star_ratings"])
    print("***: ".ljust(7), summary["three_star_ratings"])
    print("**: ".ljust(7), summary["two_star_ratings"])
    print("*: ".ljust(7), summary["one_star_ratings"])


def _report_name_frequencies(ratings):
    """Print first-name counts and return the set of frequent names.

    A name is "frequent" when it occurs at least MIN_NAME_FREQUENCY times.
    """
    first_names = [rating["first_name"] for rating in ratings]
    name_counts = Counter(first_names)

    print("--------------")
    print("Total names count: {}".format(len(first_names)))
    print("--------------")
    print("Unique names count: {}".format(len(name_counts)))
    print("--------------")
    print("Name frequency:")

    # Most frequent first; names with equal counts stay in alphabetical order.
    by_frequency = sorted(name_counts.items(), key=lambda kv: (-kv[1], kv[0]))

    # Keep only names that reach the MIN_NAME_FREQUENCY threshold. A set makes
    # the later per-rating membership test O(1).
    frequent_names = set()
    for name, frequency in by_frequency:
        if frequency < MIN_NAME_FREQUENCY:
            # Sorted by descending frequency: nothing further can qualify.
            break
        print("{}: {}".format(name, frequency))
        frequent_names.add(name)
    return frequent_names


def _report_scores_by_name(ratings, frequent_names):
    """Print descriptive statistics of the scores given under each name.

    Only ratings whose ``first_name`` is in ``frequent_names`` are considered;
    names are reported in the order they first appear in ``ratings``.
    """
    scores = defaultdict(list)
    for rating in ratings:
        name = rating["first_name"]
        if name in frequent_names:
            scores[name].append(rating["overall_numeric"])

    print("--------------")
    print("Scores by name:")
    for name, scores_by_name in scores.items():
        score_stats = stats.describe(scores_by_name)
        print("Name: ", name)
        print("Scores: ", scores_by_name)
        print("Num of ratings: ", len(scores_by_name))
        print("Mean: ", score_stats.mean)
        print("Min: ", score_stats.minmax[0])
        print("Max: ", score_stats.minmax[1])
        print("Variance: ", score_stats.variance)
        # Separator after every name's statistics.
        print("--------------")


def main():
    """Fetch the ratings of SHOP_ID and print a name-based report.

    Requests up to MAX_RESULTS ratings from the API, then prints:
      * the shop summary (total, average, count per star level),
      * how many first names / unique first names appear in the ratings,
      * every first name occurring at least MIN_NAME_FREQUENCY times,
      * for each such name, the list of its scores with mean, min, max and
        variance as computed by ``scipy.stats.describe``.

    Returns:
        None. All results are written to standard output.

    Raises:
        requests.RequestException: if the HTTP request fails, times out or
            returns an error status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the payload lacks one of the keys read here.
    """
    data = _fetch_ratings(SHOP_ID, MAX_RESULTS)["data"]
    ratings = data["ratings"]

    _print_summary(data["summary"])
    frequent_names = _report_name_frequencies(ratings)
    _report_scores_by_name(ratings, frequent_names)
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment