Skip to content

Instantly share code, notes, and snippets.

Last active April 1, 2019 05:30
Show Gist options
  • Save Havoc24k/b7fe5c3f5589045212540c588c7ed28b to your computer and use it in GitHub Desktop.
Save Havoc24k/b7fe5c3f5589045212540c588c7ed28b to your computer and use it in GitHub Desktop.
Get some basic statistical data to detect any irregularities on the ratings
Efood ML.
Phase 1:
Collect data from efood comments
Future Phase 2:
Detect patterns on comments.
- Most common names
- Most common times the comments are added
- Most common words in comments
Future Phase 3:
Try and deduce which shops have false stats due to false reviews.
import json
import requests
from itertools import groupby
from scipy import stats
# Minimum number of occurrences of a first name for it to be treated as
# statistically significant and included in the per-name score report.
MIN_NAME_FREQUENCY = 10

# Maximum number of ratings requested from the API in a single call.
# NOTE(review): the original value is not visible in this file; 1000 is a
# placeholder -- confirm against the API's limits.
MAX_RESULTS = 1000

# Name of the per-rating field holding the numeric score.
# NOTE(review): the original field name is not visible in this file;
# "overall" is an assumption -- verify against a real API response.
SCORE_FIELD = "overall"

# Base URL of the ratings API; must be filled in before running.
API_URL = ""

# Get shop id from browser dev tools
SHOP_ID = 0


def main():
    """Fetch a shop's ratings and print summary and per-name statistics.

    Requests the ratings of ``SHOP_ID`` from ``API_URL``, prints the shop's
    rating summary, then counts how often each reviewer first name occurs.
    Names occurring at least ``MIN_NAME_FREQUENCY`` times are listed, followed
    by descriptive statistics of the scores submitted under each such name.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = API_URL + "/restaurants/{}/ratings/".format(SHOP_ID)
    querystring = {
        "limit": MAX_RESULTS,
        "offset": "0",
        "mode": "extended",
        "comment_only": "false",
    }
    headers = {
        'Accept': "*/*",
        'Origin': "",
        'DNT': "1",
        'Content-Type': "application/json",
        'cache-control': "no-cache",
    }

    # A timeout keeps the script from hanging forever on a stalled
    # connection; failing on HTTP errors here gives a clearer message than a
    # JSON/KeyError further down.
    response = requests.request(
        "GET", url, headers=headers, params=querystring, timeout=30)
    response.raise_for_status()
    results = json.loads(response.text)

    summary = results["data"]["summary"]
    ratings = results["data"]["ratings"]

    print("Shop Summary")
    # NOTE(review): the second operand was truncated in the original; the sum
    # of ratings with and without comments is assumed -- confirm.
    print("Total Ratings: ",
          summary["ratings_without_comments"] + summary["ratings_comments"])
    print("Average: ", summary["average"])
    star_fields = (
        (5, "five_star_ratings"),
        (4, "four_star_ratings"),
        (3, "three_star_ratings"),
        (2, "two_star_ratings"),
        (1, "one_star_ratings"),
    )
    for star_count, field in star_fields:
        # Pad every label to 7 characters so the counts line up.
        print(("*" * star_count + ": ").ljust(7), summary[field])

    # Collect the first name attached to every rating.
    first_names = [rating["first_name"] for rating in ratings]
    print("Total names count: {}".format(len(first_names)))

    unique_names = set(first_names)
    print("Unique names count: {}".format(len(unique_names)))

    # groupby only merges adjacent equal items, hence the sort beforehand.
    common_names = {
        name: len(list(group)) for name, group in groupby(sorted(first_names))
    }

    print("Name frequency:")
    sorted_common_names = sorted(
        common_names.items(), key=lambda kv: kv[1], reverse=True)

    # Keep only names that reach the significance threshold. Store the bare
    # name (not a (name, frequency) tuple) so the membership test below works.
    frequent_names = set()
    for name, frequency in sorted_common_names:
        if frequency >= MIN_NAME_FREQUENCY:
            print("{}: {}".format(name, frequency))
            frequent_names.add(name)

    # Group the scores of all ratings submitted under a frequent name.
    scores = {}
    for rating in ratings:
        first_name = rating["first_name"]
        if first_name in frequent_names:
            scores.setdefault(first_name, []).append(rating[SCORE_FIELD])

    print("Scores by name:")
    for name, scores_by_name in scores.items():
        score_stats = stats.describe(scores_by_name)
        print("Name: ", name)
        print("Scores: ", scores_by_name)
        print("Num of ratings: ", len(scores_by_name))
        print("Mean: ", score_stats.mean)
        print("Min: ", score_stats.minmax[0])
        print("Max: ", score_stats.minmax[1])
        print("Variance: ", score_stats.variance)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment