Skip to content

Instantly share code, notes, and snippets.

@breeko
Created March 24, 2018 02:37
Show Gist options
  • Select an option

  • Save breeko/06d9bc8be92474ec48b25ea3f189f872 to your computer and use it in GitHub Desktop.

Select an option

Save breeko/06d9bc8be92474ec48b25ea3f189f872 to your computer and use it in GitHub Desktop.
# Normalise CUISINE per restaurant name (DBA): every row of a DBA receives the
# cuisine label that appears most often among that DBA's rows.
# value_counts() orders labels by descending frequency, so position 0 is the
# most frequent one.
# NOTE(review): a DBA whose CUISINE values are all missing would leave
# value_counts() empty and make index[0] fail - confirm the data has none.
most_common_cuisine = (
    df.groupby(["DBA"])["CUISINE"]
    .agg(lambda labels: labels.value_counts().index[0])
)
# Remove the raw column first so the join can re-add CUISINE without a clash.
df = df.drop("CUISINE", axis=1).join(most_common_cuisine, on="DBA")
# Per-chain (DBA) summary statistics of SCORE, joined back onto every row of df.
# NUM_SCORE_DBA is the number of non-null SCORE values recorded for the chain,
# i.e. how many times the chain was graded.
score_by_dba = df.groupby("DBA")[["SCORE"]]

num_score_dba = score_by_dba.count()
num_score_dba.columns = ["NUM_SCORE_DBA"]

mean_score_dba = score_by_dba.mean()
mean_score_dba.columns = ["MEAN_SCORE_DBA"]

# The median gets its own name: previously it was assigned to mean_score_dba,
# which discarded the mean frame so MEAN_SCORE_DBA never reached df.
med_score_dba = score_by_dba.median()
med_score_dba.columns = ["MED_SCORE_DBA"]

max_score_dba = score_by_dba.max()
max_score_dba.columns = ["MAX_SCORE_DBA"]

min_score_dba = score_by_dba.min()
min_score_dba.columns = ["MIN_SCORE_DBA"]

std_score_dba = score_by_dba.std()
std_score_dba.columns = ["STD_SCORE_DBA"]

# Each statistic is indexed by DBA, so joining on the DBA column broadcasts the
# chain-level value to all of that chain's rows.
for field in [
    num_score_dba,
    mean_score_dba,
    med_score_dba,
    min_score_dba,
    max_score_dba,
    std_score_dba,
]:
    df = df.join(field, on="DBA")
# Update the dataframe with the share of each grade per chain (DBA): one new
# column GRADE_<grade> holding the fraction (0..1) of the chain's graded rows
# that received that grade.

# Rows with a missing DBA or GRADE are dropped by groupby, so size() here is
# the count of non-null grades per (DBA, GRADE) pair.
grade_counts = df.groupby(["DBA", "GRADE"]).size()

# Divide each count by its chain's total. transform("sum") keeps the
# (DBA, GRADE) index intact, unlike groupby(...).apply(...), which on
# pandas >= 2.0 prepends the group key as an extra index level and breaks the
# join below.
grade_totals = grade_counts.groupby(level="DBA").transform("sum")
grade_dba = (grade_counts / grade_totals).to_frame("GRADE")

for grade in grade_dba.index.get_level_values("GRADE").unique():
    column = "GRADE_{}".format(grade)
    # Slice out one grade; xs() drops the GRADE level, leaving a DBA index.
    _grade_dba = grade_dba.xs(grade, level="GRADE")["GRADE"].rename(column)
    df = df.join(_grade_dba, on="DBA")
    # Chains that never received this grade have no entry: their share is 0.
    # Assign the result back instead of fillna(inplace=True) on a column
    # selection, which does not modify df under pandas Copy-on-Write.
    df[column] = df[column].fillna(0)
# Summary view: keep chains with more than 50 non-null SCORE rows, order them
# by CUISINE ascending and then GRADE_A descending, and count the non-null
# SCORE rows for each listed combination. sort=False keeps the groups in the
# order produced by sort_values rather than re-sorting by the group keys.
(
    df[df["NUM_SCORE_DBA"] > 50]
    .sort_values(by=["CUISINE", "GRADE_A"], ascending=[True, False])
    .groupby(
        ["CUISINE", "DBA", "GRADE_A", "GRADE_C", "MED_SCORE_DBA", "MAX_SCORE_DBA"],
        sort=False,
    )[["SCORE"]]
    .count()
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment