Skip to content

Instantly share code, notes, and snippets.

@ashaegupta
Created April 3, 2012 00:16
Show Gist options
  • Select an option

  • Save ashaegupta/2288196 to your computer and use it in GitHub Desktop.

Select an option

Save ashaegupta/2288196 to your computer and use it in GitHub Desktop.
process_reviews.py
# ---------------------------------------------------------------------------
# Top-level attribute keys
# ---------------------------------------------------------------------------
PLACE_ID_A = 'place_id'
FS_ID_A = 'fs_id'
YELP_ID_A = 'yelp_id'
PHONE_A = 'phone'
NAME_A = 'name'
# Value is a dict; its keys are the location sub attributes listed below.
LOCATION_A = 'loc'
# A single incoming review; a dict keyed by the review sub attributes below.
REVIEW_A = 'review'
# List of raw reviews.
ALL_REVIEWS_A = 'all_reviews'
# Dict of processed reviews.
RECOS_A = 'recos'
YELP_RATING_COUNT_A = 'yp_rating_count'
YELP_REVIEW_COUNT_A = 'yp_review_count'
CREATED_A = 'created'
UPDATED_A = 'updated'

# ---------------------------------------------------------------------------
# Sub attribute keys of the location dictionary
# ---------------------------------------------------------------------------
ADDRESS_SA = 'address'
LAT_SA = 'lat'
LON_SA = 'lon'

# ---------------------------------------------------------------------------
# Sub attribute keys of the review dictionary.
# 'wifi', 'exp' and 'plugs' are also the first-level keys of the
# recommendation data.
# ---------------------------------------------------------------------------
WIFI_SA = 'wifi'
EXP_SA = 'exp'
PLUGS_SA = 'plugs'
REVIEWER_SA = 'reviewer'
CREATED_REV_SA = 'created_review'

# ---------------------------------------------------------------------------
# Second-level keys of the recommendation data, found under each of the
# wifi / exp / plugs keys
# ---------------------------------------------------------------------------
# wifi = None, experience = Poor, plugs = None
LOW_SA = 'low'
# wifi = Limited, experience = Okay
MED_SA = 'med'
# wifi = Unlimited, experience = Great
HIGH_SA = 'high'
REVIEW_COUNT_SA = 'total_count'
RECO_SA = 'reco'

# First-level key holding the overall recommendation.
OVERALL_RECO_SA = 'overall_reco'
'''
Sample dictionaries (timestamp and sender are ignored for now).

review = {
    'wifi': 'low',
    'plugs': 'low',
}

reco_dict = {
    'wifi': {
        'low': 1,
        'high': 3,
        'total_count': 4,
        'reco': 'high',
    },
    'plugs': {
        'low': 1,
        'total_count': 1,
        'reco': 'low',
    },
}
'''
def process_reviews_and_update_recommendation_data(review=None, reco_dict=None):
    """Fold a single review into the per-attribute recommendation tallies.

    For every (attribute, score) pair in ``review`` whose score passes
    ``score_is_valid_reco_dict_key``, the matching entry of ``reco_dict`` is
    updated: the count stored under that score and the count stored under
    ``REVIEW_COUNT_SA`` are each incremented by one, and ``RECO_SA`` is set
    to the score when its count is strictly greater than the count of the
    currently recommended score (so a tie keeps the existing
    recommendation). Pairs with any other score are skipped.

    Args:
        review: dict mapping a review attribute to its score. ``None`` is
            treated as an empty review.
        reco_dict: dict mapping a review attribute to its tally dict. It is
            updated in place. ``None`` starts from a fresh, empty dict
            (a new dict per call, so tallies never leak between calls).

    Returns:
        The updated ``reco_dict`` (the same object that was passed in, when
        one was passed).
    """
    if review is None:
        review = {}
    if reco_dict is None:
        reco_dict = {}

    for review_attribute, review_score in review.items():
        # Ignore entries whose value is not a score (e.g. reviewer, created).
        if not score_is_valid_reco_dict_key(review_score):
            continue

        attribute_tally = reco_dict.get(review_attribute)
        if not attribute_tally:
            # No history for this attribute yet: start an empty tally.
            attribute_tally = {}
            reco_dict[review_attribute] = attribute_tally

        # Missing (or falsy) counters are treated as zero, so a partially
        # filled tally cannot raise KeyError.
        attribute_tally[review_score] = attribute_tally.get(review_score, 0) + 1
        attribute_tally[REVIEW_COUNT_SA] = (
            (attribute_tally.get(REVIEW_COUNT_SA) or 0) + 1
        )

        # Promote this score when it is now strictly the most frequent one.
        # A missing recommendation counts as zero, which avoids comparing an
        # int against None.
        current_reco = attribute_tally.get(RECO_SA)
        if attribute_tally[review_score] > attribute_tally.get(current_reco, 0):
            attribute_tally[RECO_SA] = review_score

    return reco_dict
def score_is_valid_reco_dict_key(score=None):
    """Return True when ``score`` equals HIGH_SA, MED_SA or LOW_SA, else False."""
    return score in (HIGH_SA, MED_SA, LOW_SA)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment