Created
April 3, 2012 00:16
-
-
Save ashaegupta/2288196 to your computer and use it in GitHub Desktop.
process_reviews.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Keys of the top-level place record
# ---------------------------------------------------------------------------
PLACE_ID_A = 'place_id'
FS_ID_A = 'fs_id'
YELP_ID_A = 'yelp_id'
PHONE_A = 'phone'
NAME_A = 'name'
LOCATION_A = 'loc'  # value is a dict keyed by the location sub-attributes below
REVIEW_A = 'review'  # one incoming review; a dict keyed by the review sub-attributes below
ALL_REVIEWS_A = 'all_reviews'  # list of raw reviews
RECOS_A = 'recos'  # dict of processed reviews
YELP_RATING_COUNT_A = 'yp_rating_count'
YELP_REVIEW_COUNT_A = 'yp_review_count'
CREATED_A = 'created'
UPDATED_A = 'updated'

# ---------------------------------------------------------------------------
# Keys inside the location dictionary
# ---------------------------------------------------------------------------
ADDRESS_SA, LAT_SA, LON_SA = 'address', 'lat', 'lon'

# ---------------------------------------------------------------------------
# Keys inside a review dictionary. wifi / exp / plugs are also the first-level
# keys of the recommendation data.
# ---------------------------------------------------------------------------
WIFI_SA = 'wifi'
EXP_SA = 'exp'
PLUGS_SA = 'plugs'
REVIEWER_SA = 'reviewer'
CREATED_REV_SA = 'created_review'

# ---------------------------------------------------------------------------
# Second-level keys under each wifi / exp / plugs entry of the recommendation
# data. Meaning of the three score levels:
#   low  -> wifi = None,      experience = Poor,  plugs = None
#   med  -> wifi = Limited,   experience = Okay
#   high -> wifi = Unlimited, experience = Great
# ---------------------------------------------------------------------------
LOW_SA, MED_SA, HIGH_SA = 'low', 'med', 'high'
REVIEW_COUNT_SA = 'total_count'
RECO_SA = 'reco'

# First-level key holding the overall recommendation.
OVERALL_RECO_SA = 'overall_reco'
'''
Sample dictionaries
# ignoring timestamp and sender for now
review = {
    'wifi': 'low',
    'plugs': 'low'
}
reco_dict = {
    'wifi': {
        'low': 1,
        'high': 3,
        'total_count': 4,
        'reco': 'high'
    },
    'plugs': {
        'low': 1,
        'total_count': 1,
        'reco': 'low'
    }
}
'''
def process_reviews_and_update_recommendation_data(review=None, reco_dict=None):
    """Fold one review into the per-attribute recommendation tallies.

    For every (attribute, score) pair in `review` whose score passes
    score_is_valid_reco_dict_key, the matching entry of `reco_dict` is
    updated: the counter for that score and the REVIEW_COUNT_SA total are
    incremented, and RECO_SA is switched to the new score when its count
    becomes strictly greater than the count of the current recommendation
    (so the existing recommendation is kept on ties). Pairs with any other
    score are skipped.

    Args:
        review: dict mapping attribute name -> score. Defaults to an empty
            review when omitted or None.
        reco_dict: dict mapping attribute name -> tally dict. It is updated
            in place. A fresh dict is created when omitted or None, so
            tallies never leak between calls through a shared default.

    Returns:
        The updated `reco_dict` (the same object that was passed in, if any).
    """
    if review is None:
        review = {}
    if reco_dict is None:
        reco_dict = {}

    for review_attribute, review_score in review.items():
        # Skip entries whose value is not a score level (e.g. reviewer or
        # creation data carried in the same review dict).
        if not score_is_valid_reco_dict_key(review_score):
            continue

        reco_attribute_dict = reco_dict.get(review_attribute)

        # No history for this attribute yet: the first score seen is both the
        # only count and the recommendation.
        if not reco_attribute_dict:
            reco_dict[review_attribute] = {
                review_score: 1,
                REVIEW_COUNT_SA: 1,
                RECO_SA: review_score,
            }
            continue

        # Increment the counter for this score and the overall total. Both
        # tolerate a missing key so a partially populated tally cannot raise
        # KeyError.
        reco_attribute_dict[review_score] = (
            reco_attribute_dict.get(review_score, 0) + 1
        )
        reco_attribute_dict[REVIEW_COUNT_SA] = (
            (reco_attribute_dict.get(REVIEW_COUNT_SA) or 0) + 1
        )

        # Promote this score to the recommendation once it is strictly the
        # most frequent. A missing or unknown current recommendation counts
        # as 0 instead of relying on comparing an int against None.
        curr_reco = reco_attribute_dict.get(RECO_SA)
        curr_reco_count = reco_attribute_dict.get(curr_reco) or 0
        if reco_attribute_dict[review_score] > curr_reco_count:
            reco_attribute_dict[RECO_SA] = review_score

    return reco_dict
def score_is_valid_reco_dict_key(score=None):
    """Return True when `score` equals HIGH_SA, MED_SA or LOW_SA, else False."""
    # A tuple (not a set) keeps this a pure equality check, so unhashable
    # values are simply reported as invalid.
    return score in (HIGH_SA, MED_SA, LOW_SA)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment