App Reviews - Python script to retrieve App Store reviews and save them to a CSV file
#!/usr/bin/env python
try:
    # For Python 3.0 and later
    from urllib.request import urlopen
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen

import json
import time


def getJson(url):
    response = urlopen(url)
    data = str(response.read())
    return json.loads(data)


def getReviews(appID, page=1):
    url = 'https://itunes.apple.com/rss/customerreviews/id=%s/page=%d/sortby=mostrecent/json' % (appID, page)
    data = getJson(url).get('feed')
    if data.get('entry') is None:
        # No reviews on this page; move on to the next one.
        getReviews(appID, page+1)
        return
    for entry in data.get('entry'):
        # The first entry is the app's own metadata (it has an 'im:name' key), not a review.
        if entry.get('im:name'): continue
        review_id = entry.get('id').get('label')
        title = entry.get('title').get('label')
        author = entry.get('author').get('name').get('label')
        author_url = entry.get('author').get('uri').get('label')
        version = entry.get('im:version').get('label')
        rating = entry.get('im:rating').get('label')
        review = entry.get('content').get('label')
        vote_count = entry.get('im:voteCount').get('label')
        # Double quotes are doubled so the values stay valid inside quoted CSV fields.
        csvData = [review_id, title.replace('"', '""'), author, author_url, version, rating, review.replace('"', '""'), vote_count]
        print('"' + '","'.join(csvData) + '"')
    getReviews(appID, page+1)


csvTitles = ['review_id', 'title', 'author', 'author_url', 'version', 'rating', 'review', 'vote_count']
print(','.join(csvTitles))
getReviews(<app store id>)
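The script above only prints CSV rows to stdout. Below is a minimal sketch of capturing that output into a file, assuming Python 3 and that getReviews and csvTitles from the script are available in the same module (with the trailing print/getReviews calls removed); the filename and the app ID are placeholders, not values from the gist.

import contextlib

# Redirect the print() output of the unmodified functions into a CSV file.
with open('reviews.csv', 'w') as output_file, contextlib.redirect_stdout(output_file):
    print(','.join(csvTitles))
    getReviews('123456789')  # placeholder app ID; use your app's numeric App Store ID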
Updated rigogsilva's code with the new URL:
import pprint
import time
import typing

import requests


def is_error_response(http_response, seconds_to_sleep: float = 1) -> bool:
    """
    Returns False if status_code is 503 (system unavailable) or 200 (success),
    otherwise it will return True (failed). This function should be used
    after calling the commands requests.post() and requests.get().

    :param http_response:
        The response object returned from requests.post or requests.get.
    :param seconds_to_sleep:
        The sleep time used if the status_code is 503. This is used to not
        overwhelm the service since it is unavailable.
    """
    if http_response.status_code == 503:
        time.sleep(seconds_to_sleep)
        return False

    return http_response.status_code != 200


def get_json(url) -> typing.Union[dict, None]:
    """
    Returns json response if any. Returns None if no json found.

    :param url:
        The url to get the json from.
    """
    response = requests.get(url)
    if is_error_response(response):
        return None
    json_response = response.json()
    return json_response


def get_reviews(app_id, page=1) -> typing.List[dict]:
    """
    Returns a list of dictionaries with each dictionary being one review.

    :param app_id:
        The app_id you are searching.
    :param page:
        The page id to start the loop. Once it reaches the final page + 1, the
        app will return a non-valid json, thus it will exit with the current
        reviews.
    """
    print(f'STARTED {page}')
    reviews: typing.List[dict] = [{}]  # note: starts with an empty placeholder dict

    while True:
        url = (f'https://itunes.apple.com/ru/rss/customerreviews/page={page}/id={app_id}/sortBy=mostRecent/json')
        json = get_json(url)
        if not json:
            return reviews
        data_feed = json.get('feed')
        try:
            if not data_feed.get('entry'):
                get_reviews(app_id, page + 1)
            reviews += [
                {
                    'review_id': entry.get('id').get('label'),
                    'title': entry.get('title').get('label'),
                    'author': entry.get('author').get('name').get('label'),
                    'author_url': entry.get('author').get('uri').get('label'),
                    'version': entry.get('im:version').get('label'),
                    'rating': entry.get('im:rating').get('label'),
                    'review': entry.get('content').get('label'),
                    'vote_count': entry.get('im:voteCount').get('label'),
                    'page': page
                }
                for entry in data_feed.get('entry')
                if not entry.get('im:name')  # the app's own metadata entry is not a review
            ]
            page += 1
        except Exception:
            return reviews


reviews = get_reviews('1234567')
print(len(reviews))
pprint.pprint(reviews)
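Since the gist's stated goal is a CSV file, here is a small sketch (not part of the posted code) of writing the list returned by get_reviews() to disk with csv.DictWriter; the output filename is arbitrary.

import csv

fieldnames = ['review_id', 'title', 'author', 'author_url',
              'version', 'rating', 'review', 'vote_count', 'page']
with open('app_reviews.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    # Skip the empty placeholder dict the function starts with.
    writer.writerows(row for row in reviews if row)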
Can someone help? It's not going beyond 500 reviews. How can I get all the reviews?
500 is the maximum Apple stores in this feed; you'll need to pull reviews periodically over time if you want to build up your own dataset.
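One way to do that is a small wrapper around get_reviews() that merges each run into a local file keyed by review_id, so a scheduled job only adds reviews it hasn't seen before. This is just a sketch under that assumption; the store path and function name are made up for illustration.

import json
import os

STORE_PATH = 'reviews_store.json'  # hypothetical local cache file

def update_store(app_id: str) -> int:
    """Merge the latest feed pages into the local store, deduplicated by review_id."""
    existing = {}
    if os.path.exists(STORE_PATH):
        with open(STORE_PATH, encoding='utf-8') as f:
            existing = {r['review_id']: r for r in json.load(f)}
    for review in get_reviews(app_id):
        if review:  # skip the empty placeholder dict
            existing[review['review_id']] = review
    with open(STORE_PATH, 'w', encoding='utf-8') as f:
        json.dump(list(existing.values()), f, ensure_ascii=False, indent=2)
    return len(existing)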
Hi, I'm getting this output:
STARTED 1
STARTED 2
STARTED 3
STARTED 4
STARTED 5
STARTED 6
STARTED 7
STARTED 8
STARTED 9
STARTED 10
STARTED 11
1
[{}]
What's wrong? Can someone help?
@dvlmcr69 you should change the country field in the URL:
url = (f'https://itunes.apple.com/------> ru <-------/rss/customerreviews/page={page}/id={app_id}/sortBy=mostRecent/json')
Use the storefront you need, such as us, ko, ru, or ca.
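A minimal sketch of making the storefront a parameter instead of hard-coding it, assuming the rest of get_reviews() stays the same; build_feed_url is a name invented here and 'us' is just an example default.

def build_feed_url(app_id: str, page: int, country: str = 'us') -> str:
    # The storefront code sits right after the host, e.g. us, ru, ca.
    return (f'https://itunes.apple.com/{country}/rss/customerreviews/'
            f'page={page}/id={app_id}/sortBy=mostRecent/json')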
Does this script have the same purpose as the app_store_scraper library? I see that the returned data is different.
Hi rigogsilva, this is not working for me. Can you help me solve it?