Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dimitryzub/785cd77806346706adfbde2a89b7d2de to your computer and use it in GitHub Desktop.
Save dimitryzub/785cd77806346706adfbde2a89b7d2de to your computer and use it in GitHub Desktop.
Scrape all Google Play App Product Reviews with Python and Playwright
# https://stackoverflow.com/questions/54554261/selenium-unable-to-locate-app-id-title-element-when-trying-to-load-google-play/72490391#72490391
# https://replit.com/@DimitryZub1/Scrape-Google-Play-Store-App-Reviews-Bs4-SerpApi#playwright_solution.py
# https://serpapi.com/google-play-product-reviews
from playwright.sync_api import sync_playwright
import json, time, re
def _text_of(node, selector):
    """Return the text content of the first match of ``selector`` under ``node``.

    Returns ``None`` when nothing matches, so a review card that lacks one
    element does not abort the whole scrape with an ``AttributeError``.
    """
    element = node.query_selector(selector)
    return None if element is None else element.text_content()


def _attribute_of(node, selector, attribute):
    """Return ``attribute`` of the first match of ``selector`` under ``node``.

    Returns ``None`` when nothing matches the selector.
    """
    element = node.query_selector(selector)
    return None if element is None else element.get_attribute(attribute)


def _parse_review(comment, position):
    """Build the record for one review element.

    Args:
        comment: element handle of a single review card (a ``.RHo1pe`` match).
        position: 1-based index of the review in page order.

    Returns:
        A dict with the keys ``position``, ``name``, ``avatar``, ``rating``,
        ``comment_likes``, ``date`` and ``comment``. Any field whose element
        (or, for ``rating``, whose digits) cannot be found is ``None``.
    """
    # The rating is taken as the first run of digits in the element's aria-label.
    rating_label = _attribute_of(comment, '.Jx4nYe .iXRFPc', 'aria-label')
    rating_match = re.search(r'\d+', rating_label) if rating_label else None

    return {
        'position': position,
        'name': _text_of(comment, '.X5PpBb'),
        'avatar': _attribute_of(comment, '.gSGphe img', 'src'),
        'rating': rating_match.group() if rating_match else None,
        'comment_likes': _attribute_of(comment, '[jscontroller=SWD8cc]', 'data-original-thumbs-up-count'),
        'date': _text_of(comment, '.bp9Aid'),
        'comment': _text_of(comment, '.h3YV2d'),
    }


# Scrape every review of one Google Play app: open the reviews dialog, scroll
# it until it stops growing, then extract each review and print them as JSON.
# NOTE(review): all CSS class names below are taken as-is from the page markup
# and are presumably auto-generated by Google -- verify they still match.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, slow_mo=50)
    try:
        page = browser.new_page(
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
            viewport={'width': 1920, 'height': 1080},
        )
        page.goto('https://play.google.com/store/apps/details?id=com.topsecurity.android&hl=en_GB&gl=US')

        # open user reviews window
        page.locator('button.VfPpkd-LgbsSe.aLey0c', has_text='See all reviews').click()
        time.sleep(1)

        if page.query_selector('.VfPpkd-wzTsW'):
            last_height = page.evaluate("document.querySelector('.fysCi').scrollHeight;")
            page.screenshot(path='start_of_the_reviews.png', full_page=True)

            # Keep scrolling the reviews container to its bottom until its
            # scrollHeight is unchanged between two consecutive scrolls.
            # NOTE(review): the wait per scroll is a fixed 0.5 s; if loading
            # takes longer than that the loop may stop early -- confirm.
            while True:
                print('Scrolling..')

                # Scroll down
                page.evaluate("document.querySelector('.fysCi').scrollTo(0, document.querySelector('.fysCi').scrollHeight);")
                time.sleep(0.5)

                current_height = page.evaluate("document.querySelector('.fysCi').scrollHeight;")
                print(f'last height = {last_height}')
                print(f'current height = {current_height}')

                if current_height == last_height:
                    break
                last_height = current_height
        else:
            print('Looks like the review window does not appear.')

        # Extraction runs in both branches: it collects whatever review
        # elements are present on the page at this point.
        print('Extracting reviews...')
        user_comments = [
            _parse_review(comment, index)
            for index, comment in enumerate(page.query_selector_all('.RHo1pe'), start=1)
        ]

        print(json.dumps(user_comments, indent=2, ensure_ascii=False))
        page.screenshot(path='end_of_the_reviews.png', full_page=True)
    finally:
        # Close the browser even when navigation, the click or extraction raises.
        browser.close()
@dimitryzub
Copy link
Author

Outputs:

[
  {
    "position": 1,
    "name": "Kevin Bauer",
    "avatar": "https://play-lh.googleusercontent.com/a/AATXAJyAb8JYXl-_si7_fWbpydfgEuuAKcvdbiGaW0OP=s32-rw-mo",
    "rating": "2",
    "comment_likes": "329",
    "date": "27 May 2022",
    "comment": "The interrupting is horrible it is messing with me about 65% of the time when iam texting and i was interrupted when i was filling out a job application twice, i had to start over both times OMG It just did it while iam filling out this remarks of my experience with bravo security app also before I go it interrupted me 3 more times, they need to do something with the programming of the app like when there hasn't been any activity for one minute, then pop in otherwise it seems to be doing ok."
  }, ... other reviews
  {
    "position": 244,
    "name": "Kay Ach",
    "avatar": "https://play-lh.googleusercontent.com/a-/AOh14GiarKPy6h0DG73hd195aEmJaVedKnr1ZTl4i6GO=s32-rw",
    "rating": "4",
    "comment_likes": "4",
    "date": "14 May 2022",
    "comment": "Ok"
  }
]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment