Created
June 3, 2022 13:40
-
-
Save dimitryzub/785cd77806346706adfbde2a89b7d2de to your computer and use it in GitHub Desktop.
Scrape all Google Play App Product Reviews with Python and Playwright
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # https://stackoverflow.com/questions/54554261/selenium-unable-to-locate-app-id-title-element-when-trying-to-load-google-play/72490391#72490391 | |
| # https://replit.com/@DimitryZub1/Scrape-Google-Play-Store-App-Reviews-Bs4-SerpApi#playwright_solution.py | |
| # https://serpapi.com/google-play-product-reviews | |
| from playwright.sync_api import sync_playwright | |
| import json, time, re | |
# Selectors / JS snippets used more than once while scrolling the reviews dialog.
_SCROLL_CONTAINER_HEIGHT_JS = "document.querySelector('.fysCi').scrollHeight;"
_SCROLL_TO_BOTTOM_JS = (
    "document.querySelector('.fysCi').scrollTo(0, document.querySelector('.fysCi').scrollHeight);"
)


def _text_of(parent, selector):
    """Return the text content of the first *selector* match under *parent*.

    Returns None when nothing matches, instead of raising AttributeError on
    the None that ``query_selector`` hands back for a missing element.
    """
    node = parent.query_selector(selector)
    return node.text_content() if node is not None else None


def _attr_of(parent, selector, attribute):
    """Return *attribute* of the first *selector* match under *parent*, or None if no match."""
    node = parent.query_selector(selector)
    return node.get_attribute(attribute) if node is not None else None


def _rating_of(comment):
    """Return the first run of digits in the rating element's aria-label, or None.

    None is returned when the element, its aria-label, or a digit is missing.
    """
    label = _attr_of(comment, '.Jx4nYe .iXRFPc', 'aria-label')
    found = re.search(r'\d+', label) if label else None
    return found.group() if found else None


with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, slow_mo=50)
    try:
        page = browser.new_page(
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
            viewport={'width': 1920, 'height': 1080},
        )
        page.goto('https://play.google.com/store/apps/details?id=com.topsecurity.android&hl=en_GB&gl=US')

        # open user reviews window
        page.locator('button.VfPpkd-LgbsSe.aLey0c', has_text='See all reviews').click()
        # wait_for_timeout (milliseconds) rather than time.sleep, so Playwright
        # keeps processing browser events while we wait.
        page.wait_for_timeout(1000)

        user_comments = []

        if page.query_selector('.VfPpkd-wzTsW'):
            last_height = page.evaluate(_SCROLL_CONTAINER_HEIGHT_JS)
            page.screenshot(path='start_of_the_reviews.png', full_page=True)

            # Keep scrolling the reviews container to its bottom until its
            # scrollHeight stops growing between two consecutive checks.
            while True:
                print('Scrolling..')

                # Scroll down
                page.evaluate(_SCROLL_TO_BOTTOM_JS)
                page.wait_for_timeout(500)
                current_height = page.evaluate(_SCROLL_CONTAINER_HEIGHT_JS)

                print(f'last height = {last_height}')
                print(f'current height = {current_height}')

                if current_height == last_height:
                    break
                last_height = current_height
        else:
            print('Looks like the review window does not appear.')

        print('Extracting reviews...')
        for index, comment in enumerate(page.query_selector_all('.RHo1pe'), start=1):
            # Each field falls back to None when its element is absent, so one
            # incomplete review card does not abort the whole extraction.
            user_comments.append({
                'position': index,
                'name': _text_of(comment, '.X5PpBb'),
                'avatar': _attr_of(comment, '.gSGphe img', 'src'),
                'rating': _rating_of(comment),
                'comment_likes': _attr_of(comment, '[jscontroller=SWD8cc]', 'data-original-thumbs-up-count'),
                'date': _text_of(comment, '.bp9Aid'),
                'comment': _text_of(comment, '.h3YV2d'),
            })

        print(json.dumps(user_comments, indent=2, ensure_ascii=False))
        page.screenshot(path='end_of_the_reviews.png', full_page=True)
    finally:
        # Close the browser even if navigation, scrolling or extraction raised.
        browser.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Outputs:
[ { "position": 1, "name": "Kevin Bauer", "avatar": "https://play-lh.googleusercontent.com/a/AATXAJyAb8JYXl-_si7_fWbpydfgEuuAKcvdbiGaW0OP=s32-rw-mo", "rating": "2", "comment_likes": "329", "date": "27 May 2022", "comment": "The interrupting is horrible it is messing with me about 65% of the time when iam texting and i was interrupted when i was filling out a job application twice, i had to start over both times OMG It just did it while iam filling out this remarks of my experience with bravo security app also before I go it interrupted me 3 more times, they need to do something with the programming of the app like when there hasn't been any activity for one minute, then pop in otherwise it seems to be doing ok." }, ... other reviews { "position": 244, "name": "Kay Ach", "avatar": "https://play-lh.googleusercontent.com/a-/AOh14GiarKPy6h0DG73hd195aEmJaVedKnr1ZTl4i6GO=s32-rw", "rating": "4", "comment_likes": "4", "date": "14 May 2022", "comment": "Ok" } ]