Skip to content

Instantly share code, notes, and snippets.

@shirish201
Last active December 8, 2020 07:03
Show Gist options
  • Save shirish201/bbb511cdeef176e33b9339c0ff4dc075 to your computer and use it in GitHub Desktop.
Save shirish201/bbb511cdeef176e33b9339c0ff4dc075 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 4 00:02:26 2020
@author: shirishgupta
"""
from bs4 import BeautifulSoup
from bs4.element import Comment
import pandas as pd
import numpy as np
import requests
# Download the most recent customer reviews of one app from the iTunes
# customer-reviews RSS feed and collect them in the ``final_data`` DataFrame
# (one row per <entry> element, columns as listed in REVIEW_FIELDS).

# URL of one feed page; pages 1..PAGE_COUNT are requested.
FEED_URL_TEMPLATE = (
    "https://itunes.apple.com/us/rss/customerreviews/"
    "page={page}/id={app_id}/sortBy=mostrecent/xml"
)
APP_ID = 284882215
PAGE_COUNT = 10

# Without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT_SECONDS = 30

# Placeholder stored when an entry lacks a field (kept as a single space so
# the output stays compatible with earlier runs of this script).
MISSING_VALUE = " "

# (output column, XML element name) pairs, in output column order.
REVIEW_FIELDS = (
    ("updated", "updated"),
    ("Title", "title"),
    ("content_type", "content"),
    ("rating", "im:rating"),
    ("user_name", "name"),
)


def _field_text(entry, tag_name):
    """Return the text of the first ``tag_name`` element found inside ``entry``.

    Returns ``MISSING_VALUE`` when the element is absent or when it contains
    nested elements instead of plain text.
    """
    node = entry.find(tag_name)
    if node is None:
        return MISSING_VALUE
    try:
        # Iterating a tag yields its direct children; text nodes are str
        # subclasses, so they can be joined directly.
        return " ".join(node)
    except TypeError:
        # A child was a nested tag rather than text.
        return MISSING_VALUE


def _parse_reviews(xml_data):
    """Parse one feed page and return a list of dicts, one per <entry>.

    A page without any <entry> element yields an empty list.
    """
    soup = BeautifulSoup(xml_data, "xml")
    rows = []
    entry = soup.find("entry")
    # find_next_sibling() returns None (it does not raise) once the last
    # entry has been consumed, so test for None explicitly. Relying on an
    # exception here used to add one all-blank row after every page.
    while entry is not None:
        rows.append(
            {column: _field_text(entry, tag) for column, tag in REVIEW_FIELDS}
        )
        entry = entry.find_next_sibling("entry")
    return rows


def _fetch_page(page):
    """Return the raw response body of feed page number ``page``."""
    url = FEED_URL_TEMPLATE.format(page=page, app_id=APP_ID)
    return requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS).content


review_rows = []
for page in range(1, PAGE_COUNT + 1):
    review_rows.extend(_parse_reviews(_fetch_page(page)))

# Build the frame once from all rows: DataFrame.append() no longer exists in
# pandas 2.x, and growing a frame page by page copies it on every iteration.
final_data = pd.DataFrame(
    review_rows,
    columns=[column for column, _ in REVIEW_FIELDS],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment