adityajn105 · June 1, 2019 14:16
diff --git a/webscrapper.py b/webscrapper.py
 from bs4 import BeautifulSoup
 import urllib3
 import re
 import pandas as pd

 # Connection pool used for the single page fetch below.
 http = urllib3.PoolManager()

 link = "https://www.sitejabber.com/reviews/dream11.com"

 # Make the HTTP GET request for the reviews page.
 r = http.request('GET', link)
 # Fail early on an error response instead of silently scraping an error
 # page and ending up with an empty table.
 if r.status != 200:
     raise RuntimeError("GET {} failed with HTTP status {}".format(link, r.status))

 # Parse the returned HTML with the lxml parser.
 soup = BeautifulSoup(r.data, 'lxml')

 # Review bodies: all <p> tags whose id starts with "ReviewText".
 reviews = soup.find_all("p", {"id": re.compile(r'^ReviewText')})
 reviews = [tag.text for tag in reviews]

 # Review titles: all <div class="review_title"> tags; the first match is
 # skipped because it is not needed (it does not belong to a review entry
 # according to the original author).
 titles = soup.find_all("div", {"class": "review_title"})[1:]
 # Drop the first and last character of each title's link text
 # (presumably surrounding quotes or whitespace -- confirm against the page).
 titles = [tag.a.text[1:-1] for tag in titles]

 # Absolute profile URL of each review's author.
 authors = soup.find_all("div", {"class": 'author_name'})
 authors = ["https://www.sitejabber.com" + tag.a['href'] for tag in authors]

 # The three lists are paired by position, so a count mismatch means the
 # page layout no longer matches the selectors above.
 if not (len(titles) == len(authors) == len(reviews)):
     raise ValueError(
         "scraped column lengths differ: {} titles, {} authors, {} reviews".format(
             len(titles), len(authors), len(reviews)
         )
     )

 # Build a dataframe with one row per review.
 df = pd.DataFrame({'title': titles, 'author': authors, 'review': reviews})
 # A bare `df.head()` is discarded when run as a script; print the preview.
 print(df.head())
	from bs4 import BeautifulSoup
	import urllib3
	import re
	import pandas as pd

	# Connection pool used for the single page fetch below.
	http = urllib3.PoolManager()

	link = "https://www.sitejabber.com/reviews/dream11.com"

	# Make the HTTP GET request for the reviews page.
	r = http.request('GET', link)
	# Fail early on an error response instead of silently scraping an error
	# page and ending up with an empty table.
	if r.status != 200:
		raise RuntimeError("GET {} failed with HTTP status {}".format(link, r.status))

	# Parse the returned HTML with the lxml parser.
	soup = BeautifulSoup(r.data, 'lxml')

	# Review bodies: all <p> tags whose id starts with "ReviewText".
	reviews = soup.find_all("p", {"id": re.compile(r'^ReviewText')})
	reviews = [tag.text for tag in reviews]

	# Review titles: all <div class="review_title"> tags; the first match is
	# skipped because it is not needed (it does not belong to a review entry
	# according to the original author).
	titles = soup.find_all("div", {"class": "review_title"})[1:]
	# Drop the first and last character of each title's link text
	# (presumably surrounding quotes or whitespace -- confirm against the page).
	titles = [tag.a.text[1:-1] for tag in titles]

	# Absolute profile URL of each review's author.
	authors = soup.find_all("div", {"class": 'author_name'})
	authors = ["https://www.sitejabber.com" + tag.a['href'] for tag in authors]

	# The three lists are paired by position, so a count mismatch means the
	# page layout no longer matches the selectors above.
	if not (len(titles) == len(authors) == len(reviews)):
		raise ValueError(
			"scraped column lengths differ: {} titles, {} authors, {} reviews".format(
				len(titles), len(authors), len(reviews)
			)
		)

	# Build a dataframe with one row per review.
	df = pd.DataFrame({'title': titles, 'author': authors, 'review': reviews})
	# A bare `df.head()` is discarded when run as a script; print the preview.
	print(df.head())