Skip to content

Instantly share code, notes, and snippets.

@manashmandal
Created July 29, 2016 15:06
Show Gist options
  • Save manashmandal/42995e52427bd0157b8e17fc89d5f6b2 to your computer and use it in GitHub Desktop.
Save manashmandal/42995e52427bd0157b8e17fc89d5f6b2 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import urllib2
# Scraping info (visiting a tag listing page):
#   each headline is an <h2 class="entry-title"> element wrapping an <a> link.
news_link = 'http://www.dhakatribune.com/tags/Garments'
# (name, value) header pairs added to every request made by fetch_soup().
urllib2_header = [('User-agent', 'Mozilla/5.0')]


def fetch_soup(url, headers=None):
    """Download *url* and return the page parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        headers: Iterable of ``(name, value)`` header pairs to add to the
            request. Defaults to the module-level ``urllib2_header``.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser.
    """
    if headers is None:
        headers = urllib2_header
    request = urllib2.Request(url)
    for name, value in headers:
        request.add_header(name, value)
    response = urllib2.urlopen(request)
    try:
        # BeautifulSoup consumes the file-like response while parsing, so the
        # connection can be released as soon as the tree has been built.
        return BeautifulSoup(response, 'lxml')
    finally:
        response.close()


def iter_headlines(headings):
    """Yield a ``(title, link)`` pair for each heading that wraps a link.

    Args:
        headings: Iterable of tag objects whose ``.a`` attribute is the first
            nested ``<a>`` tag, or ``None`` when there is none.

    Yields:
        ``(anchor.string, href)`` tuples. Headings without an ``<a>`` are
        skipped instead of raising ``AttributeError``; ``href`` is ``None``
        when the anchor carries no such attribute.
    """
    for heading in headings:
        # The tag already exposes its first <a>; no need to re-parse its HTML.
        anchor = heading.a
        if anchor is None:
            continue
        yield anchor.string, anchor.get('href')


def main():
    """Fetch ``news_link`` and print each headline title followed by its link."""
    soup = fetch_soup(news_link)
    headings = soup.find_all('h2', {"class": "entry-title"})
    for title, link in iter_headlines(headings):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(title)
        print(link)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment