Skip to content

Instantly share code, notes, and snippets.

@conquistadorjd
Last active November 8, 2018 10:51
Show Gist options
  • Save conquistadorjd/a2c84e6034bf6531c77fead7d3ace91b to your computer and use it in GitHub Desktop.
Save conquistadorjd/a2c84e6034bf6531c77fead7d3ace91b to your computer and use it in GitHub Desktop.
python webscrapping
################################################################################################
# name: webscrapping_01_simple.py
# desc: webscraping using beautifulsoup
# date: 2018-11-08
# Author: conquistadorjd
################################################################################################
from bs4 import BeautifulSoup
import requests
# Page to scrape.
url = 'https://en.wikipedia.org/wiki/List_of_programming_languages'

# Without a timeout, requests.get() can block indefinitely on a stalled connection.
r = requests.get(url, timeout=10)
# Stop on HTTP errors (4xx/5xx) rather than parsing an error page.
r.raise_for_status()
data = r.text

# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(data, features="lxml")

# find() returns the first matching tag, or None when nothing matches.
header = soup.find('h1')
print('header : ', header)
if header is not None:
    # Only dereference .text when a tag was actually found.
    print('header Text: ', header.text)
################################################################################################
# name: webscrapping_02.py
# desc: webscraping using beautifulsoup
# date: 2018-11-08
# Author: conquistadorjd
################################################################################################
from bs4 import BeautifulSoup
import requests
# Page to scrape.
url = 'https://en.wikipedia.org/wiki/List_of_programming_languages'

# Without a timeout, requests.get() can block indefinitely on a stalled connection.
r = requests.get(url, timeout=10)
# Stop on HTTP errors (4xx/5xx) rather than parsing an error page.
r.raise_for_status()
data = r.text

# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(data, features="lxml")

# To fetch a specific tag: find() returns the first match, or None when
# nothing matches, so guard before reading .text.
header = soup.find('h1')
print('header : ', header)
if header is not None:
    print('header Text: ', header.text)

# To fetch the first tag having a specific attribute value.
var1 = soup.find(attrs={"class": "navbox-title"})
print("var1 : ", var1)
if var1 is not None:
    print("var1 : ", var1.text)

# To fetch all <li> tags and print the text of each one.
for link in soup.find_all('li'):
    print(link.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment