Skip to content

Instantly share code, notes, and snippets.

@masrab
Created January 7, 2015 03:41
Show Gist options
  • Save masrab/ec5f4627564b87afb349 to your computer and use it in GitHub Desktop.
Save masrab/ec5f4627564b87afb349 to your computer and use it in GitHub Desktop.
Simple web scraper in Python
# Simple script to scrape data from http://bigocheatsheet.com/
#
# Writes the table that follows the "data-structures" element to
# data-structures.txt and the table that follows the "heaps" element to
# heaps.txt, one comma-separated line per table row.
import csv

import requests
from bs4 import BeautifulSoup

URL = 'http://bigocheatsheet.com/'


def _table_rows(soup, section_id, skip_rows):
    """Return the cell texts of the first table after the element *section_id*.

    Args:
        soup: Parsed document to search.
        section_id: ``id`` attribute of the element that precedes the table.
        skip_rows: Number of leading ``<tr>`` rows to leave out of the result.

    Returns:
        A list of rows; each row is the list of ``.text`` values of its
        ``<td>``/``<th>`` cells, in document order.

    Raises:
        ValueError: If the element or a table following it cannot be found.
    """
    anchor = soup.find(id=section_id)
    if anchor is None:
        raise ValueError('no element with id=%r in the page' % section_id)
    table = anchor.find_next('table')
    if table is None:
        raise ValueError('no table follows the element id=%r' % section_id)
    return [
        [cell.text for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')[skip_rows:]
    ]


def _write_rows(path, rows):
    """Write *rows* to *path* as comma-separated lines terminated by '\\n'."""
    # csv.writer quotes any cell that itself contains a comma, a quote or a
    # newline, so such a cell can no longer break the column/line layout.
    # newline='' is what the csv module expects from the file object.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f, lineterminator='\n').writerows(rows)


def main():
    """Download the page and dump the two tables to text files."""
    # A timeout keeps the script from hanging forever on a stalled server.
    page = requests.get(URL, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    page.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(page.text, 'html.parser')

    # Skip the first two rows of the data-structures table.
    _write_rows('data-structures.txt',
                _table_rows(soup, 'data-structures', 2))
    # Repeat for heaps, skipping only the first row.
    _write_rows('heaps.txt', _table_rows(soup, 'heaps', 1))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment