Created
May 9, 2016 14:27
-
-
Save ndunn219/925f3fa65d0f62e9d912589d9862f44e to your computer and use it in GitHub Desktop.
Code used in video at https://www.youtube.com/watch?v=VIe7ES7N6Xk based on blog post at https://chihacknight.org/blog/2014/11/26/an-intro-to-web-scraping-with-python.html.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Scrape a breed-ranking table from the AKC statistics page and save the
# extracted columns to a CSV file.

url = "https://www.akc.org/reg/dogreg_stats.cfm"

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as data.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.text, "lxml")

# Only the first <table> on the page is used; the [0] index raises IndexError
# if the page contains no table at all.
table = soup.find_all('table')[0]
# The first two <tr> rows are skipped (presumably header rows -- confirm
# against the live page if its layout changes).
rows = table.find_all('tr')[2:]

# Column-oriented accumulator: one list per output column, filled row by row.
data = {
    'breeds': [],
    'rank2014': [],
    'rank2013': [],
    'rank2009': [],
}

for row in rows:
    cols = row.find_all('td')
    # Rows with fewer than four <td> cells cannot supply every column;
    # skip them rather than crash with IndexError part-way through.
    if len(cols) < 4:
        continue
    data['breeds'].append(cols[0].get_text())
    data['rank2014'].append(cols[1].get_text())
    data['rank2013'].append(cols[2].get_text())
    data['rank2009'].append(cols[3].get_text())

dogData = pd.DataFrame(data)
dogData.to_csv("AKC_Dog_Registrations.csv")

# Bare expression: shows the frame when run as the last line of an
# interactive/notebook cell; it has no effect when run as a plain script.
dogData
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment