Skip to content

Instantly share code, notes, and snippets.

@ndunn219
Created May 9, 2016 14:27
Show Gist options
  • Save ndunn219/925f3fa65d0f62e9d912589d9862f44e to your computer and use it in GitHub Desktop.
Save ndunn219/925f3fa65d0f62e9d912589d9862f44e to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Scrape the breed-ranking table from the AKC registration statistics page
# and save it to "AKC_Dog_Registrations.csv" as a pandas DataFrame.
url = "https://www.akc.org/reg/dogreg_stats.cfm"

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the statistics table.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.text, "lxml")

# The script reads the first <table> on the page.
tables = soup.find_all('table')
if not tables:
    # Same exception type as the original bare `[0]` lookup, with a message.
    raise IndexError("no <table> element found at {0}".format(url))
table = tables[0]

# The first two <tr> rows are skipped -- presumably title/header rows;
# verify against the live page if the layout changes.
rows = table.find_all('tr')[2:]

# Output column names, in the same order as the <td> cells of each row.
columns = ('breeds', 'rank2014', 'rank2013', 'rank2009')
data = {name: [] for name in columns}

for row in rows:
    cols = row.find_all('td')
    if len(cols) < len(columns):
        # Rows without enough cells (e.g. spacer or <th>-only rows) cannot
        # fill every column; skip them instead of crashing with IndexError.
        continue
    # zip() stops at the shorter sequence, so only the first four cells
    # are consumed even when a row has extra columns.
    for name, cell in zip(columns, cols):
        data[name].append(cell.get_text())

dogData = pd.DataFrame(data)
dogData.to_csv("AKC_Dog_Registrations.csv")

# Bare expression: displays the frame when run in a notebook/REPL cell;
# it has no effect when run as a plain script.
dogData
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment