Skip to content

Instantly share code, notes, and snippets.

@jeremyboggs
Created April 26, 2017 17:41
Show Gist options
  • Save jeremyboggs/1cf348d13eea9c0b46553e6a2bbb61f0 to your computer and use it in GitHub Desktop.
Scrape locally-saved Pinterest web pages
#!/usr/bin/env python
"""Scrape pin titles and descriptions from a locally saved Pinterest page.

Usage: script.py <saved_page.html>
Writes the extracted text to <saved_page>.txt next to the input file.
(Python 2 script: output is encoded to UTF-8 bytes before writing.)
"""
import os
import sys
import urllib2  # NOTE(review): unused in this script -- kept in case other code relies on it.
from bs4 import BeautifulSoup

# Base URL for Pinterest.
# NOTE(review): currently unused; pin links are not resolved against it.
base_url = 'http://pinterest.com'


def extract_pins(markup):
    """Return the concatenated title + description text of every pin in *markup*.

    A "pin" is any <div> carrying Pinterest's 'PinRep' or
    'GrowthUnauthPin_brioPin' class; its title is the first <h3> and its
    description the first <p>. Either piece may be absent, in which case
    it contributes the empty string.
    """
    soup = BeautifulSoup(markup, 'html.parser')
    pieces = []
    # Find all the pin articles on the page.
    for pin in soup.find_all('div', {'class': ['PinRep', 'GrowthUnauthPin_brioPin']}):
        title = pin.h3
        # A pin may have no <h3> at all.
        title_text = title.contents[0] if title is not None else ''
        description = pin.p
        # The <p> may be missing or present-but-empty.
        if description is None or len(description.contents) < 1:
            desc_text = ''
        else:
            desc_text = description.contents[0]
        pieces.append('%s%s' % (title_text, desc_text))
    # Join once instead of concatenating per pin.
    return ''.join(pieces)


def main():
    """Read the HTML file named on the command line; write its pins to a .txt file."""
    if len(sys.argv) < 2:
        # The original crashed with a bare IndexError when no argument was given.
        sys.exit('usage: %s <saved_page.html>' % sys.argv[0])
    html_file = sys.argv[1]
    newfile = os.path.splitext(html_file)[0] + '.txt'
    # Open both files under `with` so they are always closed
    # (the original never closed file_in).
    with open(html_file, 'r') as file_in:
        with open(newfile, 'w') as file_out:
            file_out.write(extract_pins(file_in).encode('utf-8'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment