Created
April 26, 2017 17:41
-
-
Save jeremyboggs/1cf348d13eea9c0b46553e6a2bbb61f0 to your computer and use it in GitHub Desktop.
Scrape locally-saved Pinterest web pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Scrape pin titles and descriptions from a locally-saved Pinterest page.

Usage: scrape_pins.py saved_page.html

Writes the extracted text to a file next to the input, with the same
base name and a .txt extension.
"""
import os
import sys

from bs4 import BeautifulSoup

# Base URL for Pinterest (available for building absolute pin links).
base_url = 'http://pinterest.com'


def extract_pin_text(soup):
    """Return the concatenated title + description text of every pin.

    A pin is any <div> whose class is 'PinRep' or
    'GrowthUnauthPin_brioPin'.  A missing title or description
    contributes an empty string, matching the original behavior.
    """
    parts = []
    for pin in soup.find_all('div', {'class': ['PinRep', 'GrowthUnauthPin_brioPin']}):
        title = pin.h3
        title = '' if title is None else str(title.contents[0])
        description = pin.p
        if description is None or len(description.contents) < 1:
            description = ''
        else:
            description = str(description.contents[0])
        parts.append(title + description)
    return ''.join(parts)


def main():
    """Parse the HTML file named on the command line and write <name>.txt."""
    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s saved_pinterest_page.html' % sys.argv[0])
    html_file = sys.argv[1]
    newfile = os.path.splitext(html_file)[0] + '.txt'
    # Parse inside a `with` so the input handle is closed promptly
    # (the original leaked the open file for the life of the process).
    with open(html_file, 'r') as file_in:
        soup = BeautifulSoup(file_in, 'html.parser')
    with open(newfile, 'w') as file_out:
        file_out.write(extract_pin_text(soup))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.