wcneill · September 3, 2019 22:42
diff --git a/writetofile.py b/writetofile.py
 from bs4 import BeautifulSoup
 import requests as req
 import textwrap as tr

 # Downloads the article page, finds the div(s) holding the article
 # body, and writes every paragraph inside them to 'html_to_text.txt',
 # wrapped so that no line is wider than 72 characters. Each wrapped
 # paragraph is terminated by a newline. The .strings attribute of a
 # BS4 Tag yields every text child of the tag; the pieces are joined
 # with single spaces before wrapping. str.join accepts any iterable,
 # so the generator is passed to it directly.


 if __name__ == '__main__':

    url = 'http://www.vanityfair.com/society/2014/06/monica-lewinsky-humiliation-culture'
    # requests applies no timeout by default; without one a stalled
    # connection would hang the script indefinitely.
    r = req.get(url, timeout=30)
    # Fail loudly on a 4xx/5xx response instead of scraping an error page.
    r.raise_for_status()
    r_html = r.text

    soup = BeautifulSoup(r_html, 'html.parser')

    # Explicit UTF-8: scraped text may contain non-ASCII characters that
    # the platform's default encoding cannot represent.
    with open('html_to_text.txt', 'w', encoding='utf-8') as open_file:
        # get all divs with CSS class 'grid--item body body__container article__body grid-layout__content'
        # then iterate through each paragraph in each div, format and print to file.
        for stuff in soup.find_all('div', class_='grid--item body body__container article__body grid-layout__content'):
            for par in stuff.find_all('p'):
                open_file.write(tr.fill(" ".join(par.strings), 72) + '\n')
	from bs4 import BeautifulSoup
	import requests as req
	import textwrap as tr

	# Downloads the article page, finds the div(s) holding the article
	# body, and writes every paragraph inside them to 'html_to_text.txt',
	# wrapped so that no line is wider than 72 characters. Each wrapped
	# paragraph is terminated by a newline. The .strings attribute of a
	# BS4 Tag yields every text child of the tag; the pieces are joined
	# with single spaces before wrapping. str.join accepts any iterable,
	# so the generator is passed to it directly.


	if __name__ == '__main__':

	    url = 'http://www.vanityfair.com/society/2014/06/monica-lewinsky-humiliation-culture'
	    # requests applies no timeout by default; without one a stalled
	    # connection would hang the script indefinitely.
	    r = req.get(url, timeout=30)
	    # Fail loudly on a 4xx/5xx response instead of scraping an error page.
	    r.raise_for_status()
	    r_html = r.text

	    soup = BeautifulSoup(r_html, 'html.parser')

	    # Explicit UTF-8: scraped text may contain non-ASCII characters that
	    # the platform's default encoding cannot represent.
	    with open('html_to_text.txt', 'w', encoding='utf-8') as open_file:
	        # get all divs with CSS class 'grid--item body body__container article__body grid-layout__content'
	        # then iterate through each paragraph in each div, format and print to file.
	        for stuff in soup.find_all('div', class_='grid--item body body__container article__body grid-layout__content'):
	            for par in stuff.find_all('p'):
	                open_file.write(tr.fill(" ".join(par.strings), 72) + '\n')
No results found