jamescalam · May 4, 2020 12:17
diff --git a/hello_lucilius_letter.py b/hello_lucilius_letter.py
 # pull a letter from a webpage (the text found within its <p> elements)
 def pull_letter(http, timeout=30):
     """Download the page at ``http`` and return its cleaned paragraph text.

     The text of every ``<p>`` element is joined with newlines, then
     footnote references, numbered section markers, repeated spaces and
     blank lines are stripped.

     Args:
         http: URL of the page to fetch.
         timeout: Seconds to wait for the server before ``requests`` gives
             up; without one a stalled connection would block forever.

     Returns:
         The cleaned text as a single string, one paragraph per line.
     """
     # get html from webpage given by 'http'
     html = requests.get(http, timeout=timeout).text
     # parse into a beautiful soup object
     soup = BeautifulSoup(html, "html.parser")

     # build text contents within all p elements
     txt = '\n'.join(p.text for p in soup.find_all('p'))
     # drop webpage references ('[1]', '[2]', etc) first: removing one from
     # 'word [1] next' leaves a double space for the next step to collapse
     txt = re.sub(r'\[\d+\]', '', txt)
     # collapse every run of spaces (not only pairs) into a single space
     txt = re.sub(r' {2,}', ' ', txt)
     # drop the numbered section markers Seneca uses ('1. ', '2. ', etc).
     # The dot is escaped so that plain numbers such as '20 ' are kept;
     # NOTE: a sentence that ends in a number ('... in 65. Then') still
     # matches and loses that number.
     txt = re.sub(r'\d+\. ', '', txt)
     # collapse every run of newlines so no blank lines remain
     txt = re.sub(r'\n{2,}', '\n', txt)
     # and return the result
     return txt
	# pull a letter from a webpage (the text found within its <p> elements)
	def pull_letter(http, timeout=30):
		"""Fetch ``http`` and return the cleaned text of its ``<p>`` elements.

		Paragraph texts are joined with newlines; footnote references such
		as ``[1]``, numbered section markers such as ``1. ``, runs of spaces
		and blank lines are then removed.

		Args:
			http: URL of the page to fetch.
			timeout: Seconds ``requests`` waits on the server before giving
				up, so a stalled connection cannot block forever.

		Returns:
			The cleaned text as one string, one paragraph per line.
		"""
		# get html from webpage given by 'http'
		response = requests.get(http, timeout=timeout)
		# parse into a beautiful soup object
		soup = BeautifulSoup(response.text, "html.parser")

		# build text contents within all p elements
		paragraphs = [p.text for p in soup.find_all('p')]
		txt = '\n'.join(paragraphs)
		# remove webpage references ('[1]', '[2]', etc) before collapsing
		# spaces, since 'word [1] next' turns into 'word  next'
		txt = re.sub(r'\[\d+\]', '', txt)
		# replace extended whitespace (two or more spaces) with single space
		txt = re.sub(r' {2,}', ' ', txt)
		# remove the number bullet points that Seneca uses ('1. ', '2. ');
		# the escaped dot keeps ordinary numbers like '20 ' in the text.
		# NOTE: a sentence ending in a number followed by a space also matches.
		txt = re.sub(r'\d+\. ', '', txt)
		# remove blank lines, however many newlines were stacked up
		txt = re.sub(r'\n{2,}', '\n', txt)
		# and return the result
		return txt
No results found