mateisuica · September 11, 2017 18:04
diff --git a/gistfile1.txt b/gistfile1.txt
 from bs4 import BeautifulSoup

 def parseContent( content ):
     """Return the text of heading, paragraph and link tags found in *content*.

     *content* is parsed as HTML with Beautiful Soup's ``html.parser``
     backend. Every ``h1``-``h5``, ``p`` and ``a`` tag is visited in the
     order ``find_all`` returns them, and each tag whose ``.string`` is not
     ``None`` contributes its string preceded by a single space. The result
     therefore starts with a space whenever any text was collected, and is
     the empty string otherwise.

     NOTE(review): per the Beautiful Soup documentation ``.string`` is
     ``None`` for a tag with more than one child, so such tags (for example
     a paragraph with mixed text and inline markup) contribute nothing.
     Confirm that this is the intended behaviour.
     """
     soup = BeautifulSoup(content, 'html.parser')

     # Headings, paragraphs and anchors are the only tags considered.
     wanted_tags = soup.find_all(['h1','h2','h3','h4','h5', 'p','a'])

     # Lazily pull each tag's direct string; tags without one yield None.
     strings = (tag.string for tag in wanted_tags)

     # Prefix every fragment with a space, exactly as the incremental
     # concatenation did, and build the result in a single join.
     return "".join(" " + fragment for fragment in strings if fragment is not None)
	from bs4 import BeautifulSoup

	def parseContent( content ):
	    """Return the text of heading, paragraph and link tags found in *content*.

	    *content* is parsed as HTML with Beautiful Soup's ``html.parser``
	    backend. Every ``h1``-``h5``, ``p`` and ``a`` tag is visited in the
	    order ``find_all`` returns them, and each tag whose ``.string`` is not
	    ``None`` contributes its string preceded by a single space. The result
	    therefore starts with a space whenever any text was collected, and is
	    the empty string otherwise.

	    NOTE(review): per the Beautiful Soup documentation ``.string`` is
	    ``None`` for a tag with more than one child, so such tags contribute
	    nothing. Confirm that this is the intended behaviour.
	    """
	    # Parse the HTML using Beautiful Soup and keep the tree in `soup`.
	    soup = BeautifulSoup(content, 'html.parser')

	    # Collect the heading, paragraph and anchor tags.
	    tags = soup.find_all(['h1','h2','h3','h4','h5', 'p','a'])

	    # Join once instead of growing a string inside the loop; each
	    # fragment keeps its leading space so the output format is unchanged.
	    return "".join(
	        " " + tag.string for tag in tags if tag.string is not None
	    )
No results found