victormurcia · September 5, 2022 04:28
diff --git a/get_urls_from_html.py b/get_urls_from_html.py
 # Decode downloaded HTML and extract the targets of all <a href=""> links.
 def get_urls_from_html(content):
     """Return the href value of every anchor tag found in ``content``.

     Args:
         content: Raw HTML as bytes, expected to be UTF-8 encoded.

     Returns:
         A list of href strings, one per ``<a>`` tag that has an href
         attribute. Anchors without an href are skipped, so the list
         never contains ``None``.
     """
     # Decode as UTF-8 (not ASCII); undecodable bytes are replaced instead
     # of raising, so one bad byte does not discard the whole page.
     html = content.decode('utf-8', errors='replace')

     # Parse the document as best we can.
     soup = BeautifulSoup(html, 'html.parser')

     # Select only the <a> tags that actually carry an href attribute.
     atags = soup.find_all('a', href=True)

     # Collect the link target from each selected tag.
     return [tag['href'] for tag in atags]
	# Decode downloaded HTML and extract the targets of all <a href=""> links.
	def get_urls_from_html(content):
		"""Return the href value of every anchor tag found in ``content``.

		Args:
			content: Raw HTML as bytes, expected to be UTF-8 encoded.

		Returns:
			A list of href strings, one per ``<a>`` tag that has an href
			attribute. Anchors without an href are skipped, so the list
			never contains ``None``.
		"""
		# Decode as UTF-8 (not ASCII); undecodable bytes are replaced instead
		# of raising, so one bad byte does not discard the whole page.
		html = content.decode('utf-8', errors='replace')

		# Parse the document as best we can.
		soup = BeautifulSoup(html, 'html.parser')

		# Select only the <a> tags that actually carry an href attribute.
		atags = soup.find_all('a', href=True)

		# Collect the link target from each selected tag.
		return [tag['href'] for tag in atags]
No results found