patwooky · January 14, 2025 05:32
diff --git a/pyWebScrapeBeautifulSoup.py b/pyWebScrapeBeautifulSoup.py
 from bs4 import BeautifulSoup
 from urllib.request import urlopen

 # Page whose visible text is printed below.
 url = "http://olympus.realpython.org/profiles/dionysus"
 # Open the response as a context manager so the connection is released
 # even if reading or decoding raises.
 with urlopen(url) as page:
     html = page.read().decode("utf-8")
 # Parse the markup and print only its text content (tags stripped).
 soup = BeautifulSoup(html, "html.parser")
 print(soup.get_text())
diff --git a/PyWebScrapingTest.py b/PyWebScrapingTest.py
 # python 3
 # from realpython.com web scraping tutorial
 # https://realpython.com/python-web-scraping-practical-introduction/

 from urllib.request import urlopen
 import re

 # print(dir(ul))
 # url = "http://olympus.realpython.org/profiles/aphrodite"
 # url = "http://olympus.realpython.org/profiles/poseidon"
 # page = urlopen(url)
 # html_bytes = page.read()
 # html = html_bytes.decode("utf-8")

 # titleStartEnd = [html.find("<title>") + len("<title>"), html.find("</title>")]
 # titleText = html[titleStartEnd[0]:titleStartEnd[1]]
 # # print(titleText)

 # pattern = "<title.*?>.*?</title.*?>"
 # match_results = re.search(pattern, html, re.IGNORECASE)
 # title = match_results.group()
 # title = re.sub("<.*?>", "", title) # Remove HTML tags

 # print(title)

 # Write a program that grabs the full HTML from the following URL:
 url = "http://olympus.realpython.org/profiles/dionysus"
 # Then use .find() to display the text following Name: and Favorite Color: (not including any leading spaces or trailing HTML tags that might appear on the same line).
 # NOTE: this solution uses a regular expression instead of str.find() and
 # only extracts the Name field.
 # The context manager closes the HTTP response once the body has been read.
 with urlopen(url) as page:
     html_bytes = page.read()
 html = html_bytes.decode("utf-8")
 print(html)
 # Capture whatever sits between "Name:" and the closing heading tag.
 # \s* absorbs optional whitespace; quantifying the literal ">" or ":"
 # would instead accept repeated punctuation such as "<h2>>>Name::".
 pattern = r"<h\d>\s*Name:\s*(.*?)\s*</h\d>"
 match_results = re.search(pattern, html, re.IGNORECASE)
 print(f'match_results {match_results}')
 # re.search returns None when nothing matches; report that instead of
 # failing with AttributeError on .group().
 if match_results is None:
     print('Name: not found')
 else:
     print(f'Name: {match_results.group(1).strip()}')
	from bs4 import BeautifulSoup
	from urllib.request import urlopen

	# Page whose visible text is printed below.
	url = "http://olympus.realpython.org/profiles/dionysus"
	# Open the response as a context manager so the connection is released
	# even if reading or decoding raises.
	with urlopen(url) as page:
		html = page.read().decode("utf-8")
	# Parse the markup and print only its text content (tags stripped).
	soup = BeautifulSoup(html, "html.parser")
	print(soup.get_text())
	# python 3
	# from realpython.com web scraping tutorial
	# https://realpython.com/python-web-scraping-practical-introduction/

	from urllib.request import urlopen
	import re

	# print(dir(ul))
	# url = "http://olympus.realpython.org/profiles/aphrodite"
	# url = "http://olympus.realpython.org/profiles/poseidon"
	# page = urlopen(url)
	# html_bytes = page.read()
	# html = html_bytes.decode("utf-8")

	# titleStartEnd = [html.find("<title>") + len("<title>"), html.find("</title>")]
	# titleText = html[titleStartEnd[0]:titleStartEnd[1]]
	# # print(titleText)

	# pattern = "<title.*?>.*?</title.*?>"
	# match_results = re.search(pattern, html, re.IGNORECASE)
	# title = match_results.group()
	# title = re.sub("<.*?>", "", title) # Remove HTML tags

	# print(title)

	# Write a program that grabs the full HTML from the following URL:
	url = "http://olympus.realpython.org/profiles/dionysus"
	# Then use .find() to display the text following Name: and Favorite Color: (not including any leading spaces or trailing HTML tags that might appear on the same line).
	# NOTE: this solution uses a regular expression instead of str.find() and
	# only extracts the Name field.
	# The context manager closes the HTTP response once the body has been read.
	with urlopen(url) as page:
		html_bytes = page.read()
	html = html_bytes.decode("utf-8")
	print(html)
	# Capture whatever sits between "Name:" and the closing heading tag.
	# The quantifiers matter: without "*" the group could hold at most one
	# character, so a real name would never match.
	pattern = r"<h\d>\s*Name:\s*(.*?)\s*</h\d>"
	match_results = re.search(pattern, html, re.IGNORECASE)
	print(f'match_results {match_results}')
	# re.search returns None when nothing matches; report that instead of
	# failing with AttributeError on .group().
	if match_results is None:
		print('Name: not found')
	else:
		print(f'Name: {match_results.group(1).strip()}')