martin-martin · March 29, 2021 13:37
diff --git a/rescrape.py b/rescrape.py
 import requests
 from bs4 import BeautifulSoup


 BASE_URL = "https://codingnomads.github.io/recipes/"

 def get_page_content(url, timeout=10):
    """Gets the response from a HTTP call to the URL.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request could block indefinitely.

    Returns:
        The response object produced by ``requests.get``. The status code
        is not checked here, so callers can inspect ``status_code``.
    """
    page = requests.get(url, timeout=timeout)
    return page

 def get_html_content(url):
    """Fetches a page and returns its body as text.

    The URL is requested through ``get_page_content`` and the ``text``
    attribute of the resulting response is handed back.
    """
    response = get_page_content(url)
    return response.text

 def make_soup(html):
    """Parses an HTML string into a BeautifulSoup object.

    The markup is parsed with the "html.parser" backend.
    """
    return BeautifulSoup(html, "html.parser")

 def get_recipe_links(soup):
    """Extracts the URLs of all links on a page, given a bs4 object.

    Anchors without an ``href`` attribute (e.g. named anchors) are skipped
    instead of raising ``KeyError``.

    Args:
        soup: Parsed page offering ``find_all``.

    Returns:
        A list with the ``href`` value of every ``<a>`` element that has
        one, in the order ``find_all`` returns the elements.
    """
    links = []
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if href is not None:
            links.append(href)
    return links

 def get_author(soup):
    """Extracts the name of the author of a recipe.

    The author paragraph is expected to read like "by <name>". Only that
    literal prefix is removed: ``str.strip("by ")`` treats its argument as
    a set of characters and would also eat a leading or trailing "b" or
    "y" of the name itself (e.g. "by Tony" -> "Ton").

    Args:
        soup: Parsed recipe page offering ``find``.

    Returns:
        The author text without the "by " prefix and without surrounding
        whitespace.

    Raises:
        AttributeError: If ``soup.find`` returns ``None`` because there is
            no matching ``<p class="author">`` element.
    """
    prefix = "by "
    text = soup.find("p", class_="author").text.strip()
    if text.startswith(prefix):
        text = text[len(prefix):]
    return text.strip()

 def get_recipe(soup):
    """Returns the text of the page's ``<div class="md">`` element."""
    recipe_div = soup.find("div", class_="md")
    return recipe_div.text


 if __name__ == "__main__":
    # Imported here so the script section brings its own dependency.
    from urllib.parse import urljoin

    # Collect every link on the index page, then scrape each linked page.
    index_html = get_html_content(BASE_URL)
    index_soup = make_soup(index_html)
    recipe_links = get_recipe_links(index_soup)

    for r_link in recipe_links:
        # BASE_URL already ends in "/", so gluing the parts with another
        # "/" produced ".../recipes//<link>". urljoin resolves the link
        # against the base and also copes with absolute links.
        recipe_url = urljoin(BASE_URL, r_link)
        soup = make_soup(get_html_content(recipe_url))
        author = get_author(soup)
        recipe = get_recipe(soup)
        print(f"({author})\t[{recipe}]\n\n\n")
diff --git a/test_rescrape.py b/test_rescrape.py
 import unittest
 import rescrape


 class TestRescrape(unittest.TestCase):
    """Exercises the ``rescrape`` helpers against the live recipes site."""

    def setUp(self):
        # Index page of the site plus one concrete recipe page below it.
        self.BASE_URL = "https://codingnomads.github.io/recipes/"
        self.url = f"{self.BASE_URL}recipes/11-making-my-own-baguet.html"

    def test_get_valid_html_response(self):
        # Both the index and the recipe page must answer with HTTP 200.
        index_response = rescrape.get_page_content(self.BASE_URL)
        recipe_response = rescrape.get_page_content(self.url)
        self.assertEqual(index_response.status_code, 200)
        self.assertEqual(recipe_response.status_code, 200)

    def test_get_html_content_returns_html_string(self):
        # The returned text of either page must contain the HTML doctype.
        index_markup = rescrape.get_html_content(self.BASE_URL)
        recipe_markup = rescrape.get_html_content(self.url)
        self.assertIn("<!DOCTYPE html>", index_markup)
        self.assertIn("<!DOCTYPE html>", recipe_markup)

    def test_make_soup_makes_soup(self):
        # The type is compared through its string form, as an exact match.
        parsed = rescrape.make_soup(rescrape.get_html_content(self.url))
        self.assertEqual("<class 'bs4.BeautifulSoup'>", str(type(parsed)))

    def test_get_recipe_links_gets_recipe_links(self):
        # The index page must yield at least one link.
        index_markup = rescrape.get_html_content(self.BASE_URL)
        index_soup = rescrape.make_soup(index_markup)
        found_links = rescrape.get_recipe_links(index_soup)
        self.assertGreater(len(found_links), 0)

    def test_get_author_finds_author(self):
        # The fixture recipe page is expected to name "Jadafaa" as author.
        recipe_soup = rescrape.make_soup(rescrape.get_html_content(self.url))
        found_author = rescrape.get_author(recipe_soup)
        self.assertNotEqual(len(found_author), 0)
        self.assertEqual("Jadafaa", found_author)

    def test_get_recipe_gets_recipe_text(self):
        # The recipe body must come back as a non-empty string.
        recipe_soup = rescrape.make_soup(rescrape.get_html_content(self.url))
        recipe_text = rescrape.get_recipe(recipe_soup)
        self.assertIsInstance(recipe_text, str)
        self.assertGreater(len(recipe_text), 0)


 if __name__ == "__main__":
    # Run the test suite when this file is executed directly.
    unittest.main()
	import requests
	from bs4 import BeautifulSoup


	BASE_URL = "https://codingnomads.github.io/recipes/"

	def get_page_content(url, timeout=10):
	    """Gets the response from a HTTP call to the URL.

	    Args:
	        url: Address to request with HTTP GET.
	        timeout: Seconds to wait for the server before giving up.
	            Without a timeout the request could block indefinitely.

	    Returns:
	        The response object produced by ``requests.get``. The status
	        code is not checked here, so callers can inspect
	        ``status_code``.
	    """
	    page = requests.get(url, timeout=timeout)
	    return page

	def get_html_content(url):
	    """Gets the HTML from a page.

	    The URL is requested through ``get_page_content`` and the ``text``
	    attribute of the resulting response is returned.
	    """
	    html = get_page_content(url).text
	    return html

	def make_soup(html):
	    """Converts an HTML string to a BeautifulSoup object.

	    The markup is parsed with the "html.parser" backend.
	    """
	    soup = BeautifulSoup(html, "html.parser")
	    return soup

	def get_recipe_links(soup):
	    """Extracts the URLs of all links on a page, given a bs4 object.

	    Anchors without an ``href`` attribute (e.g. named anchors) are
	    skipped instead of raising ``KeyError``.

	    Args:
	        soup: Parsed page offering ``find_all``.

	    Returns:
	        A list with the ``href`` value of every ``<a>`` element that
	        has one, in the order ``find_all`` returns the elements.
	    """
	    links = []
	    for anchor in soup.find_all("a"):
	        href = anchor.get("href")
	        if href is not None:
	            links.append(href)
	    return links

	def get_author(soup):
	    """Extracts the name of the author of a recipe.

	    The author paragraph is expected to read like "by <name>". Only
	    that literal prefix is removed: ``str.strip("by ")`` treats its
	    argument as a set of characters and would also eat a leading or
	    trailing "b" or "y" of the name itself (e.g. "by Tony" -> "Ton").

	    Args:
	        soup: Parsed recipe page offering ``find``.

	    Returns:
	        The author text without the "by " prefix and without
	        surrounding whitespace.

	    Raises:
	        AttributeError: If ``soup.find`` returns ``None`` because there
	            is no matching ``<p class="author">`` element.
	    """
	    prefix = "by "
	    text = soup.find("p", class_="author").text.strip()
	    if text.startswith(prefix):
	        text = text[len(prefix):]
	    return text.strip()

	def get_recipe(soup):
	    """Extracts the recipe text from a bs4 object.

	    Returns the ``text`` of the page's ``<div class="md">`` element.
	    """
	    recipe = soup.find("div", class_="md").text
	    return recipe


	if __name__ == "__main__":
	    # Imported here so the script section brings its own dependency.
	    from urllib.parse import urljoin

	    # Collect every link on the index page, then scrape each linked page.
	    index_html = get_html_content(BASE_URL)
	    index_soup = make_soup(index_html)
	    recipe_links = get_recipe_links(index_soup)

	    for r_link in recipe_links:
	        # BASE_URL already ends in "/", so gluing the parts with another
	        # "/" produced ".../recipes//<link>". urljoin resolves the link
	        # against the base and also copes with absolute links.
	        recipe_url = urljoin(BASE_URL, r_link)
	        soup = make_soup(get_html_content(recipe_url))
	        author = get_author(soup)
	        recipe = get_recipe(soup)
	        print(f"({author})\t[{recipe}]\n\n\n")
	import unittest
	import rescrape


	class TestRescrape(unittest.TestCase):
	    """Exercises the ``rescrape`` helpers against the live recipes site."""

	    def setUp(self):
	        # Index page of the site plus one concrete recipe page below it.
	        self.BASE_URL = "https://codingnomads.github.io/recipes/"
	        self.url = f"{self.BASE_URL}recipes/11-making-my-own-baguet.html"

	    def test_get_valid_html_response(self):
	        # Both the index and the recipe page must answer with HTTP 200.
	        index_page = rescrape.get_page_content(self.BASE_URL)
	        page = rescrape.get_page_content(self.url)
	        self.assertEqual(index_page.status_code, 200)
	        self.assertEqual(page.status_code, 200)

	    def test_get_html_content_returns_html_string(self):
	        # The returned text of either page must contain the HTML doctype.
	        index_html = rescrape.get_html_content(self.BASE_URL)
	        html = rescrape.get_html_content(self.url)
	        self.assertIn("<!DOCTYPE html>", index_html)
	        self.assertIn("<!DOCTYPE html>", html)

	    def test_make_soup_makes_soup(self):
	        # The type is compared through its string form, as an exact match.
	        html = rescrape.get_html_content(self.url)
	        soup = rescrape.make_soup(html)
	        self.assertEqual("<class 'bs4.BeautifulSoup'>", str(type(soup)))

	    def test_get_recipe_links_gets_recipe_links(self):
	        # The index page must yield at least one link.
	        index_html = rescrape.get_html_content(self.BASE_URL)
	        index_soup = rescrape.make_soup(index_html)
	        self.assertGreater(len(rescrape.get_recipe_links(index_soup)), 0)

	    def test_get_author_finds_author(self):
	        # The fixture recipe page is expected to name "Jadafaa" as author.
	        html = rescrape.get_html_content(self.url)
	        soup = rescrape.make_soup(html)
	        author = rescrape.get_author(soup)
	        self.assertNotEqual(len(author), 0)
	        self.assertEqual("Jadafaa", author)

	    def test_get_recipe_gets_recipe_text(self):
	        # The recipe body must come back as a non-empty string.
	        html = rescrape.get_html_content(self.url)
	        soup = rescrape.make_soup(html)
	        recipe = rescrape.get_recipe(soup)
	        self.assertIsInstance(recipe, str)
	        self.assertGreater(len(recipe), 0)


	if __name__ == "__main__":
	    # Run the test suite when this file is executed directly.
	    unittest.main()