@geoffreygarrett
Last active March 30, 2022 09:34
Function to scrape all tables from a URL to a list of pandas DataFrame objects.
"""
Function to scrape all tables from a URL to pandas DataFrame objects. The
default URL is the Wikipedia page for Orbital Launch Systems.
"""
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
def scrape_url_for_tables(url, html_class="wikitable sortable"):
"""
Retrieves Comparison of orbital launch systems (by default) from wikipedia
and stores it in a pandas dataframe.
Parameters
----------
url : str
URL of the wikipedia page to scrape.
html_class
HTML class of the table to scrape.
Returns
-------
dfs : List[pandas.DataFrame]
List of Dataframes containing the scraped data.
"""
# Retrieve the page
page = requests.get(url).text
# Create a BeautifulSoup object
soup = bs(page, "html.parser")
# Find the tables
table = soup.find("table", {"class": html_class})
return pd.read_html(str(table))
if __name__ == "__main__":
URL = "https://en.wikipedia.org/wiki/Comparison_of_orbital_launcher_families"
df = url_tables_to_dataframes(URL)[0]
print(df)
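
Note that pandas can also fetch and filter the tables on its own via the ``attrs`` argument of ``pd.read_html``, which makes the helper above mostly a convenience wrapper around the same parser. A minimal sketch of that alternative, assuming an HTML parser backend (lxml or html5lib) is installed and that Wikipedia serves the page to pandas' default request settings:

# Alternative sketch: let pandas fetch and parse the page itself.
# Assumes lxml or html5lib is available and no custom HTTP headers are needed.
import pandas as pd

URL = "https://en.wikipedia.org/wiki/Comparison_of_orbital_launcher_families"

# attrs filters tables by their HTML attributes, mirroring html_class above.
dfs = pd.read_html(URL, attrs={"class": "wikitable sortable"})
print(f"Found {len(dfs)} tables; first table columns: {list(dfs[0].columns)}")

The custom function remains useful when you need control over the HTTP request itself, for example to set a User-Agent header or reuse a requests session.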