Last active
October 3, 2022 09:28
-
-
Save AO8/63b9a5acb9fb238cbed13a0269d14137 to your computer and use it in GitHub Desktop.
Convert an HTML table into a CSV file with Python and BeautifulSoup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from an example in "Web Scraping with Python, 2nd Edition" by Ryan Mitchell.
#
# Downloads a Wikipedia page, takes the first table carrying the "wikitable"
# class, and writes each of its rows (header and data cells alike) as one
# row of a CSV file in the current working directory.
import csv
from urllib.request import urlopen

from bs4 import BeautifulSoup

PAGE_URL = "https://en.wikipedia.org/wiki/Comparison_of_text_editors"
OUTPUT_PATH = "editors.csv"

# Read the page inside a ``with`` block so the HTTP response is closed as
# soon as it has been parsed instead of being left open until exit.
# NOTE(review): Wikimedia's User-Agent policy asks scripts to send a
# descriptive User-Agent header; if this request is rejected, build a
# urllib.request.Request with an explicit header -- confirm against the policy.
with urlopen(PAGE_URL) as response:
    soup = BeautifulSoup(response, "html.parser")

# ``find`` returns None when nothing matches, which lets us stop with a
# readable message rather than an IndexError from indexing an empty list.
table = soup.find("table", {"class": "wikitable"})
if table is None:
    raise SystemExit(f"No table with class 'wikitable' found at {PAGE_URL}")

# newline="" is what the csv module expects so it can control line endings
# itself; an explicit UTF-8 encoding keeps non-ASCII cell text from failing
# on platforms whose default encoding cannot represent it.
with open(OUTPUT_PATH, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    for row in table.find_all("tr"):
        # Each cell's text is written exactly as get_text() returns it.
        writer.writerow(
            [cell.get_text() for cell in row.find_all(["td", "th"])]
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment