Scrape rows from a static HTML table and export selected columns (account ID, environment, description, link) to a CSV file.
from bs4 import BeautifulSoup
import csv
import requests

# URL of the static HTML page
url = 'https://example.com/static-page.html'  # Replace with your actual URL

# Fetch the page content
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Open a CSV file to write the output
    with open('output.csv', 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)

        # Write the header row (optional)
        csvwriter.writerow(['Account ID', 'Environment', 'Description', 'URL'])

        # Loop through each table row (tr), skipping the header row
        for index, tr in enumerate(soup.find_all('tr')):
            if index == 0:
                continue  # Skip the header row

            # Extract data from specific columns (td elements)
            columns = tr.find_all('td')
            if len(columns) >= 7:  # Ensure there are at least 7 columns
                account_id = columns[0].text.strip()
                environment = columns[1].text.strip()
                description = columns[2].text.strip()
                # Use a separate name so the page URL above is not shadowed
                link = columns[6].find('a')['href'] if columns[6].find('a') else ''

                # Write the extracted data to the CSV file
                csvwriter.writerow([account_id, environment, description, link])

    print("CSV file has been created successfully.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")