Last active
May 25, 2024 21:38
-
-
Save therealkenc/46afe6f09da9edde4b55c8ccbf550bf0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the Forex Factory calendar page and print its events as a pandas
# DataFrame with the columns: date, time, currency, event.
#
# Usage: pass an optional first command-line argument to send it as the
# `range` query parameter of /calendar; with no argument the default
# /calendar page is requested.
import sys
import http.client
import gzip
import io
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup

# Define the URL and headers
url = 'www.forexfactory.com'
if len(sys.argv) > 1:
    date_range = sys.argv[1]
    # Percent-encode the user-supplied value so spaces, '&', '#', etc. cannot
    # break or alter the request line. '%' is left untouched so an argument
    # that is already percent-encoded keeps working.
    path = f'/calendar?range={quote(date_range, safe="%")}'
else:
    path = '/calendar'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # Only advertise the encoding this script can actually decode; offering
    # deflate/br would let the server reply with bytes we cannot decompress.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0',
}

# Create a connection (with a timeout so a stalled server cannot hang us)
conn = http.client.HTTPSConnection(url, timeout=30)
try:
    # Make a request
    conn.request("GET", path, headers=headers)
    # Get the response
    response = conn.getresponse()
    status_code = response.status
    # Header values are case-insensitive tokens; normalise before comparing.
    content_encoding = (response.getheader('Content-Encoding') or '').strip().lower()
    content = response.read()
finally:
    # Always release the socket, even if the request or read raised.
    conn.close()

# Check if the request was successful
if status_code == 200:
    # Handle compression if necessary
    if content_encoding == 'gzip':
        with gzip.GzipFile(fileobj=io.BytesIO(content)) as gz:
            content = gz.read()
    # Decode the content; replace undecodable bytes instead of aborting
    content = content.decode('utf-8', errors='replace')
    # Parse the HTML content
    soup = BeautifulSoup(content, 'html.parser')
    # Find the calendar table
    table = soup.find('table', {'class': 'calendar__table'})
    if table is not None:
        # Find all rows within the table
        rows = table.find_all('tr', {'class': 'calendar__row'})
        # Extract details from each row
        calendar_data = []
        for row in rows:
            time_element = row.find('td', {'class': 'calendar__time'})
            currency_element = row.find('td', {'class': 'calendar__currency'})
            event_element = row.find('td', {'class': 'calendar__event'})
            # Rows lacking any of the three cells are skipped.
            if time_element is None or currency_element is None or event_element is None:
                continue
            # The date is taken from the closest preceding day-breaker row.
            date_element = row.find_previous('tr', {'class': 'calendar__row--day-breaker'})
            calendar_data.append({
                'date': date_element.text.strip() if date_element is not None else 'N/A',
                'time': time_element.text.strip(),
                'currency': currency_element.text.strip(),
                'event': event_element.text.strip(),
            })
        # Create a pandas DataFrame
        df = pd.DataFrame(calendar_data)
        # Print the DataFrame
        print(df)
    else:
        print("Calendar table not found.")
else:
    print(f"Failed to retrieve the webpage. Status code: {status_code}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment