Created
July 16, 2023 22:17
-
-
Save StellarStoic/3c19567b28dfb2ee1d5c748c5cfa0a1d to your computer and use it in GitHub Desktop.
slo_control_scrape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
from datetime import datetime, timedelta | |
import pytz | |
import re | |
# Scrape the Slovenia Control NOTAM summary page and print, for each of the
# next seven calendar days, the NOTAMs whose validity period overlaps that day.

# the URL of the page you want to scrape
url = "https://www.sloveniacontrol.si/Strani/Summary-C.aspx"

# zone assumed when a timestamp carries no (known) timezone abbreviation
LOCAL_TZ_NAME = 'Europe/Ljubljana'

# timezone abbreviations mapping
# NOTE(review): in ICAO NOTAM usage "EST" after an item C) time usually marks
# the end time as *estimated*, not US Eastern time -- confirm what the site
# means before trusting the 'EST' entry.
tz_mapping = {'EST': 'US/Eastern', 'CET': 'Europe/Paris', 'UTC': 'UTC'}

# "dd.mm.yyyy", optionally followed by "HH:MM", optionally followed by one of
# the abbreviations in tz_mapping (built from the mapping so both stay in sync)
date_pattern = re.compile(
    r"(\d{2}\.\d{2}\.\d{4})(?: (\d{2}:\d{2})(?: ("
    + "|".join(re.escape(abbr) for abbr in tz_mapping)
    + r"))?)?"
)

# how long a permanent ("PERM") NOTAM is listed after its start date
PERM_DISPLAY_PERIOD = timedelta(hours=48)


def item_value(text, letter):
    """Strip the ``B)`` / ``C)`` label from a NOTAM item.

    Returns the text after the label with surrounding whitespace removed, or
    ``None`` when *text* does not start with ``"<letter>)"``.
    """
    text = text.strip()
    label = f"{letter})"
    if not text.startswith(label):
        return None
    return text[len(label):].strip()


def parse_notam_timestamp(text):
    """Parse ``dd.mm.yyyy[ HH:MM[ TZ]]`` into ``(naive_datetime, tz_abbr)``.

    ``tz_abbr`` is ``None`` when the text carries no timezone abbreviation.
    Returns ``None`` when *text* is not a timestamp (e.g. ``"PERM"``) or names
    an impossible date/time.
    """
    # collapse runs of whitespace (including non-breaking spaces) to one space
    match = date_pattern.fullmatch(' '.join(text.split()))
    if match is None:
        return None
    date_part, time_part, tz_abbr = match.groups()
    try:
        if time_part is None:
            naive = datetime.strptime(date_part, "%d.%m.%Y")
        else:
            naive = datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M")
    except ValueError:
        return None
    return naive, tz_abbr


def localize_timestamp(naive, tz_abbr):
    """Attach the zone for *tz_abbr* (default: LOCAL_TZ_NAME) to *naive*."""
    zone_name = tz_mapping.get(tz_abbr, LOCAL_TZ_NAME)
    return pytz.timezone(zone_name).localize(naive)


def build_day_windows(now, localize, days=7):
    """Return ``[(start, end), ...]`` for today and the following days.

    The first window runs from *now* to the next midnight; every later window
    spans one whole calendar day. *localize* turns a naive midnight into a
    datetime comparable with *now* (e.g. ``pytz_zone.localize``).
    """
    windows = []
    for offset in range(days):
        day = now.date() + timedelta(days=offset)
        next_day = day + timedelta(days=1)
        if offset == 0:
            start = now
        else:
            start = localize(datetime(day.year, day.month, day.day))
        end = localize(datetime(next_day.year, next_day.month, next_day.day))
        windows.append((start, end))
    return windows


def is_active_during(start_date, end_date, window_start, window_end):
    """Return True when [start_date, end_date] overlaps [window_start, window_end).

    A NOTAM with a missing start or end date is never considered active.
    """
    if start_date is None or end_date is None:
        return False
    return start_date < window_end and end_date >= window_start


def extract_notam_period(notam_details):
    """Return ``(start_date, end_date)`` read from the B) and C) items.

    Either value is ``None`` when the corresponding item is missing or cannot
    be parsed. A ``PERM`` end is shown for PERM_DISPLAY_PERIOD after the start.
    """
    start_date = None
    end_date = None
    is_permanent = False
    for detail in notam_details:
        item_b = detail.find(class_='kzps-notam-item-b')
        if item_b is not None:
            value = item_value(item_b.get_text(), 'B')
            parsed = parse_notam_timestamp(value) if value is not None else None
            if parsed is not None:
                start_date = localize_timestamp(*parsed)
        item_c = detail.find(class_='kzps-notam-item-c')
        if item_c is not None:
            value = item_value(item_c.get_text(), 'C')
            if value == 'PERM':
                is_permanent = True
            elif value is not None:
                parsed = parse_notam_timestamp(value)
                if parsed is not None:
                    end_date = localize_timestamp(*parsed)
                    is_permanent = False
    if is_permanent:
        # resolved after the loop so it does not depend on B) preceding C)
        end_date = None if start_date is None else start_date + PERM_DISPLAY_PERIOD
    return start_date, end_date


def main():
    """Fetch the NOTAM summary page and print the NOTAMs for the next 7 days."""
    # send a GET request to the URL; fail loudly instead of parsing an error page
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # find all elements with the NOTAM class
    notam_data_elements = soup.find_all(class_="kzps-notam-item")
    if not notam_data_elements:
        print("No NOTAM data elements found.")
        return

    local_tz = pytz.timezone(LOCAL_TZ_NAME)
    today = datetime.now(local_tz)
    day_windows = build_day_windows(today, local_tz.localize)
    notams_by_day = [[] for _ in day_windows]

    start_date = None
    end_date = None
    for notam_data in notam_data_elements:
        heading = notam_data.find('h1')
        notam_number = heading.get_text() if heading is not None else "<unknown>"
        notam_details = notam_data.find_all('p')
        start_date, end_date = extract_notam_period(notam_details)
        # file the NOTAM under every day its validity period touches
        for (window_start, window_end), notams in zip(day_windows, notams_by_day):
            if is_active_during(start_date, end_date, window_start, window_end):
                notams.append((notam_number, notam_details))

    # print NOTAMs for each day
    for (window_start, _), notams in zip(day_windows, notams_by_day):
        print("\n\n" + window_start.strftime('%A:') + "\n")
        if notams:
            for notam_number, notam_details in notams:
                print("----------------------------------------")
                print(f"NOTAM Number: {notam_number}")
                for detail in notam_details:
                    print(detail.get_text())
        else:
            print("No NOTAMs. Yay!")

    # debug summary: validity period of the last NOTAM on the page
    print(f"\nStart date: {start_date.strftime('%d.%m.%Y %H:%M')} {start_date.tzinfo}" if start_date else "Start date: None")
    print(f"End date: {end_date.strftime('%d.%m.%Y %H:%M')} {end_date.tzinfo}" if end_date else "End date: None")
    print(f"Today's date: {today.strftime('%d.%m.%Y %H:%M')}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment