Last active
April 28, 2025 11:17
-
-
Save bjornblissing/ee90f045445e61900f0eaa4b21fd9fab to your computer and use it in GitHub Desktop.
Lista dagens luncher i Ebbepark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
from datetime import date | |
def strip_trailing_empty_lines(s: str) -> str:
    """Return *s* without any trailing lines that are empty or whitespace-only.

    Interior blank lines are preserved; only the trailing run is dropped.
    The result is re-joined with "\n" regardless of the original line endings.
    """
    kept = s.splitlines()
    while kept:
        if kept[-1].strip():
            break
        del kept[-1]
    return "\n".join(kept)
def laluna():
    """Fetch and print this week's lunch menu from La Luna (lalunat1.se).

    Follows every "Lunch" link in the site's navigation, collects the
    <strong> text from each lunch page, normalizes whitespace and line
    breaks, then prints the result with day headers on their own lines
    and menu items indented by a tab.

    Prints an error message and returns early if any request fails.
    """
    print("\n=============")
    print("La Luna")
    print("=============")
    base_url = "http://www.lalunat1.se"
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(base_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching main page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    nav = soup.find(id="nav")
    if not nav:
        print("Could not find navigation section.")
        return
    lunch_links = nav.find_all("a", string="Lunch")
    lunch_text = ""
    for link in lunch_links:
        lunch_url = base_url + link["href"]
        try:
            lunch_response = requests.get(lunch_url, timeout=10)
            lunch_response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching lunch page: {e}")
            continue
        lunch_soup = BeautifulSoup(lunch_response.content, "html.parser")
        text_blocks = lunch_soup.find_all("div", class_="text_content")
        for block in text_blocks:
            for p in block.find_all("p"):
                for strong in p.find_all("strong"):
                    lunch_text += strong.get_text()
                # One line per paragraph of <strong> fragments.
                lunch_text += "\n"
    # Clean up string: replace invisible Unicode whitespace with plain spaces.
    replacements = {
        "\u00A0": " ",  # non-breaking space
        "\u200B": " ",  # zero width space
    }
    for pattern, repl in replacements.items():
        lunch_text = lunch_text.replace(pattern, repl)
    # Collapse lines with misplaced line breaks
    lunch_text = re.sub(r"(\S)[ \t]*\n[ \t]*(\S)", r"\1 \2", lunch_text)
    lunch_text = re.sub(r"[ \t]{2,}", " ", lunch_text)
    lunch_text = re.sub(r"^[ ]+", "", lunch_text, flags=re.MULTILINE)
    # Normalize headers with a single newline after each day header
    day_headers = [
        "Måndag", "Tisdag", "Onsdag", "Torsdag", "Fredag",
        "Veckans sallad", "Veckans special", "Veckans vegetariska"
    ]
    for header in day_headers:
        # Case-insensitive match; replacement also normalizes header casing.
        lunch_text = re.sub(fr"{header}[\s:]*", f"{header}\n", lunch_text, flags=re.IGNORECASE)
    # Add new line after week number
    lunch_text = re.sub(r"(Vecka \d+)", r"\1\n", lunch_text, flags=re.IGNORECASE)
    # Drop blank lines, then insert a tab before each non-header line.
    lunch_text = "".join([s for s in lunch_text.splitlines(True) if s.strip("\r\n")])
    lunch_text = re.sub(r"^(?!Måndag|Tisdag|Onsdag|Torsdag|Fredag|Veckans sallad|Veckans special|Veckans vegetariska|Vecka \d+)(.*)", r"\t\1", lunch_text, flags=re.MULTILINE)
    lunch_text = strip_trailing_empty_lines(lunch_text)
    print(lunch_text)
def get_lazlos_week_menu(week_tag):
    """Print the day-by-day menu that follows *week_tag* on the Laszlos page.

    Looks for the next element whose id matches ``fdm-menu-<number>``; each
    <ul> inside it is one day, with an <h3> heading, item titles in
    ``p.fdm-item-title``, and optional descriptions in ``div.fdm-item-content``.
    """
    menu = week_tag.find_next(id=re.compile(r'^fdm-menu-\d+$'))
    if not menu:
        print("Could not find lunch menu on the page.")
        return
    for day in menu.find_all("ul"):
        heading = day.find("h3")
        if heading:
            print(f"\n{heading.get_text(strip=True)}")
        for item in day.find_all("li", class_="fdm-item"):
            title = item.find("p", class_="fdm-item-title")
            if title:
                print(f"\t{title.get_text(strip=True)}")
            description = item.find("div", class_="fdm-item-content")
            if not description:
                continue
            for paragraph in description.find_all("p"):
                text = paragraph.get_text(strip=True)
                if text:
                    print(f"\t {text}")
def laszlos_krog():
    """Fetch and print the current week's lunch menu from Laszlos Krog, Ebbepark.

    Scans the page's ``p.vecka`` week headers for one whose <strong> text
    matches "VECKA <n>" with n equal to the current ISO week number, then
    delegates printing of that week's menu to :func:`get_lazlos_week_menu`.

    Prints an error message and returns early if the request fails.
    """
    print("\n=============")
    print("Laszlos Krog")
    print("=============")
    url = "https://www.laszloskrog.se/ebbepark/"
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    week_tags = soup.find_all("p", class_="vecka")
    current_week_num = date.today().isocalendar().week
    for week_tag in week_tags:
        strong_tag = week_tag.find("strong")
        if not strong_tag:
            continue
        week_text = strong_tag.get_text(strip=True)
        match = re.match(r"VECKA\s+(\d+)", week_text, flags=re.IGNORECASE)
        if match:
            week_num = int(match.group(1))
            if week_num == current_week_num:
                get_lazlos_week_menu(week_tag)
                return  # Stop after finding the correct week
    print("Current week's menu not found.")
if __name__ == "__main__":
    # Print today's lunch menus for both Ebbepark restaurants, one after the other.
    laszlos_krog()
    laluna()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment