List today's lunches in Ebbepark
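The script scrapes three restaurant sites (Laszlos Krog, La Luna, Don Luigi) and prints the current lunch menus to the terminal; Don Luigi publishes its menu as an image, so that one is run through OCR. A minimal setup sketch, assuming the PyPI package names implied by the imports and a hypothetical file name lunch.py (the gist does not pin any dependencies; easyocr pulls in PyTorch, which is a large download):

    pip install requests beautifulsoup4 easyocr
    python3 lunch.py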
#!/usr/bin/env python3
import os
import re
import sys
import warnings
from datetime import date
from io import StringIO

import easyocr
import requests
from bs4 import BeautifulSoup

# Suppress EasyOCR GPU warnings
os.environ['EASYOCR_VERBOSE'] = '0'
warnings.filterwarnings("ignore", category=UserWarning, module="torch")


def strip_trailing_empty_lines(s: str) -> str:
    lines = s.splitlines()
    while lines and lines[-1].strip() == "":
        lines.pop()
    return "\n".join(lines)

def laluna():
    print("\n=============")
    print("La Luna")
    print("=============")
    base_url = "http://www.lalunat1.se"
    # First, get the main page to find the lunch link
    try:
        response = requests.get(base_url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching main page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    # Find the lunch link in the navigation: look for a link whose text is "Lunch"
    lunch_link = None
    for link in soup.find_all("a", href=True):
        if link.get_text(strip=True).lower() == "lunch":
            lunch_link = link["href"]
            break
    if not lunch_link:
        print("Could not find lunch page link.")
        return
    # Construct the full URL if it is a relative link
    if lunch_link.startswith("/"):
        lunch_url = base_url + lunch_link
    elif lunch_link.startswith("http"):
        lunch_url = lunch_link
    else:
        lunch_url = base_url + "/" + lunch_link
    # Now get the lunch page
    try:
        response = requests.get(lunch_url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching lunch page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    # Treat double <br> tags as dish separators, but only when followed by a
    # capital letter. The (?i:...) group keeps the <br> match case-insensitive
    # without also making [A-ZÅÄÖ] match lowercase letters.
    html_content = str(soup)
    html_content = re.sub(r"(?i:<br[^>]*>\s*<br[^>]*>)\s*([A-ZÅÄÖ])", r"__DISH_SEPARATOR__\1", html_content)
    soup = BeautifulSoup(html_content, "html.parser")
    # Replace the remaining single <br> tags (and <hr> rules) with spaces
    for tag in soup.find_all(["br", "hr"]):
        tag.replace_with(" ")
    # Mark dish-separator spans: spans with class "wysiwyg-font-size-24" that
    # contain a <b> tag and whose text starts with a capital letter. The span
    # content is kept; the marker flags the start of a new dish.
    for span in soup.find_all("span", class_="wysiwyg-font-size-24"):
        if span.find("b"):
            span_text = span.get_text(strip=True)
            if span_text and span_text[0].isupper():
                span.insert_before("__DISH_SEPARATOR__")
    # Try different selectors to find the menu content
    selectors = [
        "div[class*='widget-content-']",
        ".widget-content",
        "div[class*='content']",
        "p", "div"
    ]
    lunch_text = ""
    for selector in selectors:
        elements = soup.select(selector)
        for element in elements:
            # Get text with a separator to handle missing spaces between elements
            text = element.get_text(separator=" ", strip=True)
            if text and any(keyword in text.lower() for keyword in ["vecka", "måndag", "tisdag", "onsdag", "torsdag", "fredag", "dagens lunch", "special", "vegetarisk"]):
                if text not in lunch_text:  # Avoid duplicates
                    lunch_text += text + "\n"
        if lunch_text.strip():  # If we found content, stop trying selectors
            break
    if not lunch_text.strip():
        print("Could not find lunch menu content.")
        return
    # Clean up the string
    replacements = {
        "\u00A0": " ",  # non-breaking space
        "\u200B": " ",  # zero-width space
    }
    for pattern, repl in replacements.items():
        lunch_text = lunch_text.replace(pattern, repl)
    # Turn the dish separators into line breaks with a marker
    lunch_text = lunch_text.replace("__DISH_SEPARATOR__", "\n__SECOND_DISH__")
    # Collapse misplaced line breaks, but preserve the important separators
    lunch_text = re.sub(r"(\S)[ \t]*\n[ \t]*(?!__SECOND_DISH__|Måndag|Tisdag|Onsdag|Torsdag|Fredag|Veckans)(\S)", r"\1 \2", lunch_text)
    lunch_text = re.sub(r"[ \t]{2,}", " ", lunch_text)
    lunch_text = re.sub(r"^[ ]+", "", lunch_text, flags=re.MULTILINE)
    # Normalize headers: a newline before and after each day header
    day_headers = [
        "Måndag", "Tisdag", "Onsdag", "Torsdag", "Fredag",
        "Veckans sallad", "Veckans special", "Veckans vegetariska"
    ]
    for header in day_headers:
        lunch_text = re.sub(fr"{header}[\s:]*", f"\n{header}\n", lunch_text, flags=re.IGNORECASE)
    # Add a newline after the week number and after "Dagens lunch"
    lunch_text = re.sub(r"(Vecka \d+)", r"\1\n", lunch_text, flags=re.IGNORECASE)
    lunch_text = re.sub(r"(Dagens lunch)", r"\1\n", lunch_text, flags=re.IGNORECASE)
    # Clean up extra line breaks that might have been introduced
    lunch_text = re.sub(r"\n\s*\n\s*__SECOND_DISH__", r"\n__SECOND_DISH__", lunch_text)
    # Split off the additional info at the end and remove "Luncha med oss!"
    lunch_text = re.sub(r"(vegetarisk|sallad)(.*?)(Luncha med oss!)", r"\1\2", lunch_text, flags=re.IGNORECASE)
    lunch_text = re.sub(r"(ris) (Luncha med oss!)", r"\1", lunch_text, flags=re.IGNORECASE)
    lunch_text = re.sub(r"(Luncha med oss!)(.*?)(Alla ordinarie)", r"\n\3", lunch_text, flags=re.IGNORECASE)
    lunch_text = re.sub(r"(lunchmenyn)(.*?)(Vi serverar)", r"\1\n\3", lunch_text, flags=re.IGNORECASE)
    # Add a line break before "Alla ordinarie pizzor" if there is not one already
    lunch_text = re.sub(r"([^\n])(Alla ordinarie)", r"\1\n\2", lunch_text, flags=re.IGNORECASE)
    # Drop empty lines
    lunch_text = "".join(s for s in lunch_text.splitlines(True) if s.strip("\r\n"))
    # Re-indent line by line: headers flush left, dishes indented with a tab
    lines = lunch_text.split('\n')
    processed_lines = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Skip the "Luncha med oss!" line
        if line == "Luncha med oss!":
            continue
        # Header lines are not indented
        elif re.match(r'^(Måndag|Tisdag|Onsdag|Torsdag|Fredag|Veckans sallad|Veckans special|Veckans vegetariska|Vecka \d+|Dagens lunch)$', line):
            processed_lines.append(line)
        elif line.startswith('__SECOND_DISH__'):
            # Replace the second-dish marker with a tab plus any remaining text
            remaining_text = line.replace('__SECOND_DISH__', '').strip()
            processed_lines.append(f'\t{remaining_text}' if remaining_text else '\t')
        else:
            # Indent the dish text (the line was stripped above, so it cannot
            # already start with a tab)
            processed_lines.append(f'\t{line}')
    lunch_text = '\n'.join(processed_lines)
    lunch_text = strip_trailing_empty_lines(lunch_text)
    print(lunch_text)

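# A sketch of how the double-<br> dish-separator regex in laluna() behaves,
# on a hypothetical HTML fragment:
#   re.sub(r"(?i:<br[^>]*>\s*<br[^>]*>)\s*([A-ZÅÄÖ])", r"__DISH_SEPARATOR__\1",
#          "Köttbullar med potatis<br/><br/>Vegetarisk lasagne")
#   -> "Köttbullar med potatis__DISH_SEPARATOR__Vegetarisk lasagne"
# A lowercase letter after the breaks is treated as a wrapped line, not a new dish.
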
def get_laszlos_week_menu(week_tag):
    lunch_menu = week_tag.find_next(id=re.compile(r'^fdm-menu-\d+$'))
    if not lunch_menu:
        print("Could not find lunch menu on the page.")
        return
    days = lunch_menu.find_all("ul")
    for day in days:
        day_name_tag = day.find("h3")
        if day_name_tag:
            print(f"\n{day_name_tag.get_text(strip=True)}")
        food_items = day.find_all("li", class_="fdm-item")
        for item in food_items:
            food_title_tag = item.find("p", class_="fdm-item-title")
            if food_title_tag:
                print(f"\t{food_title_tag.get_text(strip=True)}")
            food_content_div = item.find("div", class_="fdm-item-content")
            if food_content_div:
                subtitles = food_content_div.find_all("p")
                for subtitle in subtitles:
                    subtitle_text = subtitle.get_text(strip=True)
                    if subtitle_text:
                        print(f"\t {subtitle_text}")

def laszlos_krog():
    print("\n=============")
    print("Laszlos Krog")
    print("=============")
    url = "https://www.laszloskrog.se/ebbepark/"
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    week_tags = soup.find_all("p", class_="vecka")
    current_week_num = date.today().isocalendar().week
    for week_tag in week_tags:
        strong_tag = week_tag.find("strong")
        if not strong_tag:
            continue
        week_text = strong_tag.get_text(strip=True)
        match = re.match(r"VECKA\s+(\d+)", week_text, flags=re.IGNORECASE)
        if match:
            week_num = int(match.group(1))
            if week_num == current_week_num:
                get_laszlos_week_menu(week_tag)
                return  # Stop after finding the correct week
    print("Current week's menu not found.")

def don_luigi():
    print("\n=============")
    print("Don Luigi")
    print("=============")
    url = "https://www.donluigi.se/lunchmeny/"
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching page: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    # Find the entry-content div
    entry_content = soup.find("div", class_="entry-content")
    if not entry_content:
        print("Could not find entry-content div.")
        return
    # Find the menu image within the entry-content div
    img_tag = entry_content.find("img")
    if not img_tag:
        print("Could not find image in entry-content div.")
        return
    img_src = img_tag.get("src")
    if not img_src:
        print("Could not find image source.")
        return
    # Make sure we have the full URL
    if img_src.startswith("//"):
        img_src = "https:" + img_src
    elif img_src.startswith("/"):
        img_src = "https://www.donluigi.se" + img_src
    try:
        # Download the image
        img_response = requests.get(img_src)
        img_response.raise_for_status()
        # Initialize the EasyOCR reader with Swedish and English.
        # Suppress the GPU warning by temporarily redirecting stdout.
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            reader = easyocr.Reader(['sv', 'en'], verbose=False)
        finally:
            sys.stdout = old_stdout
        # Extract text from the image (readtext accepts the raw bytes
        # directly, so no PIL decode is needed)
        results = reader.readtext(img_response.content)
        # Combine all detected text with reasonable confidence
        text_parts = []
        for (bbox, text, confidence) in results:
            if confidence > 0.5:
                text_parts.append(text)
        text = ' '.join(text_parts)
        if not text.strip():
            print("Could not extract text from the menu image.")
            return
        # Clean up the extracted text: collapse runs of whitespace
        text = re.sub(r'\s+', ' ', text.strip())
        # Remove everything from "*Det går bra att beställa" onwards
        after_work_pos = text.find('*Det går bra att beställa')
        if after_work_pos != -1:
            text = text[:after_work_pos].strip()
        # Fix common OCR artifacts
        text = text.replace('_', ', ')  # OCR sometimes reads commas as underscores
        # text = text.replace('írån', 'från')             # Fix Swedish characters
        # text = text.replace('Falaíel', 'Falafel')       # Fix OCR error
        # text = text.replace('49krlextra', '49kr extra') # Fix price formatting
        # Try to identify and format the different sections
        formatted_text = text
        # Remove prices (3 or more digits)
        formatted_text = re.sub(r'\b\d{3,}\b', '', formatted_text)
        # Add line breaks before key sections
        formatted_text = re.sub(r'(LUNCHMENY|Veckans|Pasta|Sallad|Husman)', r'\n\1', formatted_text)
        formatted_text = re.sub(r'(MAN - FRE|MÅN FRE)', r'\n\1', formatted_text)
        # Add line breaks after specific headers and indent the following content
        formatted_text = re.sub(r'(Veckans Pinsa)\s+([A-ZÅÄÖ])', r'\1\n\t\2', formatted_text)
        formatted_text = re.sub(r'(Pasta)\s+(Pasta|[A-ZÅÄÖ])', r'\1\n\t\2', formatted_text)
        formatted_text = re.sub(r'(Sallad)\s+([A-ZÅÄÖ])', r'\1\n\t\2', formatted_text)
        formatted_text = re.sub(r'(Husman)\s+([A-ZÅÄÖ])', r'\1\n\t\2', formatted_text)
        # Collapse multiple newlines
        formatted_text = re.sub(r'\n\s*\n', '\n', formatted_text)
        # Re-indent line by line: section headers flush left with a blank line
        # before them, everything else indented with a tab
        lines = formatted_text.split('\n')
        final_lines = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if any(keyword in line.upper() for keyword in ['LUNCHMENY', 'MAN - FRE', 'MÅN FRE']):
                # Main section header
                if final_lines:  # Add spacing before headers (except the first)
                    final_lines.append("")
                final_lines.append(line)
            elif line in ['Veckans Pinsa', 'Pasta', 'Sallad', 'Husman']:
                # Menu item header
                if final_lines:  # Add spacing before headers (except the first)
                    final_lines.append("")
                final_lines.append(line)
            else:
                # Regular text: indent it (the line was stripped above, so it
                # cannot already start with a tab)
                final_lines.append(f"\t{line}")
        final_text = '\n'.join(final_lines)
        if final_text.strip():
            print(final_text)
        else:
            print("No readable menu content found in the image.")
    except requests.RequestException as e:
        print(f"Error downloading image: {e}")
    except Exception as e:
        print(f"Error processing image: {e}")

if __name__ == "__main__":
    laszlos_krog()
    laluna()
    don_luigi()