do-me · October 13, 2024 12:12
diff --git a/extract_markdown_table.py b/extract_markdown_table.py
 import pandas as pd
 import re

 def extract_markdown_table(text):
     """
     Extract the first markdown table found in a string.

     A table is taken to be a run of two or more consecutive lines that each
     start and end with a pipe character (``|``). Any text before or after
     that run is discarded.

     Args:
         text: The input string containing markdown. ``None`` and pandas
             missing values (as detected by ``pd.isna``) are accepted.

     Returns:
         The table rows joined by newlines, without a trailing newline, or
         an empty string if no table is found.
     """
     if text is None or pd.isna(text):  # Handle missing values
         return ""

     # Anchor on line starts (MULTILINE) instead of requiring some preamble
     # text before the table: otherwise a table that opens the string loses
     # its header row. ``\Z`` lets the final row end at end-of-string, so a
     # table without a trailing newline keeps its last row.
     match = re.search(r"^(?:\|.*\|(?:\n|\Z)){2,}", text, re.MULTILINE)
     if match is None:
         return ""

     return match.group(0).strip()
	import pandas as pd
	import re

	def extract_markdown_table(text):
	    """
	    Extract the first markdown table found in a string.

	    A table is taken to be a run of two or more consecutive lines that each
	    start and end with a pipe character (``|``). Any text before or after
	    that run is discarded.

	    Args:
	        text: The input string containing markdown. ``None`` and pandas
	            missing values (as detected by ``pd.isna``) are accepted.

	    Returns:
	        The table rows joined by newlines, without a trailing newline, or
	        an empty string if no table is found.
	    """
	    if text is None or pd.isna(text):  # Handle missing values
	        return ""

	    # Pipes are escaped with a single backslash so they match a literal
	    # "|". Line-start anchoring (MULTILINE) keeps the header row when the
	    # table opens the string, and ``\Z`` keeps a final row that has no
	    # trailing newline.
	    match = re.search(r"^(?:\|.*\|(?:\n|\Z)){2,}", text, re.MULTILINE)
	    if match is None:
	        return ""

	    return match.group(0).strip()