judell · June 25, 2024 23:28
diff --git a/gdoc-to-markdown-example-1.py b/gdoc-to-markdown-example-1.py
 # Pairs an "[image: NAME WIDTH%]" placeholder line with the first markdown
 # image "![alt](url)" that follows it.
 #   group 1: image name
 #   group 2: width digits (None when no "NN%" is given)
 #   group 3: URL of the markdown image
 # Leading indentation is matched with [ \t]* instead of \s* so that a match
 # cannot start on an earlier blank line and swallow it.
 combined_pattern = re.compile(r'''
    # Image reference
    ^[ \t]*                 # Start of line, followed by optional spaces or tabs (not newlines)
    \[image:\s+             # Literal '[image:', followed by at least one whitespace character
    ([^\]]+?)               # Capture group 1: Image name - one or more characters that are not ']', non-greedy
    \s*                     # Optional whitespace
    (?:                     # Start of non-capturing group for optional width
        (\d+)               # Capture group 2: One or more digits for width
        %                   # Literal '%' character
        \s*                 # Optional whitespace
    )?                      # End of optional non-capturing group
    \]                      # Literal ']' character
    [\s\S]*?                # Any characters (including newlines), non-greedy
    # Corresponding image
    !\[                     # Literal '![' to start markdown image
    .*?                     # Any characters for alt text, non-greedy
    \]                      # Literal ']' to close alt text
    \(                      # Literal '(' to start URL
    (.*?)                   # Capture group 3: URL - any characters, non-greedy
    \)                      # Literal ')' to close URL
 ''', re.MULTILINE | re.VERBOSE)

 def download_and_replace_images(markdown):
    """Replace every placeholder/markdown-image pair with an HTML image tag.

    For each match of ``combined_pattern`` in *markdown*, the image URL
    (group 3) is fetched with ``download_image`` under the file name
    ``<name>.png`` and the whole match is replaced by a ``<p><img .../></p>``
    snippet whose ``src`` is ``SITE_PATH_IMAGES`` plus that file name.
    Text outside the matches is returned unchanged.
    """

    def to_img_tag(found):
        image_name, percent, source_url = found.group(1, 2, 3)
        png_name = f"{image_name}.png"
        src = f"{SITE_PATH_IMAGES}{png_name}"

        # Fetch the picture before emitting the tag that points at it.
        download_image(source_url, png_name)

        if percent:
            # The quadrupled braces in the f-string emit literal '{{' and '}}'.
            return f'<p><img alt="{image_name}" style={{{{"width":"{percent}%"}}}} src="{src}"/></p>'
        return f'<p><img alt="{image_name}" src="{src}"/></p>'

    # sub() keeps the text between matches and calls to_img_tag once per
    # match, left to right.
    return combined_pattern.sub(to_img_tag, markdown)

 def download_image(url, filename, timeout=30):
    """Download *url* and write it to ``REPO_PATH_IMAGES + filename``.

    Args:
        url: Address of the image to fetch.
        filename: File name appended to ``REPO_PATH_IMAGES`` to form the
            destination path.
        timeout: Seconds handed to ``requests.get`` as its timeout, so an
            unresponsive server raises instead of blocking forever.

    Raises:
        requests.HTTPError: If the response has an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within *timeout*.
    """
    image_path = REPO_PATH_IMAGES + filename
    # The with-block closes the streamed response even when the status check
    # or the file write fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(image_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(url, image_path)
	# Pairs an "[image: NAME WIDTH%]" placeholder line with the first markdown
	# image "![alt](url)" that follows it.
	#   group 1: image name
	#   group 2: width digits (None when no "NN%" is given)
	#   group 3: URL of the markdown image
	# Leading indentation is matched with [ \t]* instead of \s* so that a match
	# cannot start on an earlier blank line and swallow it.
	combined_pattern = re.compile(r'''
	    # Image reference
	    ^[ \t]*                 # Start of line, followed by optional spaces or tabs (not newlines)
	    \[image:\s+             # Literal '[image:', followed by at least one whitespace character
	    ([^\]]+?)               # Capture group 1: Image name - one or more characters that are not ']', non-greedy
	    \s*                     # Optional whitespace
	    (?:                     # Start of non-capturing group for optional width
	        (\d+)               # Capture group 2: One or more digits for width
	        %                   # Literal '%' character
	        \s*                 # Optional whitespace
	    )?                      # End of optional non-capturing group
	    \]                      # Literal ']' character
	    [\s\S]*?                # Any characters (including newlines), non-greedy
	    # Corresponding image
	    !\[                     # Literal '![' to start markdown image
	    .*?                     # Any characters for alt text, non-greedy
	    \]                      # Literal ']' to close alt text
	    \(                      # Literal '(' to start URL
	    (.*?)                   # Capture group 3: URL - any characters, non-greedy
	    \)                      # Literal ')' to close URL
	''', re.MULTILINE | re.VERBOSE)

	def download_and_replace_images(markdown):
	    """Replace every placeholder/markdown-image pair with an HTML image tag.

	    For each match of ``combined_pattern`` in *markdown*, the image URL
	    (group 3) is fetched with ``download_image`` under the file name
	    ``<name>.png`` and the whole match is replaced by a ``<p><img .../></p>``
	    snippet whose ``src`` is ``SITE_PATH_IMAGES`` plus that file name.
	    Text outside the matches is returned unchanged.
	    """

	    def to_img_tag(found):
	        name, width, url = found.group(1, 2, 3)
	        filename = f"{name}.png"
	        sitepath = f"{SITE_PATH_IMAGES}{filename}"

	        # Fetch the picture before emitting the tag that points at it.
	        download_image(url, filename)

	        if width:
	            # The quadrupled braces in the f-string emit literal '{{' and '}}'.
	            return f'<p><img alt="{name}" style={{{{"width":"{width}%"}}}} src="{sitepath}"/></p>'
	        return f'<p><img alt="{name}" src="{sitepath}"/></p>'

	    # sub() keeps the text between matches and calls to_img_tag once per
	    # match, left to right.
	    return combined_pattern.sub(to_img_tag, markdown)

	def download_image(url, filename, timeout=30):
	    """Download *url* and write it to ``REPO_PATH_IMAGES + filename``.

	    Args:
	        url: Address of the image to fetch.
	        filename: File name appended to ``REPO_PATH_IMAGES`` to form the
	            destination path.
	        timeout: Seconds handed to ``requests.get`` as its timeout, so an
	            unresponsive server raises instead of blocking forever.

	    Raises:
	        requests.HTTPError: If the response has an error status
	            (raised by ``raise_for_status``).
	        requests.Timeout: If the server does not respond within *timeout*.
	    """
	    image_path = REPO_PATH_IMAGES + filename
	    # The with-block closes the streamed response even when the status check
	    # or the file write fails part-way through.
	    with requests.get(url, stream=True, timeout=timeout) as response:
	        response.raise_for_status()
	        with open(image_path, 'wb') as file:
	            for chunk in response.iter_content(chunk_size=8192):
	                file.write(chunk)
	    print(url, image_path)