Skip to content

Instantly share code, notes, and snippets.

@judell
Last active June 25, 2024 23:28
Show Gist options
  • Save judell/c6b02a15403f4a27b89222d2c67f6347 to your computer and use it in GitHub Desktop.
Save judell/c6b02a15403f4a27b89222d2c67f6347 to your computer and use it in GitHub Desktop.
gdoc to markdown example 1
# Matches an "[image: NAME WIDTH%]" placeholder line together with the first
# markdown image "![alt](URL)" that follows it, so the pair can be replaced
# in a single substitution.
#   group 1 -> image name taken from the placeholder
#   group 2 -> optional width percentage (digits only), None when absent
#   group 3 -> URL of the markdown image
combined_pattern = re.compile(r'''
    # Image reference
    ^[^\S\n]*       # Start of line, then optional indentation: any whitespace except
                    # a newline, so blank lines before the placeholder are not
                    # pulled into the match (and later dropped by a replacement)
    \[image:\s+     # Literal '[image:', followed by at least one whitespace character
    ([^\]]+?)       # Capture group 1: Image name - one or more characters that are not ']', non-greedy
    \s*             # Optional whitespace
    (?:             # Start of non-capturing group for optional width
        (\d+)       # Capture group 2: One or more digits for width
        %           # Literal '%' character
        \s*         # Optional whitespace
    )?              # End of optional non-capturing group
    \]              # Literal ']' character
    [\s\S]*?        # Any characters (including newlines), non-greedy
    # Corresponding image
    !\[             # Literal '![' to start markdown image
    .*?             # Any characters for alt text, non-greedy
    \]              # Literal ']' to close alt text
    \(              # Literal '(' to start URL
    (.*?)           # Capture group 3: URL - any characters, non-greedy
    \)              # Literal ')' to close URL
''', re.MULTILINE | re.VERBOSE)
def download_and_replace_images(markdown):
    """Replace each image placeholder / markdown-image pair with an <img> tag.

    For every match of ``combined_pattern`` in *markdown*, the image URL is
    fetched through ``download_image`` and stored as ``<name>.png``; the
    matched text is then replaced by a ``<p><img .../></p>`` snippet whose
    ``src`` is ``SITE_PATH_IMAGES`` followed by that file name. When the
    placeholder carried a width percentage, the tag also gets a
    ``style={{"width":"N%"}}`` attribute. Text outside the matches is
    returned unchanged.
    """

    def render(found):
        # Groups: 1 = image name, 2 = optional width digits, 3 = image URL.
        label, percent, source_url = found.group(1, 2, 3)
        png_name = f"{label}.png"
        target = f"{SITE_PATH_IMAGES}{png_name}"
        # Fetch the picture before emitting the tag that points at it.
        download_image(source_url, png_name)
        # Quadrupled braces in the f-string collapse to a literal "{{ ... }}".
        return (
            f'<p><img alt="{label}" style={{{{"width":"{percent}%"}}}} src="{target}"/></p>'
            if percent
            else f'<p><img alt="{label}" src="{target}"/></p>'
        )

    # sub() walks the non-overlapping matches left to right and stitches the
    # untouched text between them back together.
    return combined_pattern.sub(render, markdown)
def download_image(url, filename, timeout=30):
    """Download *url* and write it to ``REPO_PATH_IMAGES + filename``.

    Args:
        url: Address of the image to fetch.
        filename: Name appended to ``REPO_PATH_IMAGES`` to form the
            destination path.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Without it a stalled server
            would block the caller indefinitely.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        OSError: If the destination file cannot be written.
    """
    image_path = REPO_PATH_IMAGES + filename
    # The context manager releases the streamed connection even when the
    # status check or the file write fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(image_path, 'wb') as file:
            # Stream in chunks so large images are never held in memory whole.
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(url, image_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment