Last active
June 25, 2024 23:28
-
-
Save judell/c6b02a15403f4a27b89222d2c67f6347 to your computer and use it in GitHub Desktop.
gdoc to markdown example 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Matches an "[image: NAME]" or "[image: NAME 50%]" tag that starts a line,
# together with the first markdown image "![alt](URL)" that follows it.
#   group 1 -> image name, group 2 -> optional width percentage (digits only),
#   group 3 -> URL of the markdown image.
# The gap between the tag and the image may not run past another "[image:"
# tag line, so a tag with no image of its own is left unmatched instead of
# capturing the image that belongs to the next tag.
combined_pattern = re.compile(r'''
    # Image reference
    ^\s*            # Start of line, followed by optional whitespace
    \[image:\s+     # Literal '[image:', followed by at least one whitespace character
    ([^\]]+?)       # Capture group 1: Image name - one or more characters that are not ']', non-greedy
    \s*             # Optional whitespace
    (?:             # Start of non-capturing group for optional width
        (\d+)       # Capture group 2: One or more digits for width
        %           # Literal '%' character
        \s*         # Optional whitespace
    )?              # End of optional non-capturing group
    \]              # Literal ']' character
    (?:             # Gap before the image, consumed one character at a time ...
        (?!^\s*\[image:)    # ... but never stepping onto the start of another image tag line
        [\s\S]      # Any character (including newlines)
    )*?             # As few characters as possible
    # Corresponding image
    !\[             # Literal '![' to start markdown image
    .*?             # Any characters for alt text, non-greedy
    \]              # Literal ']' to close alt text
    \(              # Literal '(' to start URL
    (.*?)           # Capture group 3: URL - any characters, non-greedy
    \)              # Literal ')' to close URL
''', re.MULTILINE | re.VERBOSE)
def download_and_replace_images(markdown):
    """Replace every tag/image pair found by ``combined_pattern`` with an ``<img>``.

    For each match, the image at the captured URL is downloaded through
    ``download_image`` as ``<name>.png``. The whole matched span (tag,
    anything in between, and the markdown image) is then replaced by a
    ``<p><img .../></p>`` element whose ``src`` is ``SITE_PATH_IMAGES``
    followed by that filename. When the tag carried a width percentage, the
    element also gets a ``style={{"width":"N%"}}`` attribute.

    Text outside the matches is returned unchanged.
    """

    def _to_img_tag(found):
        # Groups: 1 = image name, 2 = optional width digits, 3 = source URL.
        label, percent, source_url = found.group(1, 2, 3)
        png_name = f"{label}.png"
        site_url = f"{SITE_PATH_IMAGES}{png_name}"

        # Fetch the picture before emitting the markup that points at it.
        download_image(source_url, png_name)

        if not percent:
            return f'<p><img alt="{label}" src="{site_url}"/></p>'
        return f'<p><img alt="{label}" style={{{{"width":"{percent}%"}}}} src="{site_url}"/></p>'

    # sub() calls the callback once per non-overlapping match, left to right,
    # and copies the unmatched text between matches through untouched.
    return combined_pattern.sub(_to_img_tag, markdown)
def download_image(url: str, filename: str, timeout: float = 30) -> None:
    """Download ``url`` and save it as ``REPO_PATH_IMAGES + filename``.

    Args:
        url: Address of the image to fetch.
        filename: Name of the file to write; appended directly to
            ``REPO_PATH_IMAGES``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a server that stops responding would block the
            call indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (via ``raise_for_status``). Other ``requests`` exceptions, such
            as timeouts or connection failures, propagate unchanged.
    """
    image_path = REPO_PATH_IMAGES + filename
    # The context manager closes the streamed response even when the status
    # check or a file write raises part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the file so a failed request does
        # not leave an empty or truncated image behind.
        response.raise_for_status()
        with open(image_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(url, image_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment