Created
May 2, 2024 16:02
-
-
Save vicradon/065a1cdee3b88da3934b837827c5363b to your computer and use it in GitHub Desktop.
Download images and replace the previous remote URL with local URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os | |
from urllib.parse import urlparse | |
import re | |
def _sanitize_image_name(text):
    """Keep only alphanumerics, whitespace and '-' from *text*; spaces become '_'."""
    return ''.join(
        c for c in text if c.isalnum() or c.isspace() or c == '-'
    ).replace(" ", "_")


def process_markdown(markdown_file):
    """
    Analyzes a markdown file, downloads remote images (current folder), and replaces URLs.

    Only http(s) image references are processed; references that are already
    local are left untouched. A URL is rewritten only when the downloaded file
    actually exists afterwards, so a failed download keeps the remote URL.

    The local filename is derived from the image's alt text. When the alt text
    sanitizes to an empty string, the basename of the URL path is used instead,
    falling back to 'image'. Names that collide get a numeric suffix.

    Args:
        markdown_file: Path to the markdown file. It is rewritten in place.
    """
    # Read the markdown content
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Regex to find image references of the form ![alt text](url)
    image_regex = r"!\[(.*?)\]\((.*?)\)"

    seen_urls = set()   # URLs already handled, so repeats are not downloaded twice
    used_names = set()  # filenames already assigned, to avoid silent overwrites

    # Iterate through each image reference
    for alt_text, url in re.findall(image_regex, content):
        if url in seen_urls:
            continue
        seen_urls.add(url)

        # Skip anything that is not a remote http(s) URL (e.g. local paths)
        parsed_url = urlparse(url)
        if parsed_url.scheme not in ('http', 'https'):
            continue

        # Build a filename from the alt text, falling back to the URL basename
        base_name = _sanitize_image_name(alt_text)
        if not base_name:
            stem, _ = os.path.splitext(os.path.basename(parsed_url.path))
            base_name = _sanitize_image_name(stem) or 'image'

        # Disambiguate images that would otherwise share a filename
        name = base_name
        suffix = 2
        while name in used_names:
            name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(name)

        # NOTE: the extension is always .png, whatever the real image format is
        image_path = f"{name}.png"  # Use current directory
        download_image(url, image_path)

        # Keep the remote URL if nothing was saved
        if not os.path.exists(image_path):
            continue

        # Replace URL with relative path
        content = content.replace(f"({url})", f"(./{image_path})")

    # Write the modified content back to the file
    with open(markdown_file, 'w', encoding='utf-8') as f:
        f.write(content)
def download_image(url, image_path, timeout=30):
    """
    Downloads an image from a URL and saves it to a specific path.

    Request errors are printed rather than raised. If the transfer fails after
    the output file was opened, the partial file is removed.

    Args:
        url: URL of the image.
        image_path: Path to save the downloaded image (relative to current dir).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the image was saved, False if the request failed.
    """
    file_opened = False
    try:
        # The context manager releases the streamed connection when done
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(image_path, 'wb') as f:
                file_opened = True
                for chunk in response.iter_content(1024):
                    f.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {url} - {e}")
        # Only remove a file this call created; a failure before open()
        # must not delete a file left by an earlier successful run.
        if file_opened:
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup; the download error was already reported
                pass
        return False

    print(f"Downloaded image: {image_path}")
    return True
if __name__ == "__main__":
    import sys

    # Process the markdown file given on the command line; with no argument,
    # fall back to 'index.md' in the current directory.
    process_markdown(sys.argv[1] if len(sys.argv) > 1 else 'index.md')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment