vicradon · May 2, 2024 16:02
diff --git a/download-images-and-replace-remote-urls.py b/download-images-and-replace-remote-urls.py
 import requests
 import os
 from urllib.parse import urlparse
 import re

 def process_markdown(markdown_file):
  """
  Analyzes a markdown file, downloads images (current folder), and replaces URLs.

  Args:
    markdown_file: Path to the markdown file.
  """
  # Read the markdown content
  with open(markdown_file, 'r') as f:
    content = f.read()

  # Regex to find image references
  image_regex = r"!\[(.*?)\]\((.*?)\)"

  # Iterate through each image reference
  for alt_text, url in re.findall(image_regex, content):
    # Extract filename from URL (consider potential issues with extensions)
    parsed_url = urlparse(url)
    filename, _ = os.path.splitext(parsed_url.path)
    
    # Sanitize filename (remove special characters)
    sanitized_filename = ''.join(c for c in alt_text if c.isalnum() or c.isspace() or c == '-').replace(" ", "_")

    # Download image with sanitized filename and .png extension
    image_path = f"{sanitized_filename}.png"  # Use current directory
    download_image(url, image_path)
    
    # Replace URL with relative path
    content = content.replace(f"({url})", f"(./{image_path})")

  # Write the modified content back to the file
  with open(markdown_file, 'w') as f:
    f.write(content)

 def download_image(url, image_path, timeout=30):
  """
  Downloads an image from a URL and saves it to a specific path.

  The data is streamed to a temporary "<image_path>.part" file and moved
  into place only after the whole download succeeded, so a failed download
  never leaves a truncated file at image_path.

  Args:
    url: URL of the image.
    image_path: Path to save the downloaded image (relative to current dir).
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests may block indefinitely on an unresponsive host.

  Returns:
    True if the image was downloaded and saved, False if the request failed.
  """
  partial_path = f"{image_path}.part"
  try:
    # The context manager releases the streamed connection even on errors.
    with requests.get(url, stream=True, timeout=timeout) as response:
      response.raise_for_status()

      with open(partial_path, 'wb') as f:
        for chunk in response.iter_content(1024):
          f.write(chunk)

    # Publish the finished file in one step.
    os.replace(partial_path, image_path)
  except requests.exceptions.RequestException as e:
    print(f"Error downloading image: {url} - {e}")
    # Discard whatever was written before the failure.
    if os.path.exists(partial_path):
      os.remove(partial_path)
    return False

  print(f"Downloaded image: {image_path}")
  return True

 # Entry point: processes 'index.md' in the current directory.
 # Change the path below to run the script against a different markdown file.
 process_markdown(markdown_file='index.md')