Skip to content

Instantly share code, notes, and snippets.

@jasonsperske
Created November 11, 2024 10:38
Show Gist options
  • Save jasonsperske/a0bda7d3e707f0ff46b85df34ae03e69 to your computer and use it in GitHub Desktop.
Save jasonsperske/a0bda7d3e707f0ff46b85df34ae03e69 to your computer and use it in GitHub Desktop.
A simple command-line utility that takes one or more published Claude artifact URLs and automatically creates the files described in their code blocks
# Adapted from https://claude.site/artifacts/adb7d26c-d81e-4df8-a8ba-d33ae4747a16
# very slight changes to some regexes
import argparse
import os
import re
from typing import List, Dict, Optional, Tuple
import requests
from bs4 import BeautifulSoup
class ClaudeArtifactParser:
    """Turn code blocks from published Claude artifacts into files on disk.

    Each code block is tried against three strategies, in order (see
    ``parse_file_operations``):

    1. ``# path/to/file`` comment headers that split the block into files;
    2. a directory-tree map followed by ``filename:`` sections;
    3. explicit ``mkdir`` / ``with open(...)`` statements.

    Every path is created relative to the current working directory. Paths
    that are absolute or that climb out of it with ``..`` are refused,
    because artifact content is untrusted input.
    """

    # Seconds to wait for an artifact page before giving up.
    REQUEST_TIMEOUT = 30

    # Comment lines that name a file: "# path", "# *path*", "# label: path".
    _COMMENT_PATH_PATTERNS = (
        re.compile(r'^#\s*\*?([\w./\-]+)\*?$'),
        re.compile(r'^#\s*[\w./\-]+:\s*\*?([\w./\-]+)\*?$'),
    )

    # Leading whitespace and box-drawing characters of a tree line; its
    # length is used as the nesting depth of the entry.
    _TREE_PREFIX = re.compile(r'^[\s│├└─]*')

    # Bullet forms come first so the "- " marker is not captured as part of
    # the name by the more general tree patterns.
    _MAP_FILE_PATTERNS = (
        re.compile(r'^\s*[-+]\s+(.+\.[a-zA-Z0-9]+)$'),
        re.compile(r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9_]+)$'),
    )
    _MAP_DIR_PATTERNS = (
        re.compile(r'^\s*[-+]\s+(.+)/$'),
        re.compile(r'^[\s│├└─]*([^│├└─]+)/$'),
        re.compile(r'^[\s│├└─]*([^│├└─]+)$'),
    )

    # "filename.ext" or "filename.ext:" on its own line, introducing the
    # content of a file listed in the directory map.
    _SECTION_HEADER = re.compile(r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9]+):?\s*$')

    def __init__(self):
        self.code_blocks = []
        self.current_path = ""

    def fetch_artifact_content(self, url: str) -> str:
        """Fetch the HTML of a published artifact URL.

        Returns the response body, or an empty string (after printing the
        error) when the request fails or times out.
        """
        try:
            # A timeout keeps one unresponsive host from hanging the run.
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
            return ""

    def extract_code_blocks(self, html_content: str) -> List[str]:
        """Return the stripped text of every ``<pre>`` / ``<code>`` element.

        A ``<code>`` nested inside a ``<pre>`` is skipped: its text is
        already part of the enclosing ``<pre>``, and returning both would
        process the same block twice.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        code_blocks = []
        for element in soup.find_all(['pre', 'code']):
            if element.name == 'code' and element.find_parent('pre') is not None:
                continue
            code_blocks.append(element.get_text().strip())
        return code_blocks

    def parse_comment_path(self, line: str) -> Optional[str]:
        """Return the file path named by a comment line, or ``None``.

        Recognised forms::

            # path/to/file.py
            # *path/to/file.py*
            # filename: path/to/file.py
        """
        candidate = line.strip()
        for pattern in self._COMMENT_PATH_PATTERNS:
            match = pattern.match(candidate)
            if match:
                return match.group(1).strip()
        return None

    def split_code_blocks_by_files(self, code_block: str) -> Dict[str, str]:
        """Split a code block into files using ``# path`` comment headers.

        Lines before the first header are ignored. Returns a mapping of
        file path to stripped content; it is empty when the block contains
        no header.
        """
        files: Dict[str, str] = {}
        current_file = None
        current_content: List[str] = []
        for line in code_block.split('\n'):
            stripped = line.strip()
            # Skip blank lines until the first header has been seen.
            if not current_file and not stripped:
                continue
            if stripped.startswith('#'):
                path = self.parse_comment_path(line)
                if path:
                    # A new header closes the previous file.
                    if current_file:
                        files[current_file] = '\n'.join(current_content).strip()
                    current_file = path
                    current_content = []
                    continue
            if current_file:
                current_content.append(line)
        if current_file and current_content:
            files[current_file] = '\n'.join(current_content).strip()
        return files

    @staticmethod
    def _first_group(patterns, line: str) -> Optional[str]:
        """Return the stripped first group of the first matching pattern."""
        for pattern in patterns:
            match = pattern.match(line)
            if match:
                return match.group(1).strip()
        return None

    def parse_directory_map(self, lines: List[str]) -> Tuple[Dict[str, str], int]:
        """Parse a directory/file tree at the start of a code block.

        The map ends at the first blank line or line starting with a
        comment/fence marker. Returns ``(file_map, map_end_line)`` where
        ``file_map`` maps each file's full relative path to ``""`` and
        ``map_end_line`` is the index of the terminating line, or
        ``len(lines)`` when the map runs to the end of the block.
        """
        file_map: Dict[str, str] = {}
        # Open directories as (depth, name), outermost first.
        dir_stack: List[Tuple[int, str]] = []
        map_end_line = len(lines)

        for i, line in enumerate(lines):
            stripped = line.strip()
            if not stripped or stripped.startswith(('```', '/*', '*/', '//', '#')):
                map_end_line = i
                break

            # Files are tested first: the catch-all directory pattern also
            # matches every file line, so testing directories first would
            # leave the map permanently empty.
            file_name = self._first_group(self._MAP_FILE_PATTERNS, line)
            dir_name = None
            if file_name is None:
                dir_name = self._first_group(self._MAP_DIR_PATTERNS, line)
            name = file_name or dir_name

            # Names containing whitespace are almost always code or prose
            # (e.g. "import os.path"), not tree entries; ignore them.
            if not name or re.search(r'\s', name):
                continue

            # Depth counts tree-drawing characters as well as whitespace, so
            # "│   ├── x" nests deeper than "├── x".
            depth = len(self._TREE_PREFIX.match(line).group(0))
            # Close every directory that is not an ancestor of this entry.
            while dir_stack and dir_stack[-1][0] >= depth:
                dir_stack.pop()

            if file_name is not None:
                parents = [parent for _, parent in dir_stack]
                file_map[os.path.join(*parents, file_name)] = ""
            else:
                dir_stack.append((depth, dir_name))

        return file_map, map_end_line

    def create_files_from_map(self, file_map: Dict[str, str]):
        """Create every file (and its parent directories) in ``file_map``."""
        for file_path, content in file_map.items():
            self._create_file_with_content(file_path, content)

    @staticmethod
    def _resolve_map_key(file_map: Dict[str, str], name: str) -> Optional[str]:
        """Map a section header name to a key of ``file_map``.

        Accepts an exact key, or a trailing part of exactly one key, so a
        bare ``main.py`` header finds ``project/src/main.py``. Returns
        ``None`` when nothing matches or the name is ambiguous.
        """
        name = name.strip()
        if name in file_map:
            return name
        wanted = os.path.normpath(name)
        hits = [
            key for key in file_map
            if os.path.normpath(key) == wanted
            or os.path.normpath(key).endswith(os.sep + wanted)
        ]
        return hits[0] if len(hits) == 1 else None

    def _fill_map_contents(self, file_map: Dict[str, str], lines: List[str]):
        """Fill ``file_map`` from the ``filename:`` sections found in ``lines``."""
        current_key = None
        buffer: List[str] = []
        for line in lines:
            header = self._SECTION_HEADER.match(line)
            key = self._resolve_map_key(file_map, header.group(1)) if header else None
            if key is not None:
                if current_key is not None:
                    file_map[current_key] = '\n'.join(buffer).strip()
                current_key = key
                buffer = []
            elif current_key is not None:
                # A header-looking line that names no mapped file is content.
                buffer.append(line)
        if current_key is not None:
            file_map[current_key] = '\n'.join(buffer).strip()

    def _run_explicit_operations(self, lines: List[str]):
        """Execute ``mkdir`` / ``with open(...)`` statements found in ``lines``."""
        current_file = None
        current_content: List[str] = []
        for line in lines:
            if 'mkdir' in line or 'os.makedirs' in line:
                dir_match = re.search(r'["\'](.+?)["\']', line)
                if dir_match:
                    self._create_directory(dir_match.group(1))
            # The original also tested "'w' in line", which is always true
            # here because "with" itself contains a "w".
            elif 'with open' in line:
                file_match = re.search(r'open\(["\'](.+?)["\']', line)
                if file_match:
                    if current_file:
                        self._create_file_with_content(current_file, '\n'.join(current_content))
                    current_file = file_match.group(1)
                    current_content = []
            elif current_file and line.strip() and not line.strip().startswith(('with', '}')):
                current_content.append(line)
        if current_file:
            self._create_file_with_content(current_file, '\n'.join(current_content))

    def parse_file_operations(self, code_block: str):
        """Create the files a code block describes.

        Tries comment headers first, then a directory map with per-file
        sections, and finally explicit ``mkdir`` / ``with open`` statements.
        The first strategy that yields any file wins.
        """
        files_from_comments = self.split_code_blocks_by_files(code_block)
        if files_from_comments:
            self.create_files_from_map(files_from_comments)
            return

        lines = code_block.split('\n')
        file_map, map_end_line = self.parse_directory_map(lines)
        if file_map:
            self._fill_map_contents(file_map, lines[map_end_line:])
            self.create_files_from_map(file_map)
            return

        self._run_explicit_operations(lines)

    @staticmethod
    def _is_safe_relative_path(path: str) -> bool:
        """Return True when ``path`` stays inside the working directory."""
        normalized = os.path.normpath(path)
        if os.path.isabs(normalized):
            return False
        return normalized != os.pardir and not normalized.startswith(os.pardir + os.sep)

    def _create_file_with_content(self, file_path: str, content: str):
        """Write stripped ``content`` to ``file_path``, creating parent dirs.

        Failures are printed, not raised. Unsafe paths (absolute, or leaving
        the working directory through ``..``) are refused.
        """
        if not self._is_safe_relative_path(file_path):
            print(f"Error creating file {file_path}: path is outside the output directory")
            return
        try:
            parent = os.path.dirname(file_path)
            # os.makedirs('') raises, so only create a parent when the path
            # has one; otherwise bare filenames could never be written.
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content.strip())
            print(f"Created file: {file_path}")
        except OSError as e:
            print(f"Error creating file {file_path}: {e}")

    def _create_directory(self, dir_path: str):
        """Create ``dir_path`` if missing; failures are printed, not raised."""
        if not self._is_safe_relative_path(dir_path):
            print(f"Error creating directory {dir_path}: path is outside the output directory")
            return
        try:
            os.makedirs(dir_path, exist_ok=True)
            print(f"Created directory: {dir_path}")
        except OSError as e:
            print(f"Error creating directory {dir_path}: {e}")

    def process_urls(self, urls: List[str]):
        """Fetch every URL, then process all collected code blocks in order."""
        all_code_blocks = []
        for url in urls:
            content = self.fetch_artifact_content(url)
            if content:
                all_code_blocks.extend(self.extract_code_blocks(content))
        for code_block in all_code_blocks:
            self.parse_file_operations(code_block)
def main(argv=None):
    """Command-line entry point: fetch artifact URLs and create their files.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(description='Parse Claude.ai artifacts and create files/directories')
    parser.add_argument('urls', nargs='+', help='One or more Claude.ai artifact URLs')
    parser.add_argument('--output-dir', default='.', help='Output directory for created files')
    args = parser.parse_args(argv)

    # Create the output directory when missing so os.chdir cannot fail on a
    # fresh path, then make it the base for every relative path written.
    os.makedirs(args.output_dir, exist_ok=True)
    os.chdir(args.output_dir)

    artifact_parser = ClaudeArtifactParser()
    artifact_parser.process_urls(args.urls)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment