Created
November 11, 2024 10:38
-
-
Save jasonsperske/a0bda7d3e707f0ff46b85df34ae03e69 to your computer and use it in GitHub Desktop.
A simple command-line utility that takes one or more published Claude artifact URLs and automatically creates the files described in their code blocks.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from https://claude.site/artifacts/adb7d26c-d81e-4df8-a8ba-d33ae4747a16 | |
# very slight changes to some regexes | |
import argparse | |
import os | |
import re | |
from typing import List, Dict, Optional, Tuple | |
import requests | |
from bs4 import BeautifulSoup | |
class ClaudeArtifactParser:
    """Parse published claude.ai artifact pages and create the files and
    directories described in their code blocks.

    Three strategies are tried, in order, for each extracted code block:
    1. comment headers naming a file per section (``# path/to/file.py``),
    2. a leading directory/tree map followed by per-file contents,
    3. explicit ``mkdir`` / ``with open(..., 'w')`` operations in the code.
    """

    def __init__(self):
        # Accumulated code blocks; kept for interface compatibility
        # (not read back by any method in this file).
        self.code_blocks = []
        # Last path seen; kept for interface compatibility.
        self.current_path = ""

    def fetch_artifact_content(self, url: str) -> str:
        """Fetch content from a published claude.ai artifact URL.

        Returns the page HTML, or "" on any request error (the error is
        printed, not raised, so one bad URL doesn't abort a batch).
        """
        try:
            # A timeout keeps the CLI from hanging forever on a dead host.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
            return ""

    def extract_code_blocks(self, html_content: str) -> List[str]:
        """Extract code blocks from HTML content.

        Collects the text of every <pre> and <code> element, stripped of
        surrounding whitespace.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        code_blocks = []
        # Find all code blocks (both fenced and within pre/code tags)
        for code_element in soup.find_all(['pre', 'code']):
            code_blocks.append(code_element.get_text().strip())
        return code_blocks

    def parse_comment_path(self, line: str) -> Optional[str]:
        """
        Parse a comment line that specifies a file path.

        Handles formats like:
            # path/to/file.py
            # *path/to/file.py*

        Returns the path, or None if the line is not a path comment.
        """
        patterns = [
            r'^#\s*\*?([\w./\-]+)\*?$',  # Matches # path/to/file.py or # *path/to/file.py*
            r'^#\s*[\w./\-]+:\s*\*?([\w./\-]+)\*?$',  # Matches # filename: path/to/file.py
        ]
        for pattern in patterns:
            match = re.match(pattern, line.strip())
            if match:
                return match.group(1).strip()
        return None

    def split_code_blocks_by_files(self, code_block: str) -> Dict[str, str]:
        """
        Split a code block into individual files based on comment headers.

        Returns a dictionary mapping file paths to their contents; empty
        if no path-comment headers were found.
        """
        files = {}
        current_file = None
        current_content = []
        for line in code_block.split('\n'):
            # Skip empty lines before the first file header.
            if not current_file and not line.strip():
                continue
            # A comment line may start a new file section.
            if line.strip().startswith('#'):
                path = self.parse_comment_path(line)
                if path:
                    # Save the previous file before switching.
                    if current_file:
                        files[current_file] = '\n'.join(current_content).strip()
                    current_file = path
                    current_content = []
                    continue
            # Accumulate content only once a file header has been seen.
            if current_file:
                current_content.append(line)
        # Save the last file. Unlike earlier revisions this also records a
        # trailing header with no content (as an empty file), matching how
        # earlier sections are treated.
        if current_file:
            files[current_file] = '\n'.join(current_content).strip()
        return files

    def parse_directory_map(self, lines: List[str]) -> Tuple[Dict[str, str], int]:
        """
        Parse directory/file map at the start of a code block.

        Returns a dictionary of file paths (values initially empty) and the
        index of the line where the map ends. Indentation steps of two
        spaces are assumed for nesting depth.
        """
        file_map = {}
        current_path = []
        last_indent = -1
        map_end_line = 0
        # Common directory map patterns
        dir_patterns = [
            r'^[\s│├└─]*([^│├└─]+)/$',  # Directory with trailing slash
            r'^[\s│├└─]*([^│├└─]+)$',  # Plain text with possible tree characters
            r'^\s*[-+]\s+(.+)/$',  # Bullet point with trailing slash
        ]
        file_patterns = [
            r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9_]+)$',  # File with extension
            r'^\s*[-+]\s+(.+\.[a-zA-Z0-9]+)$',  # Bullet point with file extension
        ]
        for i, line in enumerate(lines):
            # A blank line or a comment/fence marker ends the map.
            if not line.strip() or line.strip().startswith(('```', '/*', '*/', '//', '#')):
                map_end_line = i
                break
            # Calculate current indent level
            indent = len(line) - len(line.lstrip())
            # Check if this is a directory
            is_dir = False
            dir_name = None
            for pattern in dir_patterns:
                match = re.match(pattern, line)
                if match:
                    dir_name = match.group(1).strip()
                    is_dir = True
                    break
            # Check if this is a file (file match takes precedence below
            # because it is tested after the directory flags are set).
            is_file = False
            file_name = None
            for pattern in file_patterns:
                match = re.match(pattern, line)
                if match:
                    file_name = match.group(1).strip()
                    is_file = True
                    break
            # De-indenting pops directories off the current path
            # (2 spaces per level assumed — TODO confirm against real maps).
            if indent < last_indent:
                levels_up = (last_indent - indent) // 2
                current_path = current_path[:-levels_up]
            if is_dir:
                current_path.append(dir_name)
            elif is_file:
                full_path = os.path.join(*current_path, file_name) if current_path else file_name
                file_map[full_path] = ""
            last_indent = indent
        return file_map, map_end_line

    def create_files_from_map(self, file_map: Dict[str, str]):
        """Create all files and directories from the file map."""
        for file_path, content in file_map.items():
            self._create_file_with_content(file_path, content)

    def parse_file_operations(self, code_block: str):
        """Parse a code block for file operations and execute them.

        Tries comment-header sections first, then a directory map, and
        finally falls back to scanning for explicit mkdir/open calls.
        """
        # First try to parse files based on comment headers.
        files_from_comments = self.split_code_blocks_by_files(code_block)
        if files_from_comments:
            self.create_files_from_map(files_from_comments)
            return
        # If no files found from comments, try a directory map.
        lines = code_block.split('\n')
        file_map, map_end_line = self.parse_directory_map(lines)
        if file_map:
            # Fill in contents for files named after the map.
            current_file = None
            current_content = []
            for line in lines[map_end_line:]:
                file_name_match = re.match(r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9]+):?\s*$', line)
                if file_name_match:
                    if current_file and current_file in file_map:
                        file_map[current_file] = '\n'.join(current_content).strip()
                    current_file = file_name_match.group(1)
                    current_content = []
                elif current_file and line.strip():
                    current_content.append(line)
            # Save last file content
            if current_file and current_file in file_map:
                file_map[current_file] = '\n'.join(current_content).strip()
            self.create_files_from_map(file_map)
            return
        # If no structured format found, fall back to parsing explicit operations.
        current_file = None
        current_content = []
        for line in lines:
            # Check for mkdir operations
            if 'mkdir' in line or 'os.makedirs' in line:
                dir_match = re.search(r'["\'](.+?)["\']', line)
                if dir_match:
                    self._create_directory(dir_match.group(1))
            # Check for file write operations. NOTE: the old test
            # (`'with open' in line and 'w' in line`) was vacuous because
            # 'with' itself contains 'w'; require an explicit write mode.
            elif 'with open' in line:
                file_match = re.search(r'open\(\s*["\'](.+?)["\']\s*,\s*["\']w', line)
                if file_match:
                    if current_file:
                        self._create_file_with_content(current_file, '\n'.join(current_content))
                    current_file = file_match.group(1)
                    current_content = []
            # Collect content for the current file
            elif current_file and line.strip() and not line.strip().startswith(('with', '}')):
                current_content.append(line)
        # Write any remaining file content
        if current_file:
            self._create_file_with_content(current_file, '\n'.join(current_content))

    def _create_file_with_content(self, file_path: str, content: str):
        """Create a file and its parent directories, then write content.

        Errors are printed rather than raised so one bad path doesn't
        abort the rest of a batch.
        """
        try:
            parent = os.path.dirname(file_path)
            # os.makedirs("") raises FileNotFoundError, so only create the
            # parent when the path actually has a directory component.
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content.strip())
            print(f"Created file: {file_path}")
        except OSError as e:
            print(f"Error creating file {file_path}: {e}")

    def _create_directory(self, dir_path: str):
        """Create directory if it doesn't exist."""
        try:
            os.makedirs(dir_path, exist_ok=True)
            print(f"Created directory: {dir_path}")
        except OSError as e:
            print(f"Error creating directory {dir_path}: {e}")

    def process_urls(self, urls: List[str]):
        """Process multiple URLs and their code blocks."""
        all_code_blocks = []
        for url in urls:
            content = self.fetch_artifact_content(url)
            if content:
                all_code_blocks.extend(self.extract_code_blocks(content))
        # Process each code block
        for code_block in all_code_blocks:
            self.parse_file_operations(code_block)
def main():
    """CLI entry point: fetch one or more published artifact URLs and
    create the files their code blocks describe under --output-dir."""
    parser = argparse.ArgumentParser(
        description='Parse Claude.ai artifacts and create files/directories')
    parser.add_argument('urls', nargs='+',
                        help='One or more Claude.ai artifact URLs')
    parser.add_argument('--output-dir', default='.',
                        help='Output directory for created files')
    args = parser.parse_args()

    # Create the output directory if it doesn't exist yet, then work
    # relative to it so every artifact path lands under --output-dir
    # (a bare os.chdir would raise FileNotFoundError on a missing dir).
    os.makedirs(args.output_dir, exist_ok=True)
    os.chdir(args.output_dir)

    # Process the artifacts
    artifact_parser = ClaudeArtifactParser()
    artifact_parser.process_urls(args.urls)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment