jasonsperske · November 11, 2024 10:38
diff --git a/claude-dl.py b/claude-dl.py
 # Adapted from https://claude.site/artifacts/adb7d26c-d81e-4df8-a8ba-d33ae4747a16
 # very slight changes to some regexes

 import argparse
 import os
 import re
 from typing import List, Dict, Optional, Tuple
 import requests
 from bs4 import BeautifulSoup

 class ClaudeArtifactParser:
    def __init__(self):
        self.code_blocks = []
        self.current_path = ""
        
    def fetch_artifact_content(self, url: str, timeout: float = 30.0) -> str:
        """Fetch the raw HTML of a published claude.ai artifact.

        Args:
            url: Address of the artifact page.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` waits indefinitely on a
                stalled connection.

        Returns:
            The response body as text, or an empty string when the request
            fails. Failures are printed, not raised.
        """
        try:
            response = requests.get(url, timeout=timeout)
            # Treat 4xx/5xx answers as failures instead of parsing error pages.
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
            return ""
        return response.text

    def extract_code_blocks(self, html_content: str) -> List[str]:
        """Extract the text of every code block in an HTML document.

        Args:
            html_content: HTML source to scan.

        Returns:
            The stripped text of each ``<pre>`` element and of each
            ``<code>`` element that is not nested inside a ``<pre>``, in
            document order.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        return [
            element.get_text().strip()
            for element in soup.find_all(['pre', 'code'])
            # A fenced block renders as <pre><code>...</code></pre>; keeping
            # both elements would return (and later write) the block twice.
            if not (element.name == 'code' and element.find_parent('pre') is not None)
        ]

    def parse_comment_path(self, line: str) -> Optional[str]:
        """
        Parse a comment line that specifies a file path.
        Handles formats like:
        # path/to/file.py
        # *path/to/file.py*
        """
        patterns = [
            r'^#\s*\*?([\w./\-]+)\*?$',  # Matches # path/to/file.py or # *path/to/file.py*
            r'^#\s*[\w./\-]+:\s*\*?([\w./\-]+)\*?$',  # Matches # filename: path/to/file.py
        ]
        
        for pattern in patterns:
            match = re.match(pattern, line.strip())
            if match:
                return match.group(1).strip()
        return None

    def split_code_blocks_by_files(self, code_block: str) -> Dict[str, str]:
        """
        Split a code block into individual files based on comment headers.
        Returns a dictionary mapping file paths to their contents.
        """
        files = {}
        current_file = None
        current_content = []
        lines = code_block.split('\n')
        
        for line in lines:
            # Skip empty lines at the start
            if not current_file and not line.strip():
                continue
                
            # Check for file path in comment
            if line.strip().startswith('#'):
                path = self.parse_comment_path(line)
                if path:
                    # Save previous file if it exists
                    if current_file:
                        files[current_file] = '\n'.join(current_content).strip()
                    current_file = path
                    current_content = []
                    continue
            
            # Add line to current file if we have one
            if current_file:
                current_content.append(line)
        
        # Save the last file
        if current_file and current_content:
            files[current_file] = '\n'.join(current_content).strip()
        
        return files

    def parse_directory_map(self, lines: List[str]) -> Tuple[Dict[str, str], int]:
        """
        Parse directory/file map at the start of a code block.
        Returns a dictionary of file paths and their contents, and the line number where the map ends.
        """
        file_map = {}
        current_path = []
        last_indent = -1
        map_end_line = 0
        
        # Common directory map patterns
        dir_patterns = [
            r'^[\s│├└─]*([^│├└─]+)/$',  # Directory with trailing slash
            r'^[\s│├└─]*([^│├└─]+)$',    # Plain text with possible tree characters
            r'^\s*[-+]\s+(.+)/$',        # Bullet point with trailing slash
        ]
        
        file_patterns = [
            r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9_]+)$',  # File with extension
            r'^\s*[-+]\s+(.+\.[a-zA-Z0-9]+)$',       # Bullet point with file extension
        ]
        
        for i, line in enumerate(lines):
            if not line.strip() or line.strip().startswith(('```', '/*', '*/', '//', '#')):
                map_end_line = i
                break
                
            # Calculate current indent level
            indent = len(line) - len(line.lstrip())
            
            # Check if this is a directory
            is_dir = False
            dir_name = None
            for pattern in dir_patterns:
                match = re.match(pattern, line)
                if match:
                    dir_name = match.group(1).strip()
                    is_dir = True
                    break
            
            # Check if this is a file
            is_file = False
            file_name = None
            for pattern in file_patterns:
                match = re.match(pattern, line)
                if match:
                    file_name = match.group(1).strip()
                    is_file = True
                    break
            
            # Handle indentation changes
            if indent < last_indent:
                levels_up = (last_indent - indent) // 2
                current_path = current_path[:-levels_up]
            
            if is_dir:
                current_path.append(dir_name)
            elif is_file:
                full_path = os.path.join(*current_path, file_name) if current_path else file_name
                file_map[full_path] = ""
            
            last_indent = indent

        return file_map, map_end_line

    def create_files_from_map(self, file_map: Dict[str, str]):
        """Create all files and directories from the file map."""
        for file_path, content in file_map.items():
            self._create_file_with_content(file_path, content)

    def parse_file_operations(self, code_block: str):
        """Parse code block for file operations and execute them."""
        # First try to parse files based on comment headers
        files_from_comments = self.split_code_blocks_by_files(code_block)
        if files_from_comments:
            self.create_files_from_map(files_from_comments)
            return

        # If no files found from comments, try directory map
        lines = code_block.split('\n')
        file_map, map_end_line = self.parse_directory_map(lines)
        
        if file_map:
            # Parse contents for files in the map
            current_file = None
            current_content = []
            
            for line in lines[map_end_line:]:
                file_name_match = re.match(r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9]+):?\s*$', line)
                if file_name_match:
                    if current_file and current_file in file_map:
                        file_map[current_file] = '\n'.join(current_content).strip()
                    current_file = file_name_match.group(1)
                    current_content = []
                elif current_file and line.strip():
                    current_content.append(line)
            
            # Save last file content
            if current_file and current_file in file_map:
                file_map[current_file] = '\n'.join(current_content).strip()
            
            self.create_files_from_map(file_map)
            return

        # If no structured format found, fall back to parsing explicit operations
        current_file = None
        current_content = []
        
        for line in lines:
            # Check for mkdir operations
            if 'mkdir' in line or 'os.makedirs' in line:
                dir_match = re.search(r'["\'](.+?)["\']', line)
                if dir_match:
                    self._create_directory(dir_match.group(1))
            
            # Check for file write operations
            elif 'with open' in line and 'w' in line:
                file_match = re.search(r'open\(["\'](.+?)["\']', line)
                if file_match:
                    if current_file:
                        self._create_file_with_content(current_file, '\n'.join(current_content))
                    current_file = file_match.group(1)
                    current_content = []
            
            # Collect content for the current file
            elif current_file and line.strip() and not line.strip().startswith(('with', '}')):
                current_content.append(line)
        
        # Write any remaining file content
        if current_file:
            self._create_file_with_content(current_file, '\n'.join(current_content))

    def _create_file_with_content(self, file_path: str, content: str):
        """Create a file and its parent directories, then write content."""
        try:
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content.strip())
            print(f"Created file: {file_path}")
        except OSError as e:
            print(f"Error creating file {file_path}: {e}")

    def _create_directory(self, dir_path: str):
        """Create directory if it doesn't exist."""
        try:
            os.makedirs(dir_path, exist_ok=True)
            print(f"Created directory: {dir_path}")
        except OSError as e:
            print(f"Error creating directory {dir_path}: {e}")

    def process_urls(self, urls: List[str]):
        """Process multiple URLs and their code blocks."""
        all_code_blocks = []
        
        for url in urls:
            content = self.fetch_artifact_content(url)
            if content:
                code_blocks = self.extract_code_blocks(content)
                all_code_blocks.extend(code_blocks)
        
        # Process each code block
        for code_block in all_code_blocks:
            self.parse_file_operations(code_block)

 def main():
    """Command-line entry point.

    Parses the artifact URLs and the optional ``--output-dir``, switches
    to that directory (creating it when missing) and processes every URL
    with a ``ClaudeArtifactParser``.
    """
    parser = argparse.ArgumentParser(description='Parse Claude.ai artifacts and create files/directories')
    parser.add_argument('urls', nargs='+', help='One or more Claude.ai artifact URLs')
    parser.add_argument('--output-dir', default='.', help='Output directory for created files')

    args = parser.parse_args()

    # Create the output directory on demand: os.chdir alone raises
    # FileNotFoundError when the requested directory does not exist yet.
    os.makedirs(args.output_dir, exist_ok=True)
    # Files are written relative to the working directory, so move there.
    os.chdir(args.output_dir)

    # Process the artifacts
    artifact_parser = ClaudeArtifactParser()
    artifact_parser.process_urls(args.urls)

 # Run the command-line interface only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == '__main__':
    main()
	# Adapted from https://claude.site/artifacts/adb7d26c-d81e-4df8-a8ba-d33ae4747a16
	# very slight changes to some regexes

	import argparse
	import os
	import re
	from typing import List, Dict, Optional, Tuple
	import requests
	from bs4 import BeautifulSoup

	class ClaudeArtifactParser:
	    """Download published Claude artifacts and recreate the files they describe.

	    This copy of the class had lost all of its indentation and several
	    ``*`` characters inside regular expressions; both are restored here.
	    """

	    def __init__(self):
	        """Create a parser with no collected code blocks and an empty path."""
	        self.code_blocks = []
	        self.current_path = ""

	    def fetch_artifact_content(self, url: str, timeout: float = 30.0) -> str:
	        """Fetch the raw HTML of a published claude.ai artifact.

	        Args:
	            url: Address of the artifact page.
	            timeout: Seconds to wait for the server before giving up.
	                Without a timeout ``requests`` waits indefinitely on a
	                stalled connection.

	        Returns:
	            The response body as text, or an empty string when the request
	            fails. Failures are printed, not raised.
	        """
	        try:
	            response = requests.get(url, timeout=timeout)
	            response.raise_for_status()
	        except requests.RequestException as e:
	            print(f"Error fetching URL {url}: {e}")
	            return ""
	        return response.text

	    def extract_code_blocks(self, html_content: str) -> List[str]:
	        """Return the stripped text of every code block in an HTML document.

	        ``<code>`` elements nested inside a ``<pre>`` are skipped, because
	        the enclosing ``<pre>`` already yields the same text.
	        """
	        soup = BeautifulSoup(html_content, 'html.parser')
	        return [
	            element.get_text().strip()
	            for element in soup.find_all(['pre', 'code'])
	            if not (element.name == 'code' and element.find_parent('pre') is not None)
	        ]

	    def parse_comment_path(self, line: str) -> Optional[str]:
	        """Return the file path named by a ``#`` comment header, if any.

	        Recognises a bare path after the ``#`` (optionally wrapped in
	        asterisks) and a ``label: path`` pair after the ``#``.

	        Returns:
	            The captured path, or ``None`` when the line is not a header.
	        """
	        patterns = [
	            r'^#\s*\*?([\w./\-]+)\*?$',  # "# path" or "# *path*"
	            r'^#\s*[\w./\-]+:\s*\*?([\w./\-]+)\*?$',  # "# label: path"
	        ]

	        for pattern in patterns:
	            match = re.match(pattern, line.strip())
	            if match:
	                return match.group(1).strip()
	        return None

	    def split_code_blocks_by_files(self, code_block: str) -> Dict[str, str]:
	        """Split one code block into files using ``# path`` comment headers.

	        Lines before the first header are discarded.

	        Returns:
	            A mapping of file path to stripped content. A header that ends
	            the block with no lines after it is not included.
	        """
	        files = {}
	        current_file = None
	        current_content = []

	        for line in code_block.split('\n'):
	            # Skip empty lines before the first header.
	            if not current_file and not line.strip():
	                continue

	            # A recognised header closes the previous file and opens a new one.
	            if line.strip().startswith('#'):
	                path = self.parse_comment_path(line)
	                if path:
	                    if current_file:
	                        files[current_file] = '\n'.join(current_content).strip()
	                    current_file = path
	                    current_content = []
	                    continue

	            if current_file:
	                current_content.append(line)

	        # Save the last file, but only when it collected any lines.
	        if current_file and current_content:
	            files[current_file] = '\n'.join(current_content).strip()

	        return files

	    def parse_directory_map(self, lines: List[str]) -> Tuple[Dict[str, str], int]:
	        """Parse a directory/file listing found at the start of a code block.

	        The listing may be plainly indented, drawn with tree characters
	        (``│ ├ └ ─``) or written as ``-``/``+`` bullets. Parsing stops at the
	        first blank line or at a line starting with a code fence or a
	        comment marker.

	        Returns:
	            A tuple ``(file_map, map_end_line)``. ``file_map`` maps each
	            file path (joined with its enclosing directories) to an empty
	            string. ``map_end_line`` is the index of the line that ended the
	            listing, or ``len(lines)`` when the listing runs to the end.
	        """
	        file_map: Dict[str, str] = {}
	        # Directories enclosing the current line, as (indent, name) pairs.
	        dir_stack: List[Tuple[int, str]] = []
	        map_end_line = len(lines)

	        # Bullet forms come first: the generic patterns also match bullet
	        # lines and would keep the "- " marker as part of the name.
	        file_patterns = [
	            r'^\s*[-+]\s+(.+\.[a-zA-Z0-9]+)$',       # Bullet point with file extension
	            r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9_]+)$',  # File with extension
	        ]
	        dir_patterns = [
	            r'^\s*[-+]\s+(.+)/$',        # Bullet point with trailing slash
	            r'^[\s│├└─]*([^│├└─]+)/$',  # Directory with trailing slash
	            r'^[\s│├└─]*([^│├└─]+)$',    # Plain text with possible tree characters
	        ]

	        def first_capture(patterns: List[str], text: str) -> Optional[str]:
	            """Return the stripped first group of the first matching pattern."""
	            for pattern in patterns:
	                match = re.match(pattern, text)
	                if match:
	                    return match.group(1).strip()
	            return None

	        for i, line in enumerate(lines):
	            stripped = line.strip()
	            if not stripped or stripped.startswith(('```', '/*', '*/', '//', '#')):
	                map_end_line = i
	                break

	            entry = line.rstrip()
	            # Files are tested first: the last directory pattern is a
	            # catch-all that also matches every file line.
	            # NOTE(review): entries are recognised purely by shape, so an
	            # ordinary code line ending in ".name" also counts as a file;
	            # confirm callers only pass blocks that can hold a listing.
	            file_name = first_capture(file_patterns, entry)
	            dir_name = None if file_name is not None else first_capture(dir_patterns, entry)
	            if file_name is None and dir_name is None:
	                continue  # e.g. a bare "│" connector line

	            # Tree-drawing characters count as indentation.
	            indent = re.match(r'[\s│├└─]*', entry).end()
	            # Leave every directory that is not shallower than this entry.
	            while dir_stack and dir_stack[-1][0] >= indent:
	                dir_stack.pop()

	            if file_name is not None:
	                parents = [name for _, name in dir_stack]
	                file_map[os.path.join(*parents, file_name)] = ""
	            else:
	                dir_stack.append((indent, dir_name))

	        return file_map, map_end_line

	    def create_files_from_map(self, file_map: Dict[str, str]):
	        """Write every entry of ``file_map`` (path -> content) to disk."""
	        for file_path, content in file_map.items():
	            self._create_file_with_content(file_path, content)

	    def parse_file_operations(self, code_block: str):
	        """Create the files and directories described by one code block.

	        Three strategies are tried in order, and the first that finds
	        anything wins: ``# path`` comment headers, a directory map followed
	        by ``name:`` content headers, and finally explicit ``mkdir`` /
	        ``with open(<path>, <write mode>)`` lines in the code itself.
	        """
	        # Strategy 1: comment headers.
	        files_from_comments = self.split_code_blocks_by_files(code_block)
	        if files_from_comments:
	            self.create_files_from_map(files_from_comments)
	            return

	        # Strategy 2: directory map.
	        lines = code_block.split('\n')
	        file_map, map_end_line = self.parse_directory_map(lines)

	        if file_map:
	            current_file = None
	            current_content = []

	            for line in lines[map_end_line:]:
	                file_name_match = re.match(r'^[\s│├└─]*([^│├└─]+\.[a-zA-Z0-9]+):?\s*$', line)
	                if file_name_match:
	                    # Headers naming a file that is not in the map are
	                    # tracked but their content is discarded.
	                    if current_file and current_file in file_map:
	                        file_map[current_file] = '\n'.join(current_content).strip()
	                    current_file = file_name_match.group(1)
	                    current_content = []
	                elif current_file and line.strip():
	                    current_content.append(line)

	            # Save last file content
	            if current_file and current_file in file_map:
	                file_map[current_file] = '\n'.join(current_content).strip()

	            self.create_files_from_map(file_map)
	            return

	        # Strategy 3: explicit operations.
	        # Captures the quoted path and the quoted mode of an open() call.
	        open_with_mode = re.compile(
	            r'open\(\s*["\'](.+?)["\']\s*,\s*(?:mode\s*=\s*)?["\']([^"\']*)["\']'
	        )
	        current_file = None
	        current_content = []

	        for line in lines:
	            # Check for mkdir operations
	            if 'mkdir' in line or 'os.makedirs' in line:
	                dir_match = re.search(r'["\'](.+?)["\']', line)
	                if dir_match:
	                    self._create_directory(dir_match.group(1))

	            # Only opens whose mode requests writing start a file; testing
	            # for a bare 'w' is always true because "with" contains one.
	            elif 'with open' in line:
	                opened = open_with_mode.search(line)
	                if opened and set(opened.group(2)) & set('wax+'):
	                    if current_file:
	                        self._create_file_with_content(current_file, '\n'.join(current_content))
	                    current_file = opened.group(1)
	                    current_content = []

	            # Collect content for the current file
	            elif current_file and line.strip() and not line.strip().startswith(('with', '}')):
	                current_content.append(line)

	        # Write any remaining file content
	        if current_file:
	            self._create_file_with_content(current_file, '\n'.join(current_content))

	    def _create_file_with_content(self, file_path: str, content: str):
	        """Write stripped ``content`` to ``file_path``, creating parent directories.

	        ``OSError`` is caught and reported on stdout rather than raised.
	        """
	        # NOTE(review): file_path is derived from fetched page content and is
	        # used as-is, so absolute or "../" paths can land outside the output
	        # directory. Confirm whether that should be rejected.
	        try:
	            parent_dir = os.path.dirname(file_path)
	            # os.makedirs('') raises FileNotFoundError, so bare file names
	            # must skip the directory step.
	            if parent_dir:
	                os.makedirs(parent_dir, exist_ok=True)
	            with open(file_path, 'w', encoding='utf-8') as f:
	                f.write(content.strip())
	            print(f"Created file: {file_path}")
	        except OSError as e:
	            print(f"Error creating file {file_path}: {e}")

	    def _create_directory(self, dir_path: str):
	        """Ensure ``dir_path`` exists; report ``OSError`` on stdout instead of raising."""
	        try:
	            os.makedirs(dir_path, exist_ok=True)
	            print(f"Created directory: {dir_path}")
	        except OSError as e:
	            print(f"Error creating directory {dir_path}: {e}")

	    def process_urls(self, urls: List[str]):
	        """Fetch every URL, then act on all code blocks found.

	        URLs whose fetch returns no content are skipped.
	        """
	        all_code_blocks = []

	        for url in urls:
	            content = self.fetch_artifact_content(url)
	            if content:
	                all_code_blocks.extend(self.extract_code_blocks(content))

	        # Process each code block
	        for code_block in all_code_blocks:
	            self.parse_file_operations(code_block)

	def main():
	    """Command-line entry point.

	    Parses the artifact URLs and the optional ``--output-dir``, switches
	    to that directory (creating it when missing) and processes every URL
	    with a ``ClaudeArtifactParser``.
	    """
	    parser = argparse.ArgumentParser(description='Parse Claude.ai artifacts and create files/directories')
	    parser.add_argument('urls', nargs='+', help='One or more Claude.ai artifact URLs')
	    parser.add_argument('--output-dir', default='.', help='Output directory for created files')

	    args = parser.parse_args()

	    # Create the output directory on demand: os.chdir alone raises
	    # FileNotFoundError when the requested directory does not exist yet.
	    os.makedirs(args.output_dir, exist_ok=True)
	    # Files are written relative to the working directory, so move there.
	    os.chdir(args.output_dir)

	    # Process the artifacts
	    artifact_parser = ClaudeArtifactParser()
	    artifact_parser.process_urls(args.urls)

	# Run the command-line interface only when this file is executed as a
	# script. The call must be indented under the guard to be valid Python.
	if __name__ == '__main__':
	    main()