Created
August 25, 2025 08:27
-
-
Save finalchild/f0a231d9cc3c4142fd908e2c347a1833 to your computer and use it in GitHub Desktop.
카페24 어드민 API 문서를 쪼개 줍니다
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Script to fetch Cafe24 API documentation and separate it by API resource (h1 level).
Each resource and all its endpoints will be in a single HTML file.
"""
import html
import json
import os
import re
from collections import defaultdict
from pathlib import Path
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup
class Cafe24DocResourceSeparator:
    """Split the single-page Cafe24 API documentation into one HTML file per resource.

    The table of contents of the fetched page is scanned for ``toc-h1`` links
    (resources) and their ``toc-h2`` links (endpoints).  For every resource a
    standalone HTML page is written to ``output_dir``, together with an
    ``index.html`` that links to all of them and a verbatim copy of the
    original page.

    Attributes:
        base_url: URL of the documentation page to fetch.
        output_dir: ``Path`` of the directory all files are written to.
        session: ``requests.Session`` used for every HTTP request.
        resources_processed: Maps a resource title to its 1-based sequence
            number.  Kept for backward compatibility; file names are now
            stored on each resource dict under ``'filename'`` because titles
            are not guaranteed to be unique.
    """

    # Static CSS of a per-resource page.  Kept outside the f-string template
    # so the braces do not have to be doubled.
    _RESOURCE_PAGE_CSS = """\
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6;
            padding: 0;
            margin: 0;
            background: #f5f7fa;
            display: flex;
        }
        .sidebar {
            width: 280px;
            background: white;
            border-right: 1px solid #dee2e6;
            height: 100vh;
            overflow-y: auto;
            position: fixed;
            left: 0;
            top: 0;
        }
        .sidebar-category {
            border-bottom: 1px solid #e9ecef;
        }
        .sidebar-category-title {
            background: #f8f9fa;
            padding: 10px 15px;
            font-weight: 600;
            color: #495057;
            font-size: 0.9em;
            text-transform: uppercase;
            letter-spacing: 0.5px;
        }
        .sidebar-list {
            list-style: none;
            padding: 0;
            margin: 0;
        }
        .sidebar-list li {
            border-bottom: 1px solid #f1f3f5;
        }
        .sidebar-list li.active {
            background: #e7f3ff;
        }
        .sidebar-list li.active a {
            color: #0052a3;
            font-weight: 600;
        }
        .sidebar-list a {
            display: block;
            padding: 6px 15px;
            color: #495057;
            text-decoration: none;
            font-size: 0.92em;
        }
        .sidebar-list a:hover {
            background: #f8f9fa;
            color: #0066cc;
        }
        .main-wrapper {
            margin-left: 280px;
            width: calc(100% - 280px);
            padding: 20px;
        }
        .header-nav {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 30px;
            position: sticky;
            top: 0;
            z-index: 100;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        .header-nav h1 {
            margin: 0 0 10px 0;
            color: #333;
        }
        .category-badge {
            display: inline-block;
            background: #0066cc;
            color: white;
            padding: 3px 10px;
            border-radius: 15px;
            font-size: 0.85em;
            margin-bottom: 10px;
        }
        .endpoint-nav {
            list-style: none;
            padding: 0;
            margin: 10px 0;
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
        }
        .endpoint-nav li {
            background: white;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 5px 12px;
        }
        .endpoint-nav a {
            color: #0066cc;
            text-decoration: none;
            font-size: 0.9em;
        }
        .endpoint-nav a:hover {
            text-decoration: underline;
        }
        .back-link {
            display: inline-block;
            margin-bottom: 10px;
            color: #0066cc;
            text-decoration: none;
        }
        .back-link:hover {
            text-decoration: underline;
        }
        .content {
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            max-width: 1600px;
            margin: 0 auto;
        }
        /* Dynamic scroll padding will be set by JavaScript */
        h2 {
            border-top: 2px solid #e1e4e8;
            padding-top: 30px;
            margin-top: 50px;
            color: #2c3e50;
        }
        h2:first-child {
            border-top: none;
            padding-top: 0;
            margin-top: 0;
        }
        h3 {
            color: #34495e;
            margin-top: 25px;
        }
        /* Better table styling */
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            font-size: 14px;
        }
        th {
            background: #f8f9fa;
            padding: 12px;
            text-align: left;
            font-weight: 600;
            border: 1px solid #dee2e6;
        }
        td {
            padding: 6px 12px;
            border: 1px solid #dee2e6;
        }
        tr:nth-child(even) {
            background: #f8f9fa;
        }
        /* Code blocks */
        pre {
            background: #f6f8fa;
            padding: 16px;
            border-radius: 6px;
            overflow-x: auto;
            border: 1px solid #e1e4e8;
        }
        code {
            background: #f6f8fa;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
            font-size: 0.9em;
        }
        /* Better spacing for lists */
        ul, ol {
            line-height: 1.8;
            margin: 15px 0;
        }
        li {
            margin: 8px 0;
        }
        /* Highlight important sections */
        .highlight {
            background: #fff9c4;
            padding: 15px;
            border-left: 4px solid #ffc107;
            margin: 20px 0;
        }
        /* Better link styling */
        a {
            color: #0066cc;
        }
        a:hover {
            color: #0052a3;
        }
        /* Response/Request sections */
        .http-method {
            display: inline-block;
            padding: 4px 8px;
            border-radius: 4px;
            font-weight: bold;
            font-size: 0.85em;
            margin-right: 8px;
        }
        .http-method.get {
            background: #61affe;
            color: white;
        }
        .http-method.post {
            background: #49cc90;
            color: white;
        }
        .http-method.put {
            background: #fca130;
            color: white;
        }
        .http-method.delete {
            background: #f93e3e;
            color: white;
        }
        /* HR separator styling */
        hr {
            border: none;
            border-top: 2px solid #e1e4e8;
            margin: 40px 0;
        }
        /* Hide Cafe24 and YouTube chips */
        .chip-cafe24, .chip-youtube {
            display: none !important;
        }
        /* Hide duplicate method sections */
        #post-2, #get-2, #put-2, #delete-2 {
            display: none !important;
        }
        /* Hide elements with collapse-child-attr controls */
        [aria-controls="collapse-child-attr"] {
            display: none !important;
        }"""

    # Static JavaScript of a per-resource page (sticky-header aware scrolling).
    _RESOURCE_PAGE_JS = """\
        // Dynamically set scroll padding based on header height
        window.addEventListener('load', function() {
            const header = document.querySelector('.header-nav');
            if (header) {
                const updateScrollPadding = () => {
                    const headerHeight = header.offsetHeight;
                    document.documentElement.style.scrollPaddingTop = (headerHeight + 20) + 'px';
                };
                // Set initial padding
                updateScrollPadding();
                // Update on resize
                window.addEventListener('resize', updateScrollPadding);
                // Smooth scroll for anchor links
                document.querySelectorAll('a[href^="#"]').forEach(anchor => {
                    anchor.addEventListener('click', function(e) {
                        e.preventDefault();
                        const targetId = this.getAttribute('href').substring(1);
                        const targetElement = document.getElementById(targetId);
                        if (targetElement) {
                            const headerHeight = header.offsetHeight;
                            const targetPosition = targetElement.getBoundingClientRect().top + window.pageYOffset - headerHeight - 20;
                            window.scrollTo({
                                top: targetPosition,
                                behavior: 'smooth'
                            });
                        }
                    });
                });
            }
        });"""

    # Static CSS of the index page.
    _INDEX_PAGE_CSS = """\
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6;
            padding: 20px;
            max-width: 1400px;
            margin: 0 auto;
            background: #f5f7fa;
        }
        h1 {
            color: #333;
            border-bottom: 3px solid #0066cc;
            padding-bottom: 10px;
        }
        .stats {
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin: 20px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stats span {
            margin-right: 30px;
            font-weight: bold;
        }
        .category-section {
            margin: 30px 0;
        }
        .category-title {
            background: #0066cc;
            color: white;
            padding: 10px 15px;
            border-radius: 8px 8px 0 0;
            font-size: 1.2em;
            font-weight: bold;
        }
        .resource-list {
            background: white;
            padding: 20px;
            border-radius: 0 0 8px 8px;
        }
        .resource-list ul {
            list-style: none;
            padding: 0;
            margin: 0;
        }
        .resource-list li {
            padding: 8px 0;
            border-bottom: 1px solid #e9ecef;
        }
        .resource-list li:last-child {
            border-bottom: none;
        }
        .resource-list a {
            color: #0066cc;
            text-decoration: none;
            font-size: 1.05em;
        }
        .resource-list a:hover {
            text-decoration: underline;
            color: #0052a3;
        }
        .original-link {
            margin-top: 20px;
            padding: 15px;
            background: #e7f3ff;
            border-radius: 8px;
        }"""

    def __init__(self, base_url, output_dir="cafe24_docs_by_resource"):
        """Create the output directory and an HTTP session.

        Args:
            base_url: URL of the documentation page to split.
            output_dir: Directory the generated files are written to.  It is
                created (including missing parents) if it does not exist.
        """
        self.base_url = base_url
        self.output_dir = Path(output_dir)
        # parents=True so a nested output path such as "out/docs" also works.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        self.resources_processed = {}  # Track resource titles to numbers

    def sanitize_filename(self, name):
        """Convert a string into a valid filename.

        Characters that are reserved on common filesystems (and ASCII control
        characters) are replaced by ``_``, surrounding dots/spaces are
        removed and the result is limited to 100 characters.

        Returns:
            The sanitized name, or ``"unnamed"`` if nothing is left.
        """
        name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name)
        name = name.strip('. ')
        if len(name) > 100:
            # Truncation may expose a new trailing space/dot; strip it again.
            name = name[:100].rstrip('. ')
        return name if name else "unnamed"

    def fetch_page(self, url):
        """Fetch a page and return its HTML content.

        Returns:
            The decoded response body, or ``None`` if the request failed
            (the error is printed).
        """
        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        # Without a declared charset requests falls back to ISO-8859-1 for
        # text responses, which would garble non-Latin text; let it detect the
        # encoding from the body instead.
        content_type = response.headers.get('Content-Type', '')
        if 'charset' not in content_type.lower():
            response.encoding = response.apparent_encoding
        return response.text

    def extract_resources_structure(self, soup):
        """Extract the resource structure (h1 level) from the documentation.

        Args:
            soup: Parsed documentation page.

        Returns:
            A ``defaultdict(list)`` mapping a category name to a list of
            resource dicts with the keys ``title``, ``href``, ``resource``,
            ``category`` and ``endpoints`` (a list of ``{'title', 'href'}``
            dicts).  Resources seen before the first category header are
            filed under ``"General"``.  Empty if no table of contents is found.
        """
        resources = defaultdict(list)
        toc_element = soup.find('div', id='toc') or soup.find('div', class_='toc-list')
        if not toc_element:
            return resources

        current_category = "General"
        # find_all() yields the matching tags in document order, so a category
        # header applies to every resource link that follows it.
        for element in toc_element.find_all(['h4', 'a']):
            classes = element.get('class', [])
            if element.name == 'h4':
                # Category headers are h4 elements with class toc-sub-title.
                if 'toc-sub-title' in classes:
                    current_category = element.get_text(strip=True)
                continue
            if 'toc-h1' not in classes:
                continue

            # This is a main resource; its endpoints are the toc-h2 links
            # inside the same list item.
            parent_li = element.find_parent('li')
            h2_links = parent_li.find_all('a', class_='toc-h2') if parent_li else []
            resources[current_category].append({
                'title': element.get('data-title', element.get_text(strip=True)),
                'href': element.get('href', ''),
                'resource': element.get('data-resource', ''),
                'category': current_category,
                'endpoints': [
                    {
                        'title': h2_link.get('data-title', h2_link.get_text(strip=True)),
                        'href': h2_link.get('href', ''),
                    }
                    for h2_link in h2_links
                ],
            })
        return resources

    def extract_section_content(self, soup, section_id):
        """Extract content for a specific section from the main page.

        The element with the given id and its following siblings are
        collected until the next ``h1``, or until an ``h2`` whose id does not
        start with the first ``-``-separated token of ``section_id``.

        Args:
            soup: Parsed documentation page.
            section_id: Element id, with or without a leading ``#``.

        Returns:
            The collected HTML as a string, or a short ``<p>`` placeholder if
            the id is empty or no such element exists.
        """
        # Remove the # if present
        section_id = (section_id or '').lstrip('#')
        if not section_id:
            return "<p>No section ID provided</p>"

        section = soup.find(id=section_id)
        if not section:
            # Try finding in content div
            content_div = soup.find('div', class_='content')
            if content_div:
                section = content_div.find(id=section_id)
        if not section:
            return f"<p>Section '{html.escape(section_id)}' not found</p>"

        # NOTE(review): consecutive h2 sections whose ids share the same first
        # token are all swallowed into one extraction, so endpoint content may
        # appear more than once on a page — confirm against the real page
        # structure before tightening this rule.
        id_prefix = section_id.split('-')[0]
        content_parts = [str(section)]
        for sibling in section.find_next_siblings():
            if sibling.name == 'h1':
                break
            if sibling.name == 'h2' and sibling.get('id'):
                if not sibling.get('id', '').startswith(id_prefix):
                    break
            content_parts.append(str(sibling))
        return '\n'.join(content_parts)

    def _resource_filename(self, resource):
        """Return the output file name of a resource dict.

        Prefers the ``'filename'`` stored on the dict; falls back to the
        title-based lookup in ``resources_processed`` (number 999 if unknown).
        """
        filename = resource.get('filename')
        if filename:
            return filename
        number = self.resources_processed.get(resource['title'], 999)
        return f"{number:03d}_{self.sanitize_filename(resource['title'])}.html"

    def create_resource_html(self, resource_info, all_content, original_soup, all_resources_by_category):
        """Create a standalone HTML file for a resource with all its endpoints.

        Args:
            resource_info: Resource dict as produced by
                ``extract_resources_structure``.
            all_content: HTML fragment placed into the content area verbatim.
            original_soup: Parsed original page; its ``<style>`` elements and
                stylesheet ``<link>`` elements are copied into the new page.
            all_resources_by_category: Mapping of category name to resource
                dicts; only the resources of this resource's category are
                listed in the sidebar.

        Returns:
            The complete HTML document as a string.
        """
        title = resource_info['title']
        category = resource_info['category']
        # Titles and categories are scraped text, so they must be escaped
        # before being placed into markup.
        safe_title = html.escape(title)
        safe_category = html.escape(category)

        # Extract styles from original page
        styles = ''.join(str(style) for style in original_soup.find_all('style'))
        stylesheets = ''.join(
            str(link) for link in original_soup.find_all('link', rel='stylesheet')
        )

        # Build navigation for this resource
        nav_items = [
            '<li><a href="#{}">{}</a></li>'.format(
                html.escape(endpoint['href'].lstrip('#'), quote=True),
                html.escape(endpoint['title'].strip()),
            )
            for endpoint in resource_info['endpoints']
        ]
        nav_html = '<ul class="endpoint-nav">' + ''.join(nav_items) + '</ul>'

        # Build sidebar navigation with only same category resources
        current_file = self._resource_filename(resource_info)
        sidebar_items = []
        if category in all_resources_by_category:
            for res in all_resources_by_category[category]:
                res_filename = self._resource_filename(res)
                # Titles may repeat, so only the entry that resolves to this
                # very page is highlighted.
                is_active = res is resource_info or (
                    res['title'] == title and res_filename == current_file
                )
                active_class = 'active' if is_active else ''
                sidebar_items.append(
                    f'<li class="{active_class}">'
                    f'<a href="{quote(res_filename)}">{html.escape(res["title"])}</a></li>'
                )
        sidebar_html = (
            '<div class="sidebar-category">'
            f'<div class="sidebar-category-title">{safe_category}</div>'
            '<ul class="sidebar-list">'
            + ''.join(sidebar_items)
            + '</ul></div>'
        )

        return f"""<!DOCTYPE html>
<html lang="ko">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title} - Cafe24 API Documentation</title>
    {stylesheets}
    {styles}
    <style>
{self._RESOURCE_PAGE_CSS}
    </style>
    <script>
{self._RESOURCE_PAGE_JS}
    </script>
</head>
<body>
    <div class="sidebar">
        {sidebar_html}
    </div>
    <div class="main-wrapper">
        <div class="header-nav">
            <a href="index.html" class="back-link">← Back to API Index</a>
            <div class="category-badge">{safe_category}</div>
            <h1>{safe_title}</h1>
            <nav>
                <strong>Endpoints in this resource:</strong>
                {nav_html}
            </nav>
        </div>
        <div class="content">
{all_content}
        </div>
    </div>
</body>
</html>"""

    def process_documentation(self):
        """Main method to process the documentation.

        Fetches ``base_url``, stores the original page, writes one HTML file
        per resource and finally the index file.  Returns ``None``; if the
        main page cannot be fetched a message is printed and nothing is
        written.
        """
        print(f"Fetching main page from {self.base_url}...")
        main_html = self.fetch_page(self.base_url)
        if not main_html:
            print("Failed to fetch main page")
            return
        soup = BeautifulSoup(main_html, 'html.parser')

        # Save the complete original page
        original_file = self.output_dir / "original_complete.html"
        original_file.write_text(main_html, encoding='utf-8')
        print(f"Saved original page to {original_file}")

        # Extract resource structure
        resources_by_category = self.extract_resources_structure(soup)
        total_resources = sum(len(resources) for resources in resources_by_category.values())
        print(f"Found {total_resources} API resources across {len(resources_by_category)} categories")

        # First pass: number every resource and fix its file name up front so
        # sidebar links can point at files that are written later.  The name
        # is stored on the resource itself because titles may not be unique.
        number = 0
        for resources in resources_by_category.values():
            for resource in resources:
                number += 1
                self.resources_processed[resource['title']] = number
                resource['filename'] = (
                    f"{number:03d}_{self.sanitize_filename(resource['title'])}.html"
                )

        # Second pass: write one page per resource
        file_index = []
        resource_count = 0
        for category, resources in resources_by_category.items():
            print(f"\nProcessing category: {category} ({len(resources)} resources)")
            for resource in resources:
                resource_count += 1
                title = resource['title']
                filename = resource['filename']
                filepath = self.output_dir / filename
                print(f" [{resource_count}/{total_resources}] Processing: {title}")
                print(f" - {len(resource['endpoints'])} endpoints")

                # Main resource section first, then each endpoint section;
                # only in-page anchors can be extracted.
                hrefs = [resource['href']]
                hrefs.extend(endpoint['href'] for endpoint in resource['endpoints'])
                all_content_parts = [
                    self.extract_section_content(soup, href)
                    for href in hrefs
                    if href.startswith('#')
                ]
                all_content = '\n<hr>\n'.join(all_content_parts)

                # Create standalone HTML with all resources for sidebar
                standalone_html = self.create_resource_html(
                    resource, all_content, soup, resources_by_category
                )
                filepath.write_text(standalone_html, encoding='utf-8')
                print(f" ✓ Saved to {filepath}")

                file_index.append({
                    'filename': filename,
                    'title': title,
                    'category': category,
                    'endpoint_count': len(resource['endpoints'])
                })

        # Create index file
        self.create_index_file(file_index, resources_by_category)
        print(f"\n✅ Processing complete! {resource_count} resource files saved to {self.output_dir}/")

    def create_index_file(self, file_index, resources_by_category):
        """Create an index HTML file with links to all resource documents.

        Args:
            file_index: List of dicts with at least ``filename``, ``title``
                and ``category``; entries are grouped by category in order of
                first appearance.
            resources_by_category: Unused; kept so existing callers keep
                working.

        Writes ``index.html`` into ``output_dir``.
        """
        # Group files by category
        files_by_category = defaultdict(list)
        for file_info in file_index:
            files_by_category[file_info['category']].append(file_info)

        sections = []
        for category, files in files_by_category.items():
            items = '\n'.join(
                '            <li><a href="{}">{}</a></li>'.format(
                    quote(file_info['filename']), html.escape(file_info['title'])
                )
                for file_info in files
            )
            sections.append(f"""
    <div class="category-section">
        <div class="category-title">{html.escape(category)} ({len(files)} resources)</div>
        <div class="resource-list">
            <ul>
{items}
            </ul>
        </div>
    </div>
""")

        # Link to the page that was actually split rather than a fixed URL.
        live_url = html.escape(self.base_url, quote=True)
        index_html = f"""<!DOCTYPE html>
<html lang="ko">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Cafe24 API Documentation - Resource Index</title>
    <style>
{self._INDEX_PAGE_CSS}
    </style>
</head>
<body>
    <h1>📚 Cafe24 API Documentation - By Resource</h1>
    <div class="stats">
        <span>📁 Total Resources: {len(file_index)}</span>
        <span>📑 Categories: {len(files_by_category)}</span>
    </div>
    <div class="original-link">
        <strong>Additional Resources:</strong>
        <a href="original_complete.html">View Complete Original Documentation</a> |
        <a href="{live_url}" target="_blank">Visit Live Documentation</a>
    </div>
{''.join(sections)}
</body>
</html>"""

        index_file = self.output_dir / "index.html"
        index_file.write_text(index_html, encoding='utf-8')
        print(f"\n✅ Created index file: {index_file}")
def main(url="https://developers.cafe24.com/docs/ko/api/admin/",
         output_dir="cafe24_docs_by_resource"):
    """Split the Cafe24 admin API documentation into per-resource HTML files.

    Args:
        url: Documentation page to fetch.  Defaults to the Korean admin API
            documentation.
        output_dir: Directory the generated files are written to.
    """
    separator = Cafe24DocResourceSeparator(url, output_dir)
    separator.process_documentation()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Use at your own risk