Skip to content

Instantly share code, notes, and snippets.

@finalchild
Created August 25, 2025 08:27
Show Gist options
  • Save finalchild/f0a231d9cc3c4142fd908e2c347a1833 to your computer and use it in GitHub Desktop.
Save finalchild/f0a231d9cc3c4142fd908e2c347a1833 to your computer and use it in GitHub Desktop.
카페24 어드민 API 문서를 쪼개 줍니다
#!/usr/bin/env python3
"""
Script to fetch Cafe24 API documentation and separate it by API resource (h1 level).
Each resource and all its endpoints will be in a single HTML file.
"""
import html
import json
import os
import re
from collections import defaultdict
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
class Cafe24DocResourceSeparator:
    """Split a single-page API documentation site into one HTML file per resource.

    The page at ``base_url`` is downloaded once, its table of contents is
    scanned for ``toc-h1`` links (resources) and the ``toc-h2`` links nested
    under them (endpoints), and each resource is written to its own
    standalone HTML file inside ``output_dir`` together with an
    ``index.html`` that links to all of them.
    """

    def __init__(self, base_url, output_dir="cafe24_docs_by_resource"):
        """Prepare the output directory and the HTTP session.

        Args:
            base_url: URL of the single-page documentation to split.
            output_dir: Directory that receives the generated files. It is
                created (including missing parents) if it does not exist.
        """
        self.base_url = base_url
        self.output_dir = Path(output_dir)
        # parents=True so that a nested output path does not fail on a
        # missing intermediate directory.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        # Maps resource title -> sequence number. Kept for callers that build
        # resource dicts without a 'filename' key; duplicate titles overwrite
        # each other here, which is why filenames are also stored per resource.
        self.resources_processed = {}

    def sanitize_filename(self, name):
        """Convert a string into a valid filename.

        Characters that are illegal in common filesystems are replaced by
        ``_``, surrounding dots/spaces are removed and the result is limited
        to 100 characters.

        Args:
            name: Arbitrary text, typically a resource title.

        Returns:
            The sanitized name, or ``"unnamed"`` if nothing is left.
        """
        name = re.sub(r'[<>:"/\\|?*]', '_', name)
        name = name.strip('. ')
        # Strip again after truncating: cutting at 100 characters can expose
        # a trailing space or dot.
        name = name[:100].rstrip('. ')
        return name if name else "unnamed"

    def fetch_page(self, url):
        """Fetch a page and return its HTML content.

        Args:
            url: Address to download with the instance's session.

        Returns:
            The decoded response body, or ``None`` when the request fails
            (the error is printed).
        """
        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        # Without an explicit charset, requests falls back to ISO-8859-1 for
        # text/* responses, which garbles non-Latin pages. Use the encoding
        # detected from the body in that case.
        if 'charset' not in response.headers.get('Content-Type', '').lower():
            response.encoding = response.apparent_encoding
        return response.text

    def extract_resources_structure(self, soup):
        """Extract the resource structure (h1 level) from the documentation.

        Args:
            soup: Parsed documentation page.

        Returns:
            A ``defaultdict(list)`` mapping category name to a list of
            resource dicts with the keys ``title``, ``href``, ``resource``,
            ``category`` and ``endpoints`` (a list of ``title``/``href``
            dicts). Resources that appear before any category header are
            filed under ``"General"``. Empty when no table of contents is
            found.
        """
        resources = defaultdict(list)
        current_category = "General"

        toc_element = soup.find('div', id='toc')
        if not toc_element:
            toc_element = soup.find('div', class_='toc-list')
        if not toc_element:
            return resources

        for element in toc_element.descendants:
            # Category headers are h4 elements with class toc-sub-title.
            if element.name == 'h4' and 'toc-sub-title' in element.get('class', []):
                current_category = element.get_text(strip=True)
            elif element.name == 'a' and 'toc-h1' in element.get('class', []):
                # This is a main resource.
                resource_info = {
                    'title': element.get('data-title', element.get_text(strip=True)),
                    'href': element.get('href', ''),
                    'resource': element.get('data-resource', ''),
                    'category': current_category,
                    'endpoints': [],
                }
                # Endpoints are the toc-h2 links inside the same <li>.
                parent_li = element.find_parent('li')
                if parent_li:
                    for h2_link in parent_li.find_all('a', class_='toc-h2'):
                        resource_info['endpoints'].append({
                            'title': h2_link.get('data-title', h2_link.get_text(strip=True)),
                            'href': h2_link.get('href', ''),
                        })
                resources[current_category].append(resource_info)
        return resources

    def extract_section_content(self, soup, section_id):
        """Extract content for a specific section from the main page.

        Starting at the element whose ``id`` is ``section_id``, following
        siblings are collected until the next ``h1``, or until an ``h2`` whose
        id does not start with the first dash-separated word of
        ``section_id``.

        Args:
            soup: Parsed documentation page.
            section_id: Element id, with or without a leading ``#``.

        Returns:
            The collected HTML as one string, or a short ``<p>`` placeholder
            when no id was given or the element cannot be found.
        """
        if not section_id:
            return "<p>No section ID provided</p>"
        # Remove the # if present.
        section_id = section_id.lstrip('#')

        section = soup.find(id=section_id)
        if not section:
            # Try finding in content div.
            content_div = soup.find('div', class_='content')
            if content_div:
                section = content_div.find(id=section_id)
        if not section:
            return f"<p>Section '{html.escape(section_id)}' not found</p>"

        # The section header itself comes first.
        content_parts = [str(section)]
        if section.parent:
            # NOTE(review): the prefix test below keeps going across h2
            # headings that share the first word of section_id, so adjacent
            # sections may end up repeated in more than one extraction --
            # confirm against the live page before tightening it.
            id_prefix = section_id.split('-')[0]
            current = section
            while True:
                current = current.find_next_sibling()
                if not current:
                    break
                if current.name == 'h1':
                    break
                if current.name == 'h2' and current.get('id'):
                    if not current.get('id', '').startswith(id_prefix):
                        break
                content_parts.append(str(current))
        return '\n'.join(content_parts)

    def create_resource_html(self, resource_info, all_content, original_soup, all_resources_by_category):
        """Create a standalone HTML file for a resource with all its endpoints.

        Args:
            resource_info: Resource dict as produced by
                ``extract_resources_structure``.
            all_content: Pre-rendered HTML body for the resource; inserted
                verbatim.
            original_soup: Parsed source page; its ``<style>`` elements and
                stylesheet ``<link>`` elements are copied into the output.
            all_resources_by_category: Mapping of category to resource dicts,
                used to build the sidebar for the resource's own category.

        Returns:
            The complete HTML document as a string.
        """
        title = resource_info['title']
        category = resource_info['category']
        # Titles and categories come from scraped text, so escape them before
        # placing them into markup.
        safe_title = html.escape(title)
        safe_category = html.escape(category)

        # Carry over the styling of the original page.
        styles = [str(style) for style in original_soup.find_all('style')]
        stylesheets = [str(link) for link in original_soup.find_all('link', rel='stylesheet')]

        # Navigation across the endpoints of this resource.
        nav_items = []
        for endpoint in resource_info['endpoints']:
            anchor = html.escape(endpoint['href'].lstrip('#'))
            endpoint_title = html.escape(endpoint['title'].strip())
            nav_items.append(f'<li><a href="#{anchor}">{endpoint_title}</a></li>')
        nav_html = '<ul class="endpoint-nav">' + ''.join(nav_items) + '</ul>'

        # Sidebar listing only the resources of the same category.
        sidebar_items = []
        for res in all_resources_by_category.get(category, []):
            # Prefer the filename stored on the resource: the title-keyed
            # lookup picks the wrong number when two resources share a title.
            res_filename = res.get('filename') or (
                f"{self.resources_processed.get(res['title'], 999):03d}_"
                f"{self.sanitize_filename(res['title'])}.html"
            )
            active_class = 'active' if res['title'] == title else ''
            sidebar_items.append(
                f'<li class="{active_class}">'
                f'<a href="{html.escape(res_filename)}">{html.escape(res["title"])}</a></li>'
            )
        sidebar_html = (
            '<div class="sidebar-category">'
            f'<div class="sidebar-category-title">{safe_category}</div>'
            '<ul class="sidebar-list">'
            + ''.join(sidebar_items)
            + '</ul></div>'
        )

        html_template = f"""<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title} - Cafe24 API Documentation</title>
{''.join(stylesheets)}
{''.join(styles)}
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
padding: 0;
margin: 0;
background: #f5f7fa;
display: flex;
}}
.sidebar {{
width: 280px;
background: white;
border-right: 1px solid #dee2e6;
height: 100vh;
overflow-y: auto;
position: fixed;
left: 0;
top: 0;
}}
.sidebar-category {{
border-bottom: 1px solid #e9ecef;
}}
.sidebar-category-title {{
background: #f8f9fa;
padding: 10px 15px;
font-weight: 600;
color: #495057;
font-size: 0.9em;
text-transform: uppercase;
letter-spacing: 0.5px;
}}
.sidebar-list {{
list-style: none;
padding: 0;
margin: 0;
}}
.sidebar-list li {{
border-bottom: 1px solid #f1f3f5;
}}
.sidebar-list li.active {{
background: #e7f3ff;
}}
.sidebar-list li.active a {{
color: #0052a3;
font-weight: 600;
}}
.sidebar-list a {{
display: block;
padding: 6px 15px;
color: #495057;
text-decoration: none;
font-size: 0.92em;
}}
.sidebar-list a:hover {{
background: #f8f9fa;
color: #0066cc;
}}
.main-wrapper {{
margin-left: 280px;
width: calc(100% - 280px);
padding: 20px;
}}
.header-nav {{
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin-bottom: 30px;
position: sticky;
top: 0;
z-index: 100;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}}
.header-nav h1 {{
margin: 0 0 10px 0;
color: #333;
}}
.category-badge {{
display: inline-block;
background: #0066cc;
color: white;
padding: 3px 10px;
border-radius: 15px;
font-size: 0.85em;
margin-bottom: 10px;
}}
.endpoint-nav {{
list-style: none;
padding: 0;
margin: 10px 0;
display: flex;
flex-wrap: wrap;
gap: 10px;
}}
.endpoint-nav li {{
background: white;
border: 1px solid #ddd;
border-radius: 5px;
padding: 5px 12px;
}}
.endpoint-nav a {{
color: #0066cc;
text-decoration: none;
font-size: 0.9em;
}}
.endpoint-nav a:hover {{
text-decoration: underline;
}}
.back-link {{
display: inline-block;
margin-bottom: 10px;
color: #0066cc;
text-decoration: none;
}}
.back-link:hover {{
text-decoration: underline;
}}
.content {{
background: white;
padding: 30px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
max-width: 1600px;
margin: 0 auto;
}}
/* Dynamic scroll padding will be set by JavaScript */
h2 {{
border-top: 2px solid #e1e4e8;
padding-top: 30px;
margin-top: 50px;
color: #2c3e50;
}}
h2:first-child {{
border-top: none;
padding-top: 0;
margin-top: 0;
}}
h3 {{
color: #34495e;
margin-top: 25px;
}}
/* Better table styling */
table {{
width: 100%;
border-collapse: collapse;
margin: 20px 0;
font-size: 14px;
}}
th {{
background: #f8f9fa;
padding: 12px;
text-align: left;
font-weight: 600;
border: 1px solid #dee2e6;
}}
td {{
padding: 6px 12px;
border: 1px solid #dee2e6;
}}
tr:nth-child(even) {{
background: #f8f9fa;
}}
/* Code blocks */
pre {{
background: #f6f8fa;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
border: 1px solid #e1e4e8;
}}
code {{
background: #f6f8fa;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 0.9em;
}}
/* Better spacing for lists */
ul, ol {{
line-height: 1.8;
margin: 15px 0;
}}
li {{
margin: 8px 0;
}}
/* Highlight important sections */
.highlight {{
background: #fff9c4;
padding: 15px;
border-left: 4px solid #ffc107;
margin: 20px 0;
}}
/* Better link styling */
a {{
color: #0066cc;
}}
a:hover {{
color: #0052a3;
}}
/* Response/Request sections */
.http-method {{
display: inline-block;
padding: 4px 8px;
border-radius: 4px;
font-weight: bold;
font-size: 0.85em;
margin-right: 8px;
}}
.http-method.get {{
background: #61affe;
color: white;
}}
.http-method.post {{
background: #49cc90;
color: white;
}}
.http-method.put {{
background: #fca130;
color: white;
}}
.http-method.delete {{
background: #f93e3e;
color: white;
}}
/* HR separator styling */
hr {{
border: none;
border-top: 2px solid #e1e4e8;
margin: 40px 0;
}}
/* Hide Cafe24 and YouTube chips */
.chip-cafe24, .chip-youtube {{
display: none !important;
}}
/* Hide duplicate method sections */
#post-2, #get-2, #put-2, #delete-2 {{
display: none !important;
}}
/* Hide elements with collapse-child-attr controls */
[aria-controls="collapse-child-attr"] {{
display: none !important;
}}
</style>
<script>
// Dynamically set scroll padding based on header height
window.addEventListener('load', function() {{
const header = document.querySelector('.header-nav');
if (header) {{
const updateScrollPadding = () => {{
const headerHeight = header.offsetHeight;
document.documentElement.style.scrollPaddingTop = (headerHeight + 20) + 'px';
}};
// Set initial padding
updateScrollPadding();
// Update on resize
window.addEventListener('resize', updateScrollPadding);
// Smooth scroll for anchor links
document.querySelectorAll('a[href^="#"]').forEach(anchor => {{
anchor.addEventListener('click', function(e) {{
e.preventDefault();
const targetId = this.getAttribute('href').substring(1);
const targetElement = document.getElementById(targetId);
if (targetElement) {{
const headerHeight = header.offsetHeight;
const targetPosition = targetElement.getBoundingClientRect().top + window.pageYOffset - headerHeight - 20;
window.scrollTo({{
top: targetPosition,
behavior: 'smooth'
}});
}}
}});
}});
}}
}});
</script>
</head>
<body>
<div class="sidebar">
{sidebar_html}
</div>
<div class="main-wrapper">
<div class="header-nav">
<a href="index.html" class="back-link">← Back to API Index</a>
<div class="category-badge">{safe_category}</div>
<h1>{safe_title}</h1>
<nav>
<strong>Endpoints in this resource:</strong>
{nav_html}
</nav>
</div>
<div class="content">
{all_content}
</div>
</div>
</body>
</html>"""
        return html_template

    def process_documentation(self):
        """Main method to process the documentation.

        Downloads ``self.base_url``, stores the untouched page as
        ``original_complete.html``, writes one numbered HTML file per
        resource and finally the index. Progress is printed; nothing is
        returned. Returns early when the main page cannot be fetched.
        """
        print(f"Fetching main page from {self.base_url}...")
        main_html = self.fetch_page(self.base_url)
        if not main_html:
            print("Failed to fetch main page")
            return
        soup = BeautifulSoup(main_html, 'html.parser')

        # Save the complete original page.
        original_file = self.output_dir / "original_complete.html"
        with open(original_file, 'w', encoding='utf-8') as f:
            f.write(main_html)
        print(f"Saved original page to {original_file}")

        # Extract resource structure.
        resources_by_category = self.extract_resources_structure(soup)
        total_resources = sum(len(resources) for resources in resources_by_category.values())
        print(f"Found {total_resources} API resources across {len(resources_by_category)} categories")

        # First pass: number every resource and fix its filename up front so
        # that sidebars can link to files that have not been written yet.
        every_resource = (
            resource
            for resources in resources_by_category.values()
            for resource in resources
        )
        for number, resource in enumerate(every_resource, start=1):
            resource['number'] = number
            resource['filename'] = f"{number:03d}_{self.sanitize_filename(resource['title'])}.html"
            self.resources_processed[resource['title']] = number

        # Second pass: render each resource.
        file_index = []
        for category, resources in resources_by_category.items():
            print(f"\nProcessing category: {category} ({len(resources)} resources)")
            for resource in resources:
                title = resource['title']
                filename = resource['filename']
                filepath = self.output_dir / filename
                print(f" [{resource['number']}/{total_resources}] Processing: {title}")
                print(f" - {len(resource['endpoints'])} endpoints")

                # Resource overview first, then every endpoint; only
                # in-page anchors can be extracted.
                all_content_parts = []
                if resource['href'].startswith('#'):
                    all_content_parts.append(self.extract_section_content(soup, resource['href']))
                for endpoint in resource['endpoints']:
                    if endpoint['href'].startswith('#'):
                        all_content_parts.append(self.extract_section_content(soup, endpoint['href']))
                all_content = '\n<hr>\n'.join(all_content_parts)

                # Create standalone HTML with all resources for sidebar.
                standalone_html = self.create_resource_html(resource, all_content, soup, resources_by_category)
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(standalone_html)
                print(f" ✓ Saved to {filepath}")

                file_index.append({
                    'filename': filename,
                    'title': title,
                    'category': category,
                    'endpoint_count': len(resource['endpoints'])
                })

        # Create index file.
        self.create_index_file(file_index, resources_by_category)
        print(f"\n✅ Processing complete! {len(file_index)} resource files saved to {self.output_dir}/")

    def create_index_file(self, file_index, resources_by_category):
        """Create an index HTML file with links to all resource documents.

        Args:
            file_index: List of dicts with at least ``filename``, ``title``
                and ``category``; entries are grouped by category in the
                order they appear.
            resources_by_category: Accepted for interface compatibility; not
                used when building the index.

        Writes ``index.html`` into ``self.output_dir``.
        """
        # Group files by category.
        files_by_category = defaultdict(list)
        for file_info in file_index:
            files_by_category[file_info['category']].append(file_info)

        # Collect the page in pieces and join once at the end.
        parts = ["""<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Cafe24 API Documentation - Resource Index</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
padding: 20px;
max-width: 1400px;
margin: 0 auto;
background: #f5f7fa;
}
h1 {
color: #333;
border-bottom: 3px solid #0066cc;
padding-bottom: 10px;
}
.stats {
background: white;
padding: 15px;
border-radius: 8px;
margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.stats span {
margin-right: 30px;
font-weight: bold;
}
.category-section {
margin: 30px 0;
}
.category-title {
background: #0066cc;
color: white;
padding: 10px 15px;
border-radius: 8px 8px 0 0;
font-size: 1.2em;
font-weight: bold;
}
.resource-list {
background: white;
padding: 20px;
border-radius: 0 0 8px 8px;
}
.resource-list ul {
list-style: none;
padding: 0;
margin: 0;
}
.resource-list li {
padding: 8px 0;
border-bottom: 1px solid #e9ecef;
}
.resource-list li:last-child {
border-bottom: none;
}
.resource-list a {
color: #0066cc;
text-decoration: none;
font-size: 1.05em;
}
.resource-list a:hover {
text-decoration: underline;
color: #0052a3;
}
.original-link {
margin-top: 20px;
padding: 15px;
background: #e7f3ff;
border-radius: 8px;
}
</style>
</head>
<body>
<h1>📚 Cafe24 API Documentation - By Resource</h1>
<div class="stats">
<span>📁 Total Resources: """ + str(len(file_index)) + """</span>
<span>📑 Categories: """ + str(len(files_by_category)) + """</span>
</div>
<div class="original-link">
<strong>Additional Resources:</strong>
<a href="original_complete.html">View Complete Original Documentation</a> |
<a href="https://developers.cafe24.com/docs/ko/api/admin/" target="_blank">Visit Live Documentation</a>
</div>
"""]

        # Add each category. Category names, titles and filenames originate
        # from scraped text, so they are escaped before being embedded.
        for category, files in files_by_category.items():
            parts.append(f"""
<div class="category-section">
<div class="category-title">{html.escape(category)} ({len(files)} resources)</div>
<div class="resource-list">
<ul>
""")
            for file_info in files:
                parts.append(f"""
<li><a href="{html.escape(file_info['filename'])}">{html.escape(file_info['title'])}</a></li>
""")
            parts.append("""
</ul>
</div>
</div>
""")
        parts.append("""
</body>
</html>""")

        index_file = self.output_dir / "index.html"
        with open(index_file, 'w', encoding='utf-8') as f:
            f.write(''.join(parts))
        print(f"\n✅ Created index file: {index_file}")
def main():
    """Split the Cafe24 admin API documentation into per-resource HTML files.

    Uses the default output directory of ``Cafe24DocResourceSeparator``.
    """
    docs_url = "https://developers.cafe24.com/docs/ko/api/admin/"
    Cafe24DocResourceSeparator(docs_url).process_documentation()


# Run the splitter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
@finalchild
Copy link
Author

Use at your own risk

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment