Skip to content

Instantly share code, notes, and snippets.

@maaduukaar
Last active July 28, 2025 10:54
Show Gist options
  • Save maaduukaar/ef962780869e83ba5a4dc0fffbe0c1ed to your computer and use it in GitHub Desktop.
Save maaduukaar/ef962780869e83ba5a4dc0fffbe0c1ed to your computer and use it in GitHub Desktop.
Python script to fetch articles from a WordPress parent category and all its subcategories via REST API, then export to Excel with clickable titles and modification dates. Supports recursion for nested categories. Example usage: Enter a category URL, and it generates an XLSX file.
import requests
from urllib.parse import urlparse
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font
# — Function: date formatting
def format_date(date_str):
dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
return dt.strftime('%d.%m.%Y')
# — Recursive function to get IDs of all child categories
def get_all_child_category_ids(parent_id, base_api):
child_ids = []
categories_url = f"{base_api}categories?parent={parent_id}&per_page=100"
response = requests.get(categories_url)
if response.status_code == 200:
categories = response.json()
for cat in categories:
child_ids.append(cat['id'])
# Recursion for sublevels
child_ids.extend(get_all_child_category_ids(cat['id'], base_api))
return child_ids
# — Get the link via input
url = input("Enter the link to the parent category (e.g., https://support.r7-office.ru/category/desktop_editors/table-editor/): ").strip()
if not url:
print("Error: no link entered.")
exit()
# Extract slug from URL
slug = urlparse(url).path.strip('/').split('/')[-1]
print(f"Using parent category slug: {slug}")
# Base API URL
base_api = 'https://support.r7-office.ru/wp-json/wp/v2/'
# — Request parent category by slug
cat_api = f"{base_api}categories?slug={slug}"
cat_resp = requests.get(cat_api)
if cat_resp.status_code != 200 or not cat_resp.json():
print("Error: category not found.")
exit()
category = cat_resp.json()[0]
parent_id = category['id']
parent_name = category['name']
print(f"Parent category ID: {parent_id}")
print(f"Parent category name: {parent_name}")
# — Get IDs of all child categories (recursively)
all_child_ids = get_all_child_category_ids(parent_id, base_api)
print(f"Found {len(all_child_ids)} child categories (including nested): {all_child_ids}")
# — Add parent category to the list for processing (if you need its articles)
category_ids_to_process = [parent_id] + all_child_ids # Include parent + all children
# — Create Excel file
wb = Workbook()
ws = wb.active
ws.title = "All Articles"
# Row counter
current_row = 1
# — Process each category (parent + children)
for cat_id in category_ids_to_process:
# Get category name by ID
cat_info_url = f"{base_api}categories/{cat_id}"
cat_info_resp = requests.get(cat_info_url)
if cat_info_resp.status_code != 200:
print(f"Error: unable to get data for category ID {cat_id}. Skipping.")
continue
cat_data = cat_info_resp.json()
category_name = cat_data['name']
print(f"\n--- Processing category: {category_name} (ID: {cat_id}) ---")
# — Request articles for this category
posts_api = f"{base_api}posts?categories={cat_id}&per_page=100"
posts_resp = requests.get(posts_api)
if posts_resp.status_code != 200:
print(f"Error: no articles found for category {category_name}. Skipping.")
continue
posts = posts_resp.json()
print(f"Found {len(posts)} articles in section '{category_name}'")
# — Add section name (only if there are articles)
if posts:
ws.cell(row=current_row, column=1, value=f"{category_name}")
ws.cell(row=current_row, column=1).font = Font(bold=True, size=12)
current_row += 1
# — Add articles of this section
for post in posts:
title = post['title']['rendered']
link = post['link']
date = format_date(post['modified'])
# Add row with article
ws.cell(row=current_row, column=1, value=f'=HYPERLINK("{link}","{title}")') # English version; change to Russian if needed
ws.cell(row=current_row, column=2, value=date)
current_row += 1
# — Adjust column widths
ws.column_dimensions['A'].width = 80
ws.column_dimensions['B'].width = 18
# — Save
filename = f"{slug}_all_articles.xlsx"
wb.save(filename)
print(f"\n=== DONE! ===")
print(f"File {filename} created in the current folder.")
print(f"Processed categories: {len(category_ids_to_process)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment