Last active
July 28, 2025 10:54
-
-
Save maaduukaar/ef962780869e83ba5a4dc0fffbe0c1ed to your computer and use it in GitHub Desktop.
Python script to fetch articles from a WordPress parent category and all its subcategories via REST API, then export to Excel with clickable titles and modification dates. Supports recursion for nested categories. Example usage: Enter a category URL, and it generates an XLSX file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from urllib.parse import urlparse | |
from datetime import datetime | |
from openpyxl import Workbook | |
from openpyxl.styles import Font | |
# — Function: date formatting | |
def format_date(date_str): | |
dt = datetime.fromisoformat(date_str.replace('Z', '+00:00')) | |
return dt.strftime('%d.%m.%Y') | |
# — Recursive function to get IDs of all child categories | |
def get_all_child_category_ids(parent_id, base_api): | |
child_ids = [] | |
categories_url = f"{base_api}categories?parent={parent_id}&per_page=100" | |
response = requests.get(categories_url) | |
if response.status_code == 200: | |
categories = response.json() | |
for cat in categories: | |
child_ids.append(cat['id']) | |
# Recursion for sublevels | |
child_ids.extend(get_all_child_category_ids(cat['id'], base_api)) | |
return child_ids | |
# — Get the link via input | |
url = input("Enter the link to the parent category (e.g., https://support.r7-office.ru/category/desktop_editors/table-editor/): ").strip() | |
if not url: | |
print("Error: no link entered.") | |
exit() | |
# Extract slug from URL | |
slug = urlparse(url).path.strip('/').split('/')[-1] | |
print(f"Using parent category slug: {slug}") | |
# Base API URL | |
base_api = 'https://support.r7-office.ru/wp-json/wp/v2/' | |
# — Request parent category by slug | |
cat_api = f"{base_api}categories?slug={slug}" | |
cat_resp = requests.get(cat_api) | |
if cat_resp.status_code != 200 or not cat_resp.json(): | |
print("Error: category not found.") | |
exit() | |
category = cat_resp.json()[0] | |
parent_id = category['id'] | |
parent_name = category['name'] | |
print(f"Parent category ID: {parent_id}") | |
print(f"Parent category name: {parent_name}") | |
# — Get IDs of all child categories (recursively) | |
all_child_ids = get_all_child_category_ids(parent_id, base_api) | |
print(f"Found {len(all_child_ids)} child categories (including nested): {all_child_ids}") | |
# — Add parent category to the list for processing (if you need its articles) | |
category_ids_to_process = [parent_id] + all_child_ids # Include parent + all children | |
# — Create Excel file | |
wb = Workbook() | |
ws = wb.active | |
ws.title = "All Articles" | |
# Row counter | |
current_row = 1 | |
# — Process each category (parent + children) | |
for cat_id in category_ids_to_process: | |
# Get category name by ID | |
cat_info_url = f"{base_api}categories/{cat_id}" | |
cat_info_resp = requests.get(cat_info_url) | |
if cat_info_resp.status_code != 200: | |
print(f"Error: unable to get data for category ID {cat_id}. Skipping.") | |
continue | |
cat_data = cat_info_resp.json() | |
category_name = cat_data['name'] | |
print(f"\n--- Processing category: {category_name} (ID: {cat_id}) ---") | |
# — Request articles for this category | |
posts_api = f"{base_api}posts?categories={cat_id}&per_page=100" | |
posts_resp = requests.get(posts_api) | |
if posts_resp.status_code != 200: | |
print(f"Error: no articles found for category {category_name}. Skipping.") | |
continue | |
posts = posts_resp.json() | |
print(f"Found {len(posts)} articles in section '{category_name}'") | |
# — Add section name (only if there are articles) | |
if posts: | |
ws.cell(row=current_row, column=1, value=f"{category_name}") | |
ws.cell(row=current_row, column=1).font = Font(bold=True, size=12) | |
current_row += 1 | |
# — Add articles of this section | |
for post in posts: | |
title = post['title']['rendered'] | |
link = post['link'] | |
date = format_date(post['modified']) | |
# Add row with article | |
ws.cell(row=current_row, column=1, value=f'=HYPERLINK("{link}","{title}")') # English version; change to Russian if needed | |
ws.cell(row=current_row, column=2, value=date) | |
current_row += 1 | |
# — Adjust column widths | |
ws.column_dimensions['A'].width = 80 | |
ws.column_dimensions['B'].width = 18 | |
# — Save | |
filename = f"{slug}_all_articles.xlsx" | |
wb.save(filename) | |
print(f"\n=== DONE! ===") | |
print(f"File {filename} created in the current folder.") | |
print(f"Processed categories: {len(category_ids_to_process)}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment