maaduukaar · July 28, 2025 10:54
diff --git a/wp-category-articles-to-excel.py b/wp-category-articles-to-excel.py
 import requests
 from urllib.parse import urlparse
 from datetime import datetime
 from openpyxl import Workbook
 from openpyxl.styles import Font

 # — Function: date formatting
 def format_date(date_str):
    dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
    return dt.strftime('%d.%m.%Y')

 # — Recursive function to get IDs of all child categories
 def get_all_child_category_ids(parent_id, base_api):
    child_ids = []
    categories_url = f"{base_api}categories?parent={parent_id}&per_page=100"
    response = requests.get(categories_url)
    if response.status_code == 200:
        categories = response.json()
        for cat in categories:
            child_ids.append(cat['id'])
            # Recursion for sublevels
            child_ids.extend(get_all_child_category_ids(cat['id'], base_api))
    return child_ids

 # — Get the link via input
 url = input("Enter the link to the parent category (e.g., https://support.r7-office.ru/category/desktop_editors/table-editor/): ").strip()
 if not url:
    print("Error: no link entered.")
    exit()

 # Extract slug from URL
 slug = urlparse(url).path.strip('/').split('/')[-1]
 print(f"Using parent category slug: {slug}")

 # Base API URL
 base_api = 'https://support.r7-office.ru/wp-json/wp/v2/'

 # — Request parent category by slug
 cat_api = f"{base_api}categories?slug={slug}"
 cat_resp = requests.get(cat_api)
 if cat_resp.status_code != 200 or not cat_resp.json():
    print("Error: category not found.")
    exit()

 category = cat_resp.json()[0]
 parent_id = category['id']
 parent_name = category['name']
 print(f"Parent category ID: {parent_id}")
 print(f"Parent category name: {parent_name}")

 # — Get IDs of all child categories (recursively)
 all_child_ids = get_all_child_category_ids(parent_id, base_api)
 print(f"Found {len(all_child_ids)} child categories (including nested): {all_child_ids}")

 # — Add parent category to the list for processing (if you need its articles)
 category_ids_to_process = [parent_id] + all_child_ids  # Include parent + all children

 # — Create Excel file
 wb = Workbook()
 ws = wb.active
 ws.title = "All Articles"

 # Row counter
 current_row = 1

 # — Process each category (parent + children)
 for cat_id in category_ids_to_process:
    # Get category name by ID
    cat_info_url = f"{base_api}categories/{cat_id}"
    cat_info_resp = requests.get(cat_info_url)
    if cat_info_resp.status_code != 200:
        print(f"Error: unable to get data for category ID {cat_id}. Skipping.")
        continue
    cat_data = cat_info_resp.json()
    category_name = cat_data['name']
    print(f"\n--- Processing category: {category_name} (ID: {cat_id}) ---")

    # — Request articles for this category
    posts_api = f"{base_api}posts?categories={cat_id}&per_page=100"
    posts_resp = requests.get(posts_api)
    if posts_resp.status_code != 200:
        print(f"Error: no articles found for category {category_name}. Skipping.")
        continue

    posts = posts_resp.json()
    print(f"Found {len(posts)} articles in section '{category_name}'")

    # — Add section name (only if there are articles)
    if posts:
        ws.cell(row=current_row, column=1, value=f"{category_name}")
        ws.cell(row=current_row, column=1).font = Font(bold=True, size=12)
        current_row += 1

        # — Add articles of this section
        for post in posts:
            title = post['title']['rendered']
            link = post['link']
            date = format_date(post['modified'])

            # Add row with article
            ws.cell(row=current_row, column=1, value=f'=HYPERLINK("{link}","{title}")')  # English version; change to Russian if needed
            ws.cell(row=current_row, column=2, value=date)
            current_row += 1

 # — Adjust column widths
 ws.column_dimensions['A'].width = 80
 ws.column_dimensions['B'].width = 18

 # — Save
 filename = f"{slug}_all_articles.xlsx"
 wb.save(filename)
 print(f"\n=== DONE! ===")
 print(f"File {filename} created in the current folder.")
 print(f"Processed categories: {len(category_ids_to_process)}")
	import requests
	from urllib.parse import urlparse
	from datetime import datetime
	from openpyxl import Workbook
	from openpyxl.styles import Font

	# — Function: date formatting
	def format_date(date_str):
	dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
	return dt.strftime('%d.%m.%Y')

	# — Recursive function to get IDs of all child categories
	def get_all_child_category_ids(parent_id, base_api):
	child_ids = []
	categories_url = f"{base_api}categories?parent={parent_id}&per_page=100"
	response = requests.get(categories_url)
	if response.status_code == 200:
	categories = response.json()
	for cat in categories:
	child_ids.append(cat['id'])
	# Recursion for sublevels
	child_ids.extend(get_all_child_category_ids(cat['id'], base_api))
	return child_ids

	# — Get the link via input
	url = input("Enter the link to the parent category (e.g., https://support.r7-office.ru/category/desktop_editors/table-editor/): ").strip()
	if not url:
	print("Error: no link entered.")
	exit()

	# Extract slug from URL
	slug = urlparse(url).path.strip('/').split('/')[-1]
	print(f"Using parent category slug: {slug}")

	# Base API URL
	base_api = 'https://support.r7-office.ru/wp-json/wp/v2/'

	# — Request parent category by slug
	cat_api = f"{base_api}categories?slug={slug}"
	cat_resp = requests.get(cat_api)
	if cat_resp.status_code != 200 or not cat_resp.json():
	print("Error: category not found.")
	exit()

	category = cat_resp.json()[0]
	parent_id = category['id']
	parent_name = category['name']
	print(f"Parent category ID: {parent_id}")
	print(f"Parent category name: {parent_name}")

	# — Get IDs of all child categories (recursively)
	all_child_ids = get_all_child_category_ids(parent_id, base_api)
	print(f"Found {len(all_child_ids)} child categories (including nested): {all_child_ids}")

	# — Add parent category to the list for processing (if you need its articles)
	category_ids_to_process = [parent_id] + all_child_ids # Include parent + all children

	# — Create Excel file
	wb = Workbook()
	ws = wb.active
	ws.title = "All Articles"

	# Row counter
	current_row = 1

	# — Process each category (parent + children)
	for cat_id in category_ids_to_process:
	# Get category name by ID
	cat_info_url = f"{base_api}categories/{cat_id}"
	cat_info_resp = requests.get(cat_info_url)
	if cat_info_resp.status_code != 200:
	print(f"Error: unable to get data for category ID {cat_id}. Skipping.")
	continue
	cat_data = cat_info_resp.json()
	category_name = cat_data['name']
	print(f"\n--- Processing category: {category_name} (ID: {cat_id}) ---")

	# — Request articles for this category
	posts_api = f"{base_api}posts?categories={cat_id}&per_page=100"
	posts_resp = requests.get(posts_api)
	if posts_resp.status_code != 200:
	print(f"Error: no articles found for category {category_name}. Skipping.")
	continue

	posts = posts_resp.json()
	print(f"Found {len(posts)} articles in section '{category_name}'")

	# — Add section name (only if there are articles)
	if posts:
	ws.cell(row=current_row, column=1, value=f"{category_name}")
	ws.cell(row=current_row, column=1).font = Font(bold=True, size=12)
	current_row += 1

	# — Add articles of this section
	for post in posts:
	title = post['title']['rendered']
	link = post['link']
	date = format_date(post['modified'])

	# Add row with article
	ws.cell(row=current_row, column=1, value=f'=HYPERLINK("{link}","{title}")') # English version; change to Russian if needed
	ws.cell(row=current_row, column=2, value=date)
	current_row += 1

	# — Adjust column widths
	ws.column_dimensions['A'].width = 80
	ws.column_dimensions['B'].width = 18

	# — Save
	filename = f"{slug}_all_articles.xlsx"
	wb.save(filename)
	print(f"\n=== DONE! ===")
	print(f"File {filename} created in the current folder.")
	print(f"Processed categories: {len(category_ids_to_process)}")