Heavy vibe-coding warning, but it works for me!
Just set the API_TOKEN env var to the value of the cookie api_token from audioteka.com in your browser after logging in. The token is valid for ~20 minutes; after that, you need a new one.
| #!/usr/bin/env python3 | |
| import uuid | |
| import requests | |
| import json | |
| import math | |
| import traceback | |
| import zipfile | |
| import tempfile | |
| import os | |
| import base64 | |
| from io import BytesIO | |
| from typing import List, Dict, Any, Union | |
| # Libraries for ODS output and Image handling | |
| from pyexcel_ods import save_data | |
| from pyexcel import Sheet | |
| from PIL import Image | |
# --- Configuration ---
# Auth: the api_token cookie value from audioteka.com (valid ~20 minutes).
# Raises KeyError at import time if the env var is not set.
API_TOKEN = os.environ["API_TOKEN"]
BASE_URL = "https://audioteka.com/cz/v2"
HEADERS = {
    'Cookie': f'api_token={API_TOKEN}',
    'Accept': 'application/json',
}
LIMIT = 500  # Max limit for shelf/favourites endpoint
OUTPUT_FILENAME = 'books_data.ods'
# Unique placeholder to mark the cell content that needs XML replacement
# during ODS post-processing.
IMAGE_PLACEHOLDER_PREFIX = "BINARY_IMAGE_DATA:"
# --- ODS XML Templates ---
# <draw:frame>/<draw:image> markup injected into content.xml for each cover.
# Format placeholders: img_name (unique frame name), img_filename (path under
# Pictures/ inside the archive), mime_type (always "image/png" here).
ODS_IMAGE_XML_TEMPLATE = """
<draw:frame draw:z-index="0" draw:name="{img_name}" draw:style-name="gr1" draw:text-style-name="P1"
svg:width="1.5in" svg:height="1.5in" svg:x="0cm" svg:y="0cm">
<draw:image xlink:href="Pictures/{img_filename}" xlink:type="simple"
xlink:show="embed" xlink:actuate="onLoad" loext:mime-type="{mime_type}">
<text:p/>
</draw:image>
</draw:frame>
"""
# Maps archive filename (e.g. "<uuid>.png") -> raw PNG bytes. Filled while
# covers are downloaded, then flushed into the ODS ZIP in post-processing.
IMAGE_DATA_TO_EMBED = {}
# Note: Complex ODS files often require definitions in 'styles.xml' and 'meta.xml' for styles like 'P2' and 'fr1'.
# Since pyexcel-ods creates these, we rely on them existing. We will use 'image/png' as the MIME type.
| # --- Helper Functions --- | |
def fetch_paginated_data(endpoint: str, limit: int = LIMIT) -> List[Dict[str, Any]]:
    """Fetch every page of products from a paginated Audioteka endpoint.

    Args:
        endpoint: API path relative to BASE_URL (e.g. 'me/shelf').
        limit: Page size requested from the API.

    Returns:
        All products found under '_embedded.app:product' across all pages.
        Pagination stops early (with a message) on any request error, so the
        result may be partial.
    """
    all_books = []
    page = 1
    total_pages = 1
    print(f"Fetching data from: {endpoint}...")
    # One session for the whole pagination loop (connection reuse) instead of
    # opening and discarding a session per page.
    with requests.Session() as session:
        while page <= total_pages:
            url = f'{BASE_URL}/{endpoint}?page={page}&limit={limit}&sort=added_at&order=desc'
            try:
                response = session.get(url, headers=HEADERS, timeout=30)
                response.raise_for_status()
                data = response.json()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching page {page} from {endpoint}: {e}")
                break
            books = data.get('_embedded', {}).get('app:product', [])
            all_books.extend(books)
            # The API reports the total page count on every response.
            total_pages = data.get('pages', 1)
            print(f" -> Fetched page {page}/{total_pages} ({len(books)} books).")
            page += 1
    return all_books
def fetch_book_relations(product_ids: List[str]) -> Dict[str, Dict[str, Any]]:
    """Fetch user/product relations (progress, shelf, favourite) in batches.

    Args:
        product_ids: Product UUIDs to query.

    Returns:
        Mapping of product_id -> relation dict. Failed batches are skipped
        with a message, so the result may be partial.
    """
    relations_map = {}
    BATCH_SIZE = 30
    print(f"\nFetching relations for {len(product_ids)} books in batches of {BATCH_SIZE}...")
    # One session for all batches (connection reuse).
    with requests.Session() as session:
        for i in range(0, len(product_ids), BATCH_SIZE):
            batch_ids = product_ids[i:i + BATCH_SIZE]
            payload = {
                "name": "GetProductRelations",
                "product_ids": batch_ids
            }
            try:
                # json= sends the same body as data=json.dumps(payload) but
                # also sets the Content-Type: application/json header.
                response = session.post(f'{BASE_URL}/commands', headers=HEADERS, json=payload, timeout=30)
                response.raise_for_status()
                relations_data = response.json()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching relations batch {i//BATCH_SIZE + 1}: {e}")
                continue
            for rel in relations_data.get('relations', []):
                relations_map[rel['product_id']] = rel
    print(f"✅ Relations fetch complete. Total relations: {len(relations_map)}")
    return relations_map
def download_image_and_prepare(url: str, max_size: int = 150) -> Union[str, None]:
    """Download a cover image, shrink it, and queue it for ODS embedding.

    The re-encoded PNG bytes are stored in the module-level
    IMAGE_DATA_TO_EMBED dict under a fresh UUID filename. The returned
    placeholder string is written into the sheet cell and later swapped for a
    <draw:frame> during ODS post-processing.

    Args:
        url: Image URL to download.
        max_size: Target size in pixels for the longer edge.

    Returns:
        "BINARY_IMAGE_DATA:<filename>|image/png" on success, None when the
        download fails or the image is unusable.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
    except (requests.exceptions.RequestException, OSError) as e:
        # FIX: a missing/broken cover must not abort the whole export;
        # honor the declared Union[str, None] contract instead.
        # (PIL.UnidentifiedImageError subclasses OSError.)
        print(f"Warning: could not fetch cover image from {url}: {e}")
        return None
    width, height = img.size
    if not isinstance(width, int) or not isinstance(height, int) or width <= 0 or height <= 0:
        return None
    # Scale the longer edge down to max_size, preserving aspect ratio.
    if width > height:
        new_width = max_size
        new_height = int(height * (max_size / width))
    else:
        new_height = max_size
        new_width = int(width * (max_size / height))
    img = img.resize((new_width, new_height))
    # Re-encode as PNG so a single MIME type works for every cover.
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    binary_data = buffer.getvalue()
    mime_type = "image/png"
    # Generate unique filename (UUID) for the ODS archive.
    unique_id = str(uuid.uuid4())
    filename = f"{unique_id}.png"
    # Store the binary data globally for the final ZIP write step.
    IMAGE_DATA_TO_EMBED[filename] = binary_data
    # FIX: the placeholder must carry the actual generated filename (the
    # original returned a literal "(unknown)" and broke the image pipeline).
    # '|' separates filename and MIME type for the embedding step.
    return f"{IMAGE_PLACEHOLDER_PREFIX}{filename}|{mime_type}"
# Custom table-row style: forces a 2.5in row height so the 1.5in embedded
# cover frames remain fully visible.
ROW_STYLE_NAME = "Row_Image_Height"
ROW_STYLE_XML_TEMPLATE = """
<style:style style:name="{style_name}" style:family="table-row">
<style:row-properties fo:min-height="2.5in" fo:height="2.5in"/>
</style:style>"""
# =====================================================================
## ⚙️ ODS Style Modification
# =====================================================================
def get_modified_styles(zin: zipfile.ZipFile) -> bytes:
    """Return styles.xml from the archive with the tall-row style injected.

    Args:
        zin: Open source ODS archive.

    Returns:
        Patched styles.xml as UTF-8 bytes; b'' when styles.xml is absent.
        If the automatic-styles section cannot be located, the original
        content is returned unchanged.
    """
    try:
        xml_text = zin.read('styles.xml').decode('utf-8')
    except KeyError:
        print("Warning: styles.xml not found. Aborting style modification.")
        return b''
    row_style = ROW_STYLE_XML_TEMPLATE.format(style_name=ROW_STYLE_NAME)
    # Splice the new style in just before </office:automatic-styles>, where
    # document-generated styles are expected to live.
    anchor = xml_text.rfind('</office:automatic-styles>')
    if anchor != -1:
        xml_text = f"{xml_text[:anchor]}{row_style}{xml_text[anchor:]}"
    return xml_text.encode('utf-8')
# One <manifest:file-entry> is required for every file stored in the archive;
# readers such as LibreOffice reject undeclared members.
MANIFEST_ENTRY_TEMPLATE = '<manifest:file-entry manifest:media-type="{mime_type}" manifest:full-path="{full_path}"/>'
MANIFEST_VERSION_ATTR = ' manifest:version="1.2"'  # Needed for manifest:manifest tag
def get_modified_manifest(zin: zipfile.ZipFile, image_data_to_embed: Dict[str, bytes]) -> bytes:
    """Patch META-INF/manifest.xml to declare the embedded images.

    Adds one file entry per image and ensures the manifest:version attribute
    is present on the root tag (validators require it).

    Args:
        zin: Open source ODS archive.
        image_data_to_embed: Archive filename -> PNG bytes for each picture.

    Returns:
        The patched manifest as UTF-8 bytes, or b'' if the manifest is missing.
    """
    try:
        manifest_xml = zin.read('META-INF/manifest.xml').decode('utf-8')
    except KeyError:
        # Fallback if manifest.xml is somehow missing in the pyexcel output.
        print("Warning: META-INF/manifest.xml not found in archive.")
        return b''
    # FIX: each entry must reference its own Pictures/<filename> path (the
    # original emitted a hard-coded "(unknown)" path for every image).
    new_entries_xml = "".join(
        MANIFEST_ENTRY_TEMPLATE.format(
            mime_type='image/png',
            full_path=f"Pictures/{filename}"
        )
        for filename in image_data_to_embed
    )
    # 1. Insert new entries before the closing tag.
    close_tag = '</manifest:manifest>'
    insertion_point = manifest_xml.rfind(close_tag)
    if insertion_point != -1:
        manifest_xml = manifest_xml[:insertion_point] + new_entries_xml + manifest_xml[insertion_point:]
    # 2. Add the version attribute if it is not already present.
    manifest_tag = '<manifest:manifest'
    if manifest_xml.find(MANIFEST_VERSION_ATTR) == -1:
        manifest_xml = manifest_xml.replace(manifest_tag, manifest_tag + MANIFEST_VERSION_ATTR, 1)
    return manifest_xml.encode('utf-8')
| # ===================================================================== | |
| ## 📝 ODS Post-Processing and Embedding | |
| # ===================================================================== | |
def embed_images_in_ods(ods_path: str):
    """Post-process a pyexcel-written ODS file in place.

    Rewrites the archive so that every IMAGE_PLACEHOLDER_PREFIX cell becomes
    a real <draw:frame>/<draw:image>, the referenced PNGs are stored under
    Pictures/, the manifest declares them, and each affected row gets the
    tall-row style so the covers are visible.

    Args:
        ods_path: Path to the ODS file produced by pyexcel; replaced in place.

    Raises:
        Re-raises any exception after printing it and removing the temp file.
    """
    if not IMAGE_DATA_TO_EMBED:
        print("No data to embed or modify.")
        return
    print(f"\nEditing ODS archive: {ods_path} to embed images and adjust row height...")
    temp_zip_path = ods_path + '.temp'
    # Data containers for all needed content.
    content_xml_data = None
    modified_manifest_data = None
    modified_styles_data = None
    original_files_data = {}
    try:
        # --- PHASE 1: READ ALL DATA FROM THE ORIGINAL ZIP (zin) ---
        with zipfile.ZipFile(ods_path, 'r') as zin:
            file_list = zin.namelist()
            # These members are rewritten below, so don't copy them verbatim.
            files_to_skip = {'content.xml', 'mimetype', 'META-INF/manifest.xml', 'styles.xml'}
            content_xml_data = zin.read('content.xml').decode('utf-8')
            modified_manifest_data = get_modified_manifest(zin, IMAGE_DATA_TO_EMBED)
            modified_styles_data = get_modified_styles(zin)
            for item in file_list:
                if item not in files_to_skip:
                    original_files_data[item] = zin.read(item)
        # 1. Modify content.xml (outside the zin context).
        modified_xml = content_xml_data
        # pyexcel omits the drawing namespaces; inject them onto the
        # <office:document-content> root tag so the image frames validate.
        namespace_injections = (
            ' xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"'
            ' xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"'
            ' xmlns:xlink="http://www.w3.org/1999/xlink"'
        )
        tag_to_find = '<office:document-content'
        if namespace_injections not in modified_xml:
            tag_start_index = modified_xml.find(tag_to_find)
            if tag_start_index != -1:
                tag_close_index = modified_xml.find('>', tag_start_index)
                if tag_close_index != -1:
                    modified_xml = (
                        modified_xml[:tag_close_index] +
                        namespace_injections +
                        modified_xml[tag_close_index:]
                    )
        # 2. Find and replace placeholders with the image XML frame and tag
        #    their rows with the tall-row style.
        image_counter = 1
        current_search_position = 0  # Start searching from the beginning.
        while True:
            # Find the next placeholder cell.
            start_index = modified_xml.find(IMAGE_PLACEHOLDER_PREFIX, current_search_position)
            if start_index == -1:
                break
            # Locate the start of the enclosing cell and row tags.
            cell_start_tag = modified_xml.rfind('<table:table-cell', 0, start_index)
            row_start_index = modified_xml.rfind('<table:table-row', 0, cell_start_tag)
            if row_start_index == -1 or cell_start_tag == -1:
                print(f"Warning: Could not find required XML tags near index {start_index}. Stopping.")
                break
            # --- STEP 1: ROW HEIGHT ADJUSTMENT ---
            row_tag_close_index = modified_xml.find('>', row_start_index)
            style_attribute = f' table:style-name="{ROW_STYLE_NAME}"'
            if row_tag_close_index != -1 and ROW_STYLE_NAME not in modified_xml[row_start_index : row_tag_close_index]:
                # Inserting the attribute shifts every later index in the string.
                modified_xml = (
                    modified_xml[:row_tag_close_index] +
                    style_attribute +
                    modified_xml[row_tag_close_index:]
                )
                current_search_position = row_tag_close_index + len(style_attribute)
            else:
                # No modification done: just move past this placeholder.
                current_search_position = start_index + len(IMAGE_PLACEHOLDER_PREFIX)
            # --- STEP 2: CELL CONTENT REPLACEMENT (REQUIRES RE-INDEXING) ---
            # The string may have grown above, so re-locate every boundary
            # from the (still valid) row start before slicing.
            reindexed_start_index = modified_xml.find(IMAGE_PLACEHOLDER_PREFIX, row_start_index)
            reindexed_cell_start_tag = modified_xml.rfind('<table:table-cell', 0, reindexed_start_index)
            # Content starts after the closing '>' of <table:table-cell>.
            reindexed_cell_content_start = modified_xml.find('>', reindexed_cell_start_tag) + 1
            # Content ends right before the closing </table:table-cell>.
            reindexed_cell_content_end = modified_xml.find('</table:table-cell>', reindexed_cell_content_start)
            # Placeholder format: "<prefix><filename>|<mime_type>".
            value_end = modified_xml.find('</text:p>', reindexed_start_index)
            placeholder_string = modified_xml[reindexed_start_index : value_end]
            data_string = placeholder_string[len(IMAGE_PLACEHOLDER_PREFIX):]
            parts = data_string.split('|')
            img_filename = parts[0]
            mime_type = parts[1].split('<')[0].strip()
            image_xml = ODS_IMAGE_XML_TEMPLATE.format(
                img_name=f"Image{image_counter}",
                img_filename=img_filename,
                mime_type=mime_type
            )
            # Replace everything inside the cell with the image frame.
            modified_xml = (
                modified_xml[:reindexed_cell_content_start] +
                image_xml +
                modified_xml[reindexed_cell_content_end:]
            )
            image_counter += 1
            # Continue after the cell just replaced so it isn't matched again.
            current_search_position = reindexed_cell_content_end
        # FIX: a duplicated progress print was removed here.
        print(f"Prepared XML for {image_counter - 1} images and adjusted row height.")
        # 3. Create a new compliant ODS archive.
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zout:
            # 'mimetype' must be stored uncompressed per the ODF package spec.
            mimetype_content = b'application/vnd.oasis.opendocument.spreadsheet'
            zout.writestr('mimetype', mimetype_content, compress_type=zipfile.ZIP_STORED)
            # Write copied files.
            for item, data in original_files_data.items():
                zout.writestr(item, data)
            # Write the fixed manifest.xml.
            zout.writestr('META-INF/manifest.xml', modified_manifest_data)
            # Write the modified styles.xml (if it was readable).
            if modified_styles_data:
                zout.writestr('styles.xml', modified_styles_data)
            # Write the modified content.xml.
            zout.writestr('content.xml', modified_xml.encode('utf-8'))
            # FIX: each image must land at its own Pictures/<filename> path;
            # the original wrote every image to one hard-coded "(unknown)"
            # path, so each overwrote the previous one.
            for filename, binary_data in IMAGE_DATA_TO_EMBED.items():
                zout.writestr(f'Pictures/{filename}', binary_data)
        # 4. Replace the original file atomically.
        os.replace(temp_zip_path, ods_path)
        print(f"✅ ODS file rebuilt with image-fitting row height.")
    except Exception as e:
        # Top-level boundary: report, clean up the temp file, and re-raise.
        print(f"🚨 Error: {e}")
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
        raise
| # --- Main Script --- | |
def main():
    """Export the user's Audioteka shelf and favourites to an ODS spreadsheet
    with embedded, resized cover images."""
    # 1. Fetch both lists and de-duplicate by product id (shelf wins).
    shelf_books = fetch_paginated_data('me/shelf')
    fav_books = fetch_paginated_data('me/favourites')
    all_books_map = {}
    for book in shelf_books + fav_books:
        book_id = book['id']
        if book_id not in all_books_map:
            all_books_map[book_id] = book
    # Skip "Respekt" periodicals; everything else is exported.
    filtered_product_ids = [
        book_id for book_id, book in all_books_map.items()
        if "Respekt" not in book.get('name', '')
    ]
    print(f"\nBooks to process: {len(filtered_product_ids)}")
    relations_map = fetch_book_relations(filtered_product_ids)
    # 2. Prepare sheet rows. The "Cover Image" column temporarily holds a
    #    placeholder string that embed_images_in_ods later turns into a real
    #    embedded image frame.
    header = [
        "Title",
        "Author/Description",
        "Cover Image",
        "Owns It",
        "Is Favorite",
        "Percent Listened",
        "Finished Listening",
        "Price",
        "Download Link"
    ]
    sheet_data = [header]
    print(f"\nProcessing books and encoding covers...")
    for i, book_id in enumerate(filtered_product_ids):
        book = all_books_map[book_id]
        relation = relations_map.get(book_id, {})
        # Listening progress; the API may return None for either value.
        duration_ms = relation.get('duration_in_ms', 0) or 0
        progress_ms = relation.get('progress_in_ms', 0) or 0
        percent_listened = 0.0
        finished = "No"
        if duration_ms > 0:
            percent_listened = (progress_ms / duration_ms) * 100
            # "Finished" = within 10 seconds of the end and on the shelf.
            if (duration_ms - progress_ms) <= 10000 and relation.get('shelf'):
                finished = "Yes"
        download_url = f"{BASE_URL}/me/audiobooks/{book.get('id', '')}/download"
        # --- Image Preparation: placeholder string, or empty cell on failure.
        image_url = book.get('image_url', '')
        image_placeholder = download_image_and_prepare(image_url)
        image_cell = image_placeholder if image_placeholder else ""
        # --- Create the row ---
        row = [
            str(book.get('name', '')),
            str(book.get('description', '')),
            image_cell,
            str("Yes" if relation.get('shelf') else "No"),
            str("Yes" if relation.get('favorite') else "No"),
            f"{percent_listened:.2f}%",
            str(finished),
            str(book.get('price', '')),
            str(download_url)
        ]
        sheet_data.append(row)
        if (i + 1) % 50 == 0:
            print(f" -> Processed {i + 1} of {len(filtered_product_ids)} books...")
    # 3. Export to ODS using pyexcel.
    book_sheet = Sheet(sheet_data)
    ods_data_dict = {'Audiobooks': book_sheet}
    try:
        print(f"\nWriting initial ODS file: {OUTPUT_FILENAME}...")
        save_data(OUTPUT_FILENAME, ods_data_dict)
    except Exception as e:
        print(f"🚨 Error writing initial ODS file: {e}")
        return
    # 4. Post-process the archive to embed the downloaded images.
    embed_images_in_ods(OUTPUT_FILENAME)
    print(f"\n🎉 Process complete. Final data with embedded images exported to **{OUTPUT_FILENAME}**.")
if __name__ == "__main__":
    # Top-level boundary: report any failure with a full stack trace.
    try:
        main()
    except Exception as exc:
        print(f"An unexpected top-level error occurred: {exc}")
        print("\n--- STACK TRACE ---")
        traceback.print_exc()
        print("-------------------")