Skip to content

Instantly share code, notes, and snippets.

@peci1
Created November 29, 2025 03:25
Show Gist options
  • Select an option

  • Save peci1/db0624db0f1afed74f1a8935adb4ae85 to your computer and use it in GitHub Desktop.

Select an option

Save peci1/db0624db0f1afed74f1a8935adb4ae85 to your computer and use it in GitHub Desktop.
Export data about your shelf and favourite audiobooks from Audioteka.com.

Heavy vibe-coding warning, but it works for me!

Just set API_TOKEN env var to the value of cookie api_token from audioteka.com in you browser after logging in. The token is valid for ~20 minutes, after that, you need a new one.

#!/usr/bin/env python3
import uuid
import requests
import json
import math
import traceback
import zipfile
import tempfile
import os
import base64
from io import BytesIO
from typing import List, Dict, Any, Union
# Libraries for ODS output and Image handling
from pyexcel_ods import save_data
from pyexcel import Sheet
from PIL import Image
# --- Configuration ---
# REPLACE WITH YOUR ACTUAL API_TOKEN
API_TOKEN = os.environ["API_TOKEN"]
BASE_URL = "https://audioteka.com/cz/v2"
HEADERS = {
'Cookie': f'api_token={API_TOKEN}',
'Accept': 'application/json',
}
LIMIT = 500 # Max limit for shelf/favourites endpoint
OUTPUT_FILENAME = 'books_data.ods'
# Unique placeholder to mark the cell content that needs XML replacement
IMAGE_PLACEHOLDER_PREFIX = "BINARY_IMAGE_DATA:"
# --- ODS XML Templates ---
# The template provided by the user, slightly adapted for use as a Python format string.
# We'll use a unique ID and the file path.
ODS_IMAGE_XML_TEMPLATE = """
<draw:frame draw:z-index="0" draw:name="{img_name}" draw:style-name="gr1" draw:text-style-name="P1"
svg:width="1.5in" svg:height="1.5in" svg:x="0cm" svg:y="0cm">
<draw:image xlink:href="Pictures/{img_filename}" xlink:type="simple"
xlink:show="embed" xlink:actuate="onLoad" loext:mime-type="{mime_type}">
<text:p/>
</draw:image>
</draw:frame>
"""
IMAGE_DATA_TO_EMBED = {}
# Note: Complex ODS files often require definitions in 'styles.xml' and 'meta.xml' for styles like 'P2' and 'fr1'.
# Since pyexcel-ods creates these, we rely on them existing. We will use 'image/png' as the MIME type.
# --- Helper Functions ---
def fetch_paginated_data(endpoint: str, limit: int = LIMIT) -> List[Dict[str, Any]]:
# ... (function remains the same)
all_books = []
page = 1
total_pages = 1
print(f"Fetching data from: {endpoint}...")
while page <= total_pages:
url = f'{BASE_URL}/{endpoint}?page={page}&limit={limit}&sort=added_at&order=desc'
try:
with requests.Session() as session:
response = session.get(url, headers=HEADERS)
response.raise_for_status()
data = response.json()
except requests.exceptions.RequestException as e:
print(f"Error fetching page {page} from {endpoint}: {e}")
break
books = data.get('_embedded', {}).get('app:product', [])
all_books.extend(books)
total_pages = data.get('pages', 1)
print(f" -> Fetched page {page}/{total_pages} ({len(books)} books).")
page += 1
return all_books
def fetch_book_relations(product_ids: List[str]) -> Dict[str, Dict[str, Any]]:
# ... (function remains the same)
relations_map = {}
BATCH_SIZE = 30
print(f"\nFetching relations for {len(product_ids)} books in batches of {BATCH_SIZE}...")
for i in range(0, len(product_ids), BATCH_SIZE):
batch_ids = product_ids[i:i + BATCH_SIZE]
payload = {
"name": "GetProductRelations",
"product_ids": batch_ids
}
try:
with requests.Session() as session:
response = session.post(f'{BASE_URL}/commands', headers=HEADERS, data=json.dumps(payload))
response.raise_for_status()
relations_data = response.json()
except requests.exceptions.RequestException as e:
print(f"Error fetching relations batch {i//BATCH_SIZE + 1}: {e}")
continue
relations = relations_data.get('relations', [])
for rel in relations:
relations_map[rel['product_id']] = rel
print(f"✅ Relations fetch complete. Total relations: {len(relations_map)}")
return relations_map
def download_image_and_prepare(url: str, max_size: int = 150) -> Union[str, None]:
"""Downloads an image, resizes it, saves it to an in-memory buffer as PNG, and stores it globally."""
response = requests.get(url, timeout=10)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
width, height = img.size
if not isinstance(width, int) or not isinstance(height, int) or width <= 0 or height <= 0:
return None
# Resize logic
if width > height:
new_width = max_size
new_height = int(height * (max_size / width))
else:
new_height = max_size
new_width = int(width * (max_size / height))
img = img.resize((new_width, new_height))
# Save to buffer as PNG
buffer = BytesIO()
img.save(buffer, format="PNG")
binary_data = buffer.getvalue()
mime_type = "image/png"
# Generate unique filename (UUID) for the ODS archive
unique_id = str(uuid.uuid4())
filename = f"{unique_id}.png"
# Store the binary data globally for the final ZIP write step
IMAGE_DATA_TO_EMBED[filename] = binary_data
# Return a placeholder string containing the filename and MIME type
# We separate with '|' for easier parsing in the embedding step
return f"{IMAGE_PLACEHOLDER_PREFIX}{filename}|{mime_type}"
# New style definition for row height
ROW_STYLE_NAME = "Row_Image_Height"
ROW_STYLE_XML_TEMPLATE = """
<style:style style:name="{style_name}" style:family="table-row">
<style:row-properties fo:min-height="2.5in" fo:height="2.5in"/>
</style:style>"""
# =====================================================================
## ⚙️ ODS Style Modification
# =====================================================================
def get_modified_styles(zin: zipfile.ZipFile) -> bytes:
"""Reads styles.xml and injects the custom row height style."""
try:
styles_xml = zin.read('styles.xml').decode('utf-8')
except KeyError:
print("Warning: styles.xml not found. Aborting style modification.")
return b''
new_style_xml = ROW_STYLE_XML_TEMPLATE.format(style_name=ROW_STYLE_NAME)
# Find the style insertion point: often right before the closing </office:automatic-styles> tag
insertion_point_tag = '</office:automatic-styles>'
insertion_point = styles_xml.rfind(insertion_point_tag)
if insertion_point != -1:
# Insert the new style XML just before the closing tag
styles_xml = styles_xml[:insertion_point] + new_style_xml + styles_xml[insertion_point:]
return styles_xml.encode('utf-8')
MANIFEST_ENTRY_TEMPLATE = '<manifest:file-entry manifest:media-type="{mime_type}" manifest:full-path="{full_path}"/>'
MANIFEST_VERSION_ATTR = ' manifest:version="1.2"' # Needed for manifest:manifest tag
def get_modified_manifest(zin: zipfile.ZipFile, image_data_to_embed: Dict[str, bytes]) -> bytes:
"""Reads manifest.xml, adds file entries for all new images, and fixes the version attribute."""
try:
manifest_xml = zin.read('META-INF/manifest.xml').decode('utf-8')
except KeyError:
# Fallback if manifest.xml is somehow missing in the pyexcel output
print("Warning: META-INF/manifest.xml not found in archive.")
return b''
new_entries = []
# Create entries for new image files
for filename in image_data_to_embed.keys():
new_entries.append(
MANIFEST_ENTRY_TEMPLATE.format(
mime_type='image/png',
full_path=f"Pictures/{filename}"
)
)
new_entries_xml = "".join(new_entries)
# 1. Insert new entries before the closing tag
close_tag = '</manifest:manifest>'
insertion_point = manifest_xml.rfind(close_tag)
if insertion_point != -1:
manifest_xml = manifest_xml[:insertion_point] + new_entries_xml + manifest_xml[insertion_point:]
# 2. Fix missing version attribute error
manifest_tag = '<manifest:manifest'
if manifest_xml.find(MANIFEST_VERSION_ATTR) == -1:
# Insert version attribute after the manifest tag
manifest_xml = manifest_xml.replace(manifest_tag, manifest_tag + MANIFEST_VERSION_ATTR, 1)
return manifest_xml.encode('utf-8')
# =====================================================================
## 📝 ODS Post-Processing and Embedding
# =====================================================================
def embed_images_in_ods(ods_path: str):
"""
Edits the content.xml and adds image files to the ODS archive to achieve ODF compliance
and adjust row height.
"""
if not IMAGE_DATA_TO_EMBED:
print("No data to embed or modify.")
return
print(f"\nEditing ODS archive: {ods_path} to embed images and adjust row height...")
temp_zip_path = ods_path + '.temp'
# Data containers for all needed content
content_xml_data = None
modified_manifest_data = None
modified_styles_data = None # NEW container for styles
original_files_data = {}
try:
# --- PHASE 1: READ ALL DATA FROM THE ORIGINAL ZIP (zin) ---
with zipfile.ZipFile(ods_path, 'r') as zin:
file_list = zin.namelist()
files_to_skip = {'content.xml', 'mimetype', 'META-INF/manifest.xml', 'styles.xml'}
content_xml_data = zin.read('content.xml').decode('utf-8')
modified_manifest_data = get_modified_manifest(zin, IMAGE_DATA_TO_EMBED)
modified_styles_data = get_modified_styles(zin) # NEW: Get modified styles
for item in file_list:
if item not in files_to_skip:
original_files_data[item] = zin.read(item)
# 1. Modify content.xml (outside the zin context)
modified_xml = content_xml_data
# FIX 1: Add missing namespaces (Same as before)
namespace_injections = (
' xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"'
' xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"'
' xmlns:xlink="http://www.w3.org/1999/xlink"'
)
tag_to_find = '<office:document-content'
if namespace_injections not in modified_xml:
tag_start_index = modified_xml.find(tag_to_find)
if tag_start_index != -1:
tag_close_index = modified_xml.find('>', tag_start_index)
if tag_close_index != -1:
modified_xml = (
modified_xml[:tag_close_index] +
namespace_injections +
modified_xml[tag_close_index:]
)
# 2. Find and replace placeholders with the image XML frame and adjust row height
image_counter = 1
current_search_position = 0 # Start searching from the beginning
while True:
# --- FINDERS ---
# Search from the last position to find the next placeholder
start_index = modified_xml.find(IMAGE_PLACEHOLDER_PREFIX, current_search_position)
if start_index == -1:
break
# Locate the start of the cell and row relative to the placeholder
cell_start_tag = modified_xml.rfind('<table:table-cell', 0, start_index)
row_start_index = modified_xml.rfind('<table:table-row', 0, cell_start_tag)
if row_start_index == -1 or cell_start_tag == -1:
print(f"Warning: Could not find required XML tags near index {start_index}. Stopping.")
break
# --- STEP 1: ROW HEIGHT ADJUSTMENT ---
# Find the closing '>' of the <table:table-row> tag
row_tag_close_index = modified_xml.find('>', row_start_index)
style_attribute = f' table:style-name="{ROW_STYLE_NAME}"'
if row_tag_close_index != -1 and ROW_STYLE_NAME not in modified_xml[row_start_index : row_tag_close_index]:
# Insert the style attribute (This changes the length of modified_xml!)
modified_xml = (
modified_xml[:row_tag_close_index] +
style_attribute +
modified_xml[row_tag_close_index:]
)
# Recalculate the search position to continue past the modified section
current_search_position = row_tag_close_index + len(style_attribute)
else:
# If no modification was done, just move the search position past the placeholder
current_search_position = start_index + len(IMAGE_PLACEHOLDER_PREFIX)
# --- STEP 2: CELL CONTENT REPLACEMENT (REQUIRES RE-INDEXING) ---
# RE-INDEXING IS CRITICAL HERE: Since the XML string may have been modified,
# we must find the necessary boundaries again, starting from the last known good row tag.
# Find the placeholder again (It might have shifted slightly, but the unique string is there)
# Search from the original row start for safety, but use the unique placeholder
reindexed_start_index = modified_xml.find(IMAGE_PLACEHOLDER_PREFIX, row_start_index)
# Re-find the cell and content boundaries based on the reindexed start
reindexed_cell_start_tag = modified_xml.rfind('<table:table-cell', 0, reindexed_start_index)
# Content starts after the closing '>' of <table:table-cell>
reindexed_cell_content_start = modified_xml.find('>', reindexed_cell_start_tag) + 1
# Content ends right before the closing </table:table-cell>
reindexed_cell_content_end = modified_xml.find('</table:table-cell>', reindexed_cell_content_start)
# Extract data and generate XML (Same logic as before)
value_end = modified_xml.find('</text:p>', reindexed_start_index)
placeholder_string = modified_xml[reindexed_start_index : value_end]
data_string = placeholder_string[len(IMAGE_PLACEHOLDER_PREFIX):]
parts = data_string.split('|')
img_filename = parts[0]
mime_type = parts[1].split('<')[0].strip()
image_xml = ODS_IMAGE_XML_TEMPLATE.format(
img_name=f"Image{image_counter}",
img_filename=img_filename,
mime_type=mime_type
)
# Final content replacement: Replace EVERYTHING inside the cell with image_xml
modified_xml = (
modified_xml[:reindexed_cell_content_start] + # XML up to > of <table:table-cell>
image_xml + # The image frame
modified_xml[reindexed_cell_content_end:] # The XML from </table:table-cell> onwards
)
image_counter += 1
# Update the search position to continue from after the cell we just replaced
# This ensures we don't accidentally find the same cell again
current_search_position = reindexed_cell_content_end
print(f"Prepared XML for {image_counter - 1} images.")
print(f"Prepared XML for {image_counter - 1} images and adjusted row height.")
# 3. Create a new compliant ODS archive
with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zout:
# Write mimetype uncompressed
mimetype_content = b'application/vnd.oasis.opendocument.spreadsheet'
zout.writestr('mimetype', mimetype_content, compress_type=zipfile.ZIP_STORED)
# Write copied files
for item, data in original_files_data.items():
zout.writestr(item, data)
# Write the fixed manifest.xml
zout.writestr('META-INF/manifest.xml', modified_manifest_data)
# NEW: Write the modified styles.xml
if modified_styles_data:
zout.writestr('styles.xml', modified_styles_data)
# Write the modified content.xml
zout.writestr('content.xml', modified_xml.encode('utf-8'))
# Write the image files to the Pictures/ directory
for filename, binary_data in IMAGE_DATA_TO_EMBED.items():
zout.writestr(f'Pictures/{filename}', binary_data)
# 4. Replace the original file
os.replace(temp_zip_path, ods_path)
print(f"✅ ODS file rebuilt with image-fitting row height.")
except Exception as e:
print(f"🚨 Error: {e}")
if os.path.exists(temp_zip_path):
os.remove(temp_zip_path)
raise
# --- Main Script ---
def main():
# 1. Fetch data (omitted for brevity, assume the code from previous step runs here)
shelf_books = fetch_paginated_data('me/shelf')
fav_books = fetch_paginated_data('me/favourites')
all_books_map = {}
for book in shelf_books + fav_books:
book_id = book['id']
if book_id not in all_books_map:
all_books_map[book_id] = book
all_product_ids = list(all_books_map.keys())
filtered_product_ids = [
book_id for book_id, book in all_books_map.items()
if "Respekt" not in book.get('name', '')
]
print(f"\nBooks to process: {len(filtered_product_ids)}")
relations_map = fetch_book_relations(filtered_product_ids)
# 2. Prepare ODS data: Image column contains Base64 string placeholder
header = [
"Title",
"Author/Description",
"Cover Image", # This column will hold the Base64 string
"Owns It",
"Is Favorite",
"Percent Listened",
"Finished Listening",
"Price",
"Download Link"
]
sheet_data = [header]
print(f"\nProcessing books and encoding covers...")
for i, book_id in enumerate(filtered_product_ids):
book = all_books_map[book_id]
relation = relations_map.get(book_id, {})
# ... (progress calculation) ...
duration_ms = relation.get('duration_in_ms', 0)
progress_ms = relation.get('progress_in_ms', 0)
if duration_ms is None:
duration_ms = 0
if progress_ms is None:
progress_ms = 0
percent_listened = 0.0
finished = "No"
if duration_ms > 0:
percent_listened = (progress_ms / duration_ms) * 100
if (duration_ms - progress_ms) <= 10000 and relation.get('shelf'):
finished = "Yes"
download_url = f"{BASE_URL}/me/audiobooks/{book.get('id', '')}/download"
# --- Image Preparation ---
image_url = book.get('image_url', '')
# Returns placeholder string (or None)
image_placeholder = download_image_and_prepare(image_url)
image_cell = image_placeholder if image_placeholder else ""
# --- Create the row ---
row = [
str(book.get('name', '')),
str(book.get('description', '')),
image_cell, # Insert Base64 placeholder string
str("Yes" if relation.get('shelf') else "No"),
str("Yes" if relation.get('favorite') else "No"),
f"{percent_listened:.2f}%",
str(finished),
str(book.get('price', '')),
str(download_url)
]
sheet_data.append(row)
if (i + 1) % 50 == 0:
print(f" -> Processed {i + 1} of {len(filtered_product_ids)} books...")
# 3. Export to ODS using pyexcel
book_sheet = Sheet(sheet_data)
ods_data_dict = {'Audiobooks': book_sheet}
try:
print(f"\nWriting initial ODS file: {OUTPUT_FILENAME}...")
save_data(OUTPUT_FILENAME, ods_data_dict)
except Exception as e:
print(f"🚨 Error writing initial ODS file: {e}")
return
# 4. Post-process ODS to embed images
embed_images_in_ods(OUTPUT_FILENAME)
print(f"\n🎉 Process complete. Final data with embedded images exported to **{OUTPUT_FILENAME}**.")
if __name__ == "__main__":
# Check for required libraries
try:
main()
except Exception as e:
print(f"An unexpected top-level error occurred: {e}")
print("\n--- STACK TRACE ---")
traceback.print_exc()
print("-------------------")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment