Skip to content

Instantly share code, notes, and snippets.

@apetenchea
Last active July 22, 2025 19:10
Show Gist options
  • Save apetenchea/4df556a49f9a2543be877c31355b4164 to your computer and use it in GitHub Desktop.
Save apetenchea/4df556a49f9a2543be877c31355b4164 to your computer and use it in GitHub Desktop.
Download any manuals from https://www.manua.ls
# This script gathers all the pages of a manual and merges them into a PDF.
# You'll need to play a bit with inspect-element in order to figure out the correct url format,
# but it should be easy to adapt it to any manual.
# This script is specifically for https://www.manua.ls/audi/q3-2018/manual.
# Their url format is https://www.manua.ls/viewer/{manual-id}/{page-number}/bg{page-number-hex}.png
# Example: https://www.manua.ls/viewer/668006/100/bg64.png
# Enjoy!
import requests
from tqdm import tqdm
from PIL import Image
from io import BytesIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
def download_image(url, timeout=30):
    """Download the image at *url* and return it as a PIL image.

    Args:
        url: Address of the page image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The opened ``PIL.Image.Image``, or ``None`` when the request fails,
        the server answers with a status other than 200, or the payload
        cannot be read as an image.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Best effort: one unreachable page should not abort the whole run.
        print(f"Failed to download {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {url}")
        return None

    try:
        return Image.open(BytesIO(response.content))
    except OSError as exc:
        # PIL signals an unreadable payload with an OSError subclass.
        print(f"Failed to decode {url}: {exc}")
        return None
def save_images_as_pdf(images, pdf_filename):
    """Write each image in *images* onto its own letter-sized page of a PDF.

    Every image is scaled, keeping its aspect ratio, to the largest size that
    fits on the page, then drawn starting at x = 0 with its top edge at the
    top of the page.

    Args:
        images: Iterable of PIL images, one per output page.
        pdf_filename: Path of the PDF file to create.
    """
    page_width, page_height = letter
    pdf = canvas.Canvas(pdf_filename, pagesize=letter)

    for picture in images:
        pic_width, pic_height = picture.size
        ratio = pic_width / pic_height

        # Try a full-width fit first; switch to a full-height fit if too tall.
        draw_width, draw_height = page_width, page_width / ratio
        if draw_height > page_height:
            draw_width, draw_height = page_height * ratio, page_height

        # Hand reportlab an in-memory PNG stream of the picture.
        buffer = BytesIO()
        picture.save(buffer, format='PNG')
        buffer.seek(0)

        # y is the page height minus the drawn height, which pins the image
        # to the top of the page.
        pdf.drawImage(
            ImageReader(buffer),
            0,
            page_height - draw_height,
            width=draw_width,
            height=draw_height,
        )
        pdf.showPage()

    pdf.save()
def main():
    """Fetch every page image of the manual and bundle them into output.pdf."""
    base_url = "https://www.manua.ls/viewer/668006/"
    page_numbers = range(1, 231)  # Adjust the range as needed

    images = []
    for page in tqdm(page_numbers):
        # The file name is "bg" followed by the page number in lowercase hex.
        image = download_image(f"{base_url}{page}/bg{page:x}.png")
        if not image:
            continue
        images.append(image)

    if not images:
        print("No images downloaded")
        return

    save_images_as_pdf(images, "output.pdf")
    print("PDF created successfully")


# Run the download only when executed as a script.
if __name__ == "__main__":
    main()
# Use this script for webp manuals
# example: https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=1
# pip install selenium webdriver-manager pillow tqdm
# By default Firefox is used, but it's easy to adapt to chrome, see below
"""
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
options = Options()
options.headless = True
options.add_argument("--window-size=1200,1600")
driver = webdriver.Chrome(options=options)
"""
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
from tqdm import tqdm
import io
import time
def get_screenshot(driver, url, consent):
    """Load *url* and return a screenshot of the manual viewer as a PIL image.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the manual page to capture.
        consent: When true, wait up to 3 seconds for the button labelled
            "Consent" and click it; otherwise pause for one second to give
            the page's JavaScript time to load its elements.

    Returns:
        A ``PIL.Image.Image`` opened from the PNG screenshot of the element
        whose id is ``viewer``.
    """
    # Imported locally so the narrowed handler below needs no new
    # file-level import.
    from selenium.common.exceptions import WebDriverException

    driver.get(url)

    # Wait for consent and give time for JS to load elements
    if consent:
        try:
            consent_button = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="Consent"]'))
            )
            consent_button.click()
        except WebDriverException:
            # Best effort: the dialog may never appear or may not be
            # clickable. Only Selenium errors are ignored, so Ctrl-C and
            # programming errors still surface (a bare ``except`` hid them).
            pass
    else:
        time.sleep(1)

    viewer_div = driver.find_element(By.ID, "viewer")

    # Save screenshot of just one element
    png = viewer_div.screenshot_as_png

    # Optional, intermediary save step
    # viewer_div.screenshot(f"{url[-1]}.png")
    return Image.open(io.BytesIO(png))
def main():
    """Screenshot every page of the manual in headless Firefox and save a PDF.

    Pages 1 through 81 are captured one by one; the consent dialog is handled
    only on the first page. The collected screenshots are written to
    ``output.pdf`` as a multi-page document.
    """
    options = Options()
    # Pass the command-line flag rather than setting ``options.headless``:
    # the attribute setter is deprecated in Selenium 4 and does not work
    # for Firefox.
    options.add_argument("--headless")
    options.set_preference("layout.css.devPixelsPerPx", "1.5")
    driver = webdriver.Firefox(options=options)

    base_url = "https://www.manua.ls"
    images = []

    try:
        for i in tqdm(range(1, 82)):  # number of pages 81
            url = f"{base_url}/growatt/min-3000-11400tl-xh-us/manual?p={i}"  # manual name may differ
            img = get_screenshot(driver, url, consent=(i == 1))
            if img:
                images.append(img)
    finally:
        # Single point of cleanup; the browser is closed even if a page
        # fails. The original called quit() a second time after this.
        driver.quit()

    if images:
        images[0].save("output.pdf", save_all=True, append_images=images[1:])
        print("PDF created successfully")
    else:
        print("No screenshots taken")


# Run the capture only when executed as a script.
if __name__ == "__main__":
    main()
@gohamstergo
Copy link

thanks, ill mess with that

@gohamstergo
Copy link

gohamstergo commented Jul 22, 2025

this is great. i made two changes:

viewer_div = driver.find_element(By.CLASS_NAME, "viewer-page")
doing it this way removes the viewer UI (the arrows still show. i know selenium can hide elements, but this is good enough for what i need)
and
options.add_argument("--headless")
the other headless method works for chrome but not FF.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment