Skip to content

Instantly share code, notes, and snippets.

@limitedeternity
Last active March 20, 2025 17:50
Show Gist options
  • Save limitedeternity/f3c832938852dfcba9b76a60e1b0ccfc to your computer and use it in GitHub Desktop.
Save limitedeternity/f3c832938852dfcba9b76a60e1b0ccfc to your computer and use it in GitHub Desktop.
A Scribd-Downloader that actually works
const path = require("path");
const fs = require("fs");
const puppeteer = require("puppeteer-extra").use(require("puppeteer-extra-plugin-stealth")());
const imagesToPdf = require("images-to-pdf");
/**
 * Entry point: screenshots a Scribd reader document page by page and merges
 * the screenshots into one PDF.
 *
 * Usage:
 *   npm start "https://www.scribd.com/read/414178613/How-to-Prepare-for-the-Biology-Olympiad-and-Science-Competitions"
 *
 * Flow:
 *   1. Launch a visible browser, open scribd.com and wait (without timeout)
 *      for an element matching `.logged_in` to appear.
 *   2. Open the document URL taken from the first CLI argument and poll the
 *      page counter until it reports page 1 (the console prompt asks the
 *      user to navigate there).
 *   3. Screenshot `#column_container`, click the "next page" arrow and
 *      repeat while `.epub_banner` has aria-hidden="true"; then take one
 *      final screenshot.
 *   4. Close the browser and write <cwd>/<documentId>/<documentId>.pdf from
 *      the numbered PNG files 1.png .. N.png in that same directory.
 *
 * Any failure is logged and turns into a non-zero exit code; the browser is
 * closed on both the success and the failure path.
 */
(async () => {
  // Delay between two consecutive polls of the page state.
  const POLL_INTERVAL_MS = 200;

  /** Resolves after `ms` milliseconds. */
  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

  // Fail fast, before a browser is launched and the user is asked to log in.
  const documentUrl = process.argv[2];
  if (!documentUrl) {
    throw new Error('Missing document URL. Usage: npm start "<scribd reader URL>"');
  }

  const browser = await puppeteer.launch({
    userDataDir: path.join(process.cwd(), "tmp"),
    ignoreHTTPSErrors: true,
    // A visible window is required: the user logs in and navigates by hand.
    headless: false,
    slowMo: 0,
    args: [
      "--no-sandbox",
      "--disable-dev-shm-usage",
      '--proxy-server="direct://"',
      "--proxy-bypass-list=*"
    ]
  });

  // Filled in while the browser is open, consumed by the PDF step afterwards.
  let documentId;
  let outputDir;
  // 1-based number of the screenshot currently being taken; after the capture
  // loop it equals the total number of screenshots.
  let shotCounter = 1;

  try {
    const page = await browser.newPage();
    await page.emulateMediaType("screen");

    console.log("Log in");
    await page.goto("https://www.scribd.com", { waitUntil: "domcontentloaded" });
    // timeout: 0 disables the timeout, so the user can take as long as needed.
    await page.waitForSelector(".logged_in", { timeout: 0 });

    await page.goto(documentUrl, { waitUntil: "networkidle2" });

    console.log("Navigate to the first page");
    const pageCounterHandle = await page.$("div.page_counter");
    if (pageCounterHandle === null) {
      throw new Error('Page counter ("div.page_counter") not found - is this a Scribd reader URL?');
    }
    for (;;) {
      const counterText = await pageCounterHandle.evaluate(node => node.innerText);
      const counterMatch = counterText.match("PAGE (\\d+) OF (\\d+)");
      // A counter text that does not match (yet) is treated like "not on
      // page 1" and polled again instead of crashing on a null match.
      if (counterMatch !== null && counterMatch[1] === "1") {
        break;
      }
      await sleep(POLL_INTERVAL_MS);
    }
    await pageCounterHandle.dispose();

    const readerHandle = await page.$("#column_container");
    if (readerHandle === null) {
      throw new Error('Reader container ("#column_container") not found.');
    }

    documentId = await page.evaluate(() => window.Scribd.current_doc.id);
    outputDir = path.join(process.cwd(), String(documentId));
    // recursive: true makes this a no-op when the directory already exists.
    fs.mkdirSync(outputDir, { recursive: true });

    /** True while `.epub_banner` carries aria-hidden="true" (presumably: the end of the book is not reached yet). */
    const hasMorePages = () =>
      page.evaluate(() => document.querySelector(".epub_banner").getAttribute("aria-hidden") === "true");

    /**
     * True until `.loading_page` has display "none" and opacity "1".
     * NOTE(review): presumably this is the state Scribd leaves the loading
     * overlay in once a page is rendered - confirm against the live reader.
     */
    const isReaderBusy = () =>
      page.evaluate(() => {
        const overlayStyle = document.querySelector(".loading_page").style;
        return overlayStyle.display !== "none" || overlayStyle.opacity !== "1";
      });

    /** Waits for the reader to settle, then saves it as <outputDir>/<shotCounter>.png. */
    const captureCurrentPage = async () => {
      while (await isReaderBusy()) {
        await sleep(POLL_INTERVAL_MS);
      }
      await readerHandle.screenshot({
        path: path.join(outputDir, `${shotCounter}.png`)
      });
    };

    while (await hasMorePages()) {
      await captureCurrentPage();
      await page.evaluate(() => document.querySelector("a.page_arrow_link:nth-child(3)").click());
      shotCounter++;
    }
    // The loop stops before the page that is on screen when the banner
    // becomes visible has been captured, so take that one as well.
    await captureCurrentPage();

    await readerHandle.dispose();
    await page.close();
  } finally {
    // Always release the browser; otherwise a failed run leaves a Chromium
    // window behind that also keeps this process alive.
    await browser.close();
  }

  console.log("Done. Rendering PDF...");
  await imagesToPdf(
    Array.from({ length: shotCounter }, (_, index) => path.join(outputDir, `${index + 1}.png`)),
    path.join(outputDir, `${documentId}.pdf`)
  );
})().catch(error => {
  // Report the failure and signal it to the caller instead of leaving an
  // unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
{
"name": "Scribd-Downloader",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"keywords": [],
"author": "@limitedeternity",
"license": "MIT",
"engines": {
"node": ">=12.2.0 <14",
"npm": "7.8.0"
},
"dependencies": {
"images-to-pdf": "^1.0.3",
"puppeteer": "^8.0.0",
"puppeteer-extra": "^3.1.18",
"puppeteer-extra-plugin-stealth": "^2.7.6"
}
}
@diertian
Copy link

diertian commented May 1, 2023

Hello, thanks for sharing this. Could you please convert it to a Greasy Fork user script and post it on greasyfork.org?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment