Skip to content

Instantly share code, notes, and snippets.

@Vegasq
Created March 16, 2025 14:40
Show Gist options
  • Save Vegasq/2db1f39f19d65efa73528af311a8abb5 to your computer and use it in GitHub Desktop.
Save Vegasq/2db1f39f19d65efa73528af311a8abb5 to your computer and use it in GitHub Desktop.
// This tool downloads PDF from pocketmags.com
//
// HOWTO:
// Open an issue like this: https://pocketmags.com/magazine/reader/1234567?pageNumber=1
// Stick this code into DevTools Console and run.
// Downloading the JPEGs takes a little time, and jsPDF is far slower still. Go make a coffee.
// Once it's ready, the script saves the result as combined.pdf via jsPDF's save().
//
/**
 * Ensures the jsPDF UMD build is available as `window.jspdf.jsPDF`.
 *
 * When the global is already present the promise resolves right away; otherwise a
 * <script> element pointing at the cdnjs build is appended to <head>.
 *
 * @returns {Promise<void>} Resolves once `window.jspdf.jsPDF` exists. Rejects when the
 *   script cannot be loaded, or loads without exposing that global, so callers never
 *   wait forever on a failed download.
 */
async function loadJsPDF() {
  return new Promise((resolve, reject) => {
    if (window.jspdf?.jsPDF) {
      console.log("[jsPDF] Already loaded.");
      resolve();
      return;
    }

    console.log("[jsPDF] Loading...");
    const script = document.createElement("script");
    script.src = "https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js";

    script.onload = () => {
      if (window.jspdf?.jsPDF) {
        console.log("[jsPDF] Loaded successfully!");
        resolve();
        return;
      }
      // The script ran but did not define the expected global.
      console.error("[jsPDF] Failed to load!");
      reject(new Error("[jsPDF] Script loaded but window.jspdf.jsPDF is missing."));
    };

    // Network failure, blocked request (e.g. CSP) or similar: settle instead of hanging.
    script.onerror = () => {
      console.error("[jsPDF] Failed to load!");
      reject(new Error(`[jsPDF] Could not load script: ${script.src}`));
    };

    // Handlers are attached before insertion so no event can be missed.
    document.head.appendChild(script);
  });
}
/**
 * Builds a single PDF with one page per image and saves it as "combined.pdf".
 *
 * @param {Array<{blob: Blob}>} images - Entries whose `blob` holds the image data, in
 *   page order. Only the `blob` property is read here.
 * @returns {Promise<void>} Resolves after `pdf.save()` has been called, or early when
 *   there is nothing to write.
 */
async function generatePDF(images) {
  console.log(`[PDF] Generating PDF with ${images.length} images...`);
  if (images.length === 0) {
    // Bail out before loading jsPDF: there is nothing to render.
    console.warn("[PDF] No images found, skipping PDF generation.");
    return;
  }

  await loadJsPDF(); // Ensure jsPDF is loaded before proceeding
  const { jsPDF } = window.jspdf; // Correct way to access jsPDF from UMD build

  // jsPDF's default document is A4 portrait measured in millimetres.
  const pageWidthMm = 210;
  const pageHeightMm = 297;

  const pdf = new jsPDF();
  let pagesWritten = 0;

  for (const [index, { blob }] of images.entries()) {
    const imgData = await blobToBase64(blob);
    console.log(`[PDF] Processing image ${index + 1}/${images.length}`);

    // Decode the image only to learn its pixel dimensions.
    const img = new Image();
    let decoded = true;
    try {
      await new Promise((resolve, reject) => {
        img.onload = resolve;
        // Without an error handler a corrupt image would stall the whole run.
        img.onerror = () => reject(new Error("image could not be decoded"));
        img.src = imgData;
      });
    } catch (error) {
      decoded = false;
      console.error(`[PDF] Skipping image ${index + 1}/${images.length}:`, error);
    }
    if (!decoded) {
      continue;
    }

    // Fit the image inside the page on both axes so tall images are not clipped.
    const scale = Math.min(pageWidthMm / img.width, pageHeightMm / img.height);
    const imgWidth = img.width * scale;
    const imgHeight = img.height * scale;
    const offsetX = (pageWidthMm - imgWidth) / 2;

    // A new jsPDF document already contains its first page.
    if (pagesWritten > 0) {
      pdf.addPage();
    }
    pdf.addImage(imgData, "JPEG", offsetX, 0, imgWidth, imgHeight);
    pagesWritten++;
  }

  if (pagesWritten === 0) {
    console.warn("[PDF] No image could be decoded, skipping PDF generation.");
    return;
  }

  console.log("[PDF] Saving PDF as combined.pdf");
  pdf.save("combined.pdf");
}
// Image URLs that have already been seen. scrapeAndCombinePDF() checks this list and
// appends to it so the same image is not downloaded twice.
var known = [];
/**
 * Downloads an image and returns its body as a Blob.
 *
 * @param {string} url - Address of the image to download.
 * @returns {Promise<Blob|null>} The response body, or `null` when the request throws
 *   or the server answers with a non-2xx status. Failures are logged, never thrown.
 */
async function fetchImageBlob(url) {
  try {
    console.log(`[Fetch] Downloading: ${url}`);
    const response = await fetch(url);

    // fetch() only rejects on network errors; an HTTP error page would otherwise be
    // returned as if it were image data.
    if (!response.ok) {
      console.error(`[Fetch] Error fetching image: ${url}`, `HTTP ${response.status}`);
      return null;
    }

    const blob = await response.blob();
    console.log(`[Fetch] Successfully downloaded: ${url}`);
    return blob;
  } catch (error) {
    console.error(`[Fetch] Error fetching image: ${url}`, error);
    return null;
  }
}
/**
 * Pulls the candidate image URLs out of a CSS `background-image` value.
 * The value is split on double quotes; pieces mentioning "transparent.png" are
 * skipped (and logged), and only pieces containing "https://" are kept.
 *
 * @param {string} backgroundImage - Value of `element.style.backgroundImage`.
 * @returns {string[]} The URLs found, in order of appearance.
 */
function extractBackgroundImageUrls(backgroundImage) {
  const urls = [];
  for (const part of backgroundImage.split('"')) {
    if (part.includes("transparent.png")) {
      console.log(`[Scraper] Skipping: ${part}`);
      continue;
    }
    if (part.includes("https://")) {
      urls.push(part);
    }
  }
  return urls;
}

/**
 * Walks through the reader page by page, downloads every new page image and hands the
 * collected images to generatePDF().
 *
 * Each pass scrapes the `.pdfViewer` elements currently in the document, then clicks
 * `#btnNextPage`. Scanning stops when `#right_navigation` is hidden
 * (`style.display === "none"`) or the next-page button is missing. The page is scraped
 * BEFORE those checks so the page shown when scanning stops is included too.
 *
 * @param {{pageDelayMs?: number}} [options]
 * @param {number} [options.pageDelayMs=1000] - Time to wait after clicking "next"
 *   before scraping again.
 * @returns {Promise<void>} Resolves once generatePDF() has finished (or immediately
 *   after scanning when no image was collected).
 */
async function scrapeAndCombinePDF({ pageDelayMs = 1000 } = {}) {
  await loadJsPDF(); // Ensure jsPDF is loaded before processing pages

  const images = [];
  console.log("[Scraper] Starting page scan...");

  while (true) {
    console.log("[Scraper] Checking current page...");

    for (const viewer of document.getElementsByClassName("pdfViewer")) {
      const backgroundImage = viewer.style.backgroundImage;
      if (!backgroundImage) {
        console.warn("[Scraper] No background images found.");
        continue;
      }

      for (const url of extractBackgroundImageUrls(backgroundImage)) {
        if (known.includes(url)) {
          console.log(`[Scraper] Skipping (already processed): ${url}`);
          continue;
        }
        // Recorded before the download so a failing URL is not retried on every pass.
        known.push(url);

        console.log(`[Scraper] Fetching new image: ${url}`);
        const blob = await fetchImageBlob(url);
        if (blob) {
          // Only successful downloads consume a page number.
          const pageNum = images.length;
          images.push({ blob, pageNum });
          console.log(`[Scraper] Image added to processing queue. PageNum: ${pageNum}`);
        }
      }
    }

    if (document.getElementById("right_navigation")?.style.display === "none") {
      console.log("[Scraper] Last page reached.");
      break;
    }

    const nextPageButton = document.getElementById("btnNextPage");
    if (!nextPageButton) {
      console.warn("[Scraper] Next page button not found. Stopping.");
      break;
    }

    nextPageButton.click();
    console.log("[Scraper] Going to next page...");
    console.log(`[Scraper] Sleeping for ${pageDelayMs} ms...`);
    await new Promise((resolve) => setTimeout(resolve, pageDelayMs));
    console.log("[Scraper] Waking up.");
  }

  console.log(`[Scraper] Finished scanning. Total images found: ${images.length}`);
  if (images.length > 0) {
    console.log("[Scraper] Generating PDF...");
    // Awaited so failures surface to the caller instead of becoming unhandled rejections.
    await generatePDF(images);
  } else {
    console.warn("[Scraper] No images found to generate PDF.");
  }
}
/**
 * Reads a Blob with FileReader.readAsDataURL().
 *
 * @param {Blob} blob - Data to read.
 * @returns {Promise<string|null>} Settles with `reader.result` when the read ends;
 *   rejects with the reader's error event when `onerror` fires first.
 */
function blobToBase64(blob) {
  const startRead = (fulfil, fail) => {
    const fileReader = new FileReader();
    fileReader.onerror = fail;
    fileReader.onloadend = function handleLoadEnd() {
      fulfil(fileReader.result);
    };
    fileReader.readAsDataURL(blob);
  };
  return new Promise(startRead);
}
// Start script **ONLY AFTER** jsPDF is fully loaded.
// The scraper's promise is returned into the chain so that any failure along the way
// is reported by the catch handler instead of surfacing as an unhandled rejection.
loadJsPDF()
  .then(() => {
    console.log("[Main] Starting PDF processing...");
    return scrapeAndCombinePDF();
  })
  .catch((error) => {
    console.error("[Main] PDF processing failed:", error);
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment