Created
March 16, 2025 14:40
-
-
Save Vegasq/2db1f39f19d65efa73528af311a8abb5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This tool downloads a magazine issue from pocketmags.com as a PDF.
//
// HOWTO:
// Open a magazine issue in the reader, e.g. https://pocketmags.com/magazine/reader/1234567?pageNumber=1
// Paste this code into the DevTools Console and run it.
// Downloading the JPEGs takes a little time, and jsPDF is waaaaaay too slow. Go make a coffee.
// Once it's ready, the result is saved by the browser as combined.pdf.
//
/**
 * Ensures the jsPDF UMD bundle is available as `window.jspdf.jsPDF`.
 *
 * If the global already exists the function returns immediately. Otherwise it
 * appends a <script> tag for the cdnjs build of jsPDF 2.5.1 to <head> and
 * waits for it to finish loading.
 *
 * @returns {Promise<void>} Resolves once `window.jspdf.jsPDF` is defined.
 * @throws {Error} If the script cannot be downloaded, or if it loads without
 *   defining `window.jspdf.jsPDF`.
 */
async function loadJsPDF() {
  if (window.jspdf?.jsPDF) {
    console.log("[jsPDF] Already loaded.");
    return;
  }

  console.log("[jsPDF] Loading...");
  await new Promise((resolve, reject) => {
    const script = document.createElement("script");
    script.src = "https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js";

    script.onload = () => {
      if (window.jspdf?.jsPDF) {
        console.log("[jsPDF] Loaded successfully!");
        resolve();
        return;
      }
      // The script ran but did not expose the expected global; reject so
      // callers do not wait forever on a promise that never settles.
      console.error("[jsPDF] Failed to load!");
      reject(new Error("[jsPDF] Script loaded but window.jspdf.jsPDF is missing"));
    };

    // Network failure, blocked request, CSP violation, etc.
    script.onerror = () => {
      console.error("[jsPDF] Failed to load!");
      reject(new Error(`[jsPDF] Could not download ${script.src}`));
    };

    document.head.appendChild(script);
  });
}
/**
 * Decodes a data URL into an Image element so its pixel size can be read.
 *
 * @param {string} dataUrl - A `data:` URL holding the encoded image.
 * @returns {Promise<HTMLImageElement>} Resolves with the loaded image.
 * @throws {Error} If the browser reports that the image failed to load.
 */
function loadImageElement(dataUrl) {
  return new Promise((resolve, reject) => {
    const img = new Image();
    // Attach handlers before assigning `src` so neither event can be missed.
    img.onload = () => resolve(img);
    img.onerror = () => reject(new Error("[PDF] Image could not be decoded"));
    img.src = dataUrl;
  });
}

/**
 * Builds a PDF with one page per image and saves it as "combined.pdf".
 *
 * Each image is scaled to the full page width while keeping its aspect ratio;
 * if that would make it taller than the page it is scaled to the page height
 * instead, so nothing is cut off. Images that cannot be read or decoded are
 * logged and skipped rather than aborting (or hanging) the whole run.
 *
 * @param {Array<{blob: Blob}>} images - Images to add, in page order.
 * @returns {Promise<void>} Resolves after the PDF has been handed to
 *   `pdf.save()`, or immediately when there is nothing to write.
 */
async function generatePDF(images) {
  if (images.length === 0) {
    console.warn("[PDF] No images found, skipping PDF generation.");
    return;
  }

  await loadJsPDF(); // Ensure jsPDF is loaded before proceeding
  const { jsPDF } = window.jspdf; // UMD build exposes the constructor here
  console.log(`[PDF] Generating PDF with ${images.length} images...`);

  const pdf = new jsPDF();
  const pageWidth = pdf.internal.pageSize.getWidth();
  const pageHeight = pdf.internal.pageSize.getHeight();
  let pagesWritten = 0;

  for (const [index, { blob }] of images.entries()) {
    console.log(`[PDF] Processing image ${index + 1}/${images.length}`);

    let imgData;
    let img;
    try {
      imgData = await blobToBase64(blob);
      img = await loadImageElement(imgData);
    } catch (error) {
      console.error(`[PDF] Skipping image ${index + 1}: it could not be read or decoded.`, error);
      continue;
    }

    // Fit to page width first; fall back to page height for very tall images.
    let drawWidth = pageWidth;
    let drawHeight = (img.height / img.width) * drawWidth;
    if (drawHeight > pageHeight) {
      drawHeight = pageHeight;
      drawWidth = (img.width / img.height) * drawHeight;
    }

    // A new jsPDF document already has its first page, so only add a page
    // once something has been written (skipped images must not leave blanks).
    if (pagesWritten > 0) {
      pdf.addPage();
    }
    pdf.addImage(imgData, "JPEG", 0, 0, drawWidth, drawHeight);
    pagesWritten += 1;
  }

  if (pagesWritten === 0) {
    console.warn("[PDF] No image could be processed, skipping PDF generation.");
    return;
  }

  console.log("[PDF] Saving PDF as combined.pdf");
  pdf.save("combined.pdf");
}
// URLs of page images that have already been requested. scrapeAndCombinePDF()
// checks this list before fetching and appends every new URL to it, so the
// same image is never downloaded twice.
// NOTE(review): presumably declared with `var` so the script can be pasted
// into the same DevTools console more than once without a redeclaration
// error - confirm before changing it to `const`/`let`.
var known = [];
/**
 * Downloads a URL and returns its body as a Blob.
 *
 * This is best-effort: failures are logged and reported as `null` instead of
 * being thrown, so one bad image does not stop the caller's loop.
 *
 * @param {string} url - Address of the image to download.
 * @returns {Promise<Blob|null>} The response body, or `null` if the request
 *   failed or the server answered with a non-2xx status.
 */
async function fetchImageBlob(url) {
  try {
    console.log(`[Fetch] Downloading: ${url}`);
    const response = await fetch(url);

    // fetch() only rejects on network errors; an HTTP 404/500 still resolves,
    // and its body would be an error page rather than an image.
    if (!response.ok) {
      console.error(`[Fetch] Error fetching image: ${url} (HTTP ${response.status} ${response.statusText})`);
      return null;
    }

    const blob = await response.blob();
    console.log(`[Fetch] Successfully downloaded: ${url}`);
    return blob;
  } catch (error) {
    console.error(`[Fetch] Error fetching image: ${url}`, error);
    return null;
  }
}
/**
 * Pulls the page-image URLs out of a CSS `background-image` value.
 *
 * The value is split on double quotes; a fragment counts as a page image when
 * it contains "https://" and does not contain "transparent.png".
 *
 * @param {string} backgroundImage - Value of `element.style.backgroundImage`.
 * @returns {string[]} Matching URLs in the order they appear.
 */
function extractBackgroundImageUrls(backgroundImage) {
  return backgroundImage
    .split('"')
    .filter((part) => part.includes("https://") && !part.includes("transparent.png"));
}

/**
 * Downloads every not-yet-seen page image shown by the ".pdfViewer" elements
 * currently in the document and appends it to `images`.
 *
 * URLs are recorded in the module-level `known` list before fetching, so each
 * URL is requested at most once.
 *
 * @param {Array<{blob: Blob, pageNum: number}>} images - Collected images;
 *   appended to in place.
 * @returns {Promise<void>}
 */
async function collectCurrentPageImages(images) {
  // Snapshot the collection: it is iterated across awaits while the page may change.
  const viewers = Array.from(document.getElementsByClassName("pdfViewer"));

  for (const viewer of viewers) {
    const backgroundImage = viewer.style.backgroundImage;
    if (!backgroundImage) {
      console.warn("[Scraper] No background images found.");
      continue;
    }

    for (const url of extractBackgroundImageUrls(backgroundImage)) {
      if (known.includes(url)) {
        console.log(`[Scraper] Skipping (already processed): ${url}`);
        continue;
      }
      known.push(url);

      console.log(`[Scraper] Fetching new image: ${url}`);
      const blob = await fetchImageBlob(url);
      if (blob) {
        const pageNum = images.length;
        images.push({ blob, pageNum });
        console.log(`[Scraper] Image added to processing queue. PageNum: ${pageNum}`);
      }
    }
  }
}

/**
 * Walks through the reader page by page, downloads each page image once, and
 * passes the collected images to generatePDF().
 *
 * Paging stops when the element with id "right_navigation" has
 * `style.display === "none"`, or when no element with id "btnNextPage" exists.
 * The current page is scanned BEFORE that check, so the final page is included.
 *
 * @returns {Promise<void>} Resolves after PDF generation has finished (or
 *   after logging that no images were found).
 */
async function scrapeAndCombinePDF() {
  await loadJsPDF(); // Fail early if jsPDF is unavailable, before the slow scan

  const images = [];
  console.log("[Scraper] Starting page scan...");

  while (true) {
    console.log("[Scraper] Checking current page...");
    await collectCurrentPageImages(images);

    if (document.getElementById("right_navigation")?.style.display === "none") {
      console.log("[Scraper] Last page reached.");
      break;
    }

    const nextPageButton = document.getElementById("btnNextPage");
    if (!nextPageButton) {
      console.warn("[Scraper] Next page button not found. Stopping.");
      break;
    }

    nextPageButton.click();
    console.log("[Scraper] Going to next page...");
    // Give the reader time to swap in the next page's background images.
    console.log("[Scraper] Sleeping for 1 second...");
    await new Promise((resolve) => setTimeout(resolve, 1000));
    console.log("[Scraper] Waking up.");
  }

  console.log(`[Scraper] Finished scanning. Total images found: ${images.length}`);
  if (images.length === 0) {
    console.warn("[Scraper] No images found to generate PDF.");
    return;
  }

  console.log("[Scraper] Generating PDF...");
  // Awaited so failures propagate to the caller instead of being lost.
  await generatePDF(images);
}
/**
 * Reads a Blob with FileReader and returns its contents as a `data:` URL
 * (base64-encoded).
 *
 * @param {Blob} blob - The data to encode.
 * @returns {Promise<string>} The `data:` URL produced by `readAsDataURL`.
 * @throws {Error|DOMException} If the read fails or is aborted.
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // `load` fires only on success; `loadend` would also fire after an abort,
    // when `reader.result` is null.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error ?? new Error("[FileReader] Failed to read blob"));
    reader.onabort = () => reject(new Error("[FileReader] Read was aborted"));
    reader.readAsDataURL(blob);
  });
}
// Entry point: start scraping only after jsPDF has loaded. The scraper's
// promise is returned into the chain so that any failure - loading jsPDF,
// scanning pages, or building the PDF - reaches the single .catch() below
// instead of becoming an unhandled rejection.
loadJsPDF()
  .then(() => {
    console.log("[Main] Starting PDF processing...");
    return scrapeAndCombinePDF();
  })
  .catch((error) => {
    console.error("[Main] PDF processing failed:", error);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment