Created
June 16, 2021 18:33
-
-
Save harrisoncramer/73e43d8b016e3a506602bea644be5934 to your computer and use it in GitHub Desktop.
Script to download all of my articles from my time at National Journal, using Puppeteer.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import puppeteer from "puppeteer"; | |
import { stories } from "./stories.js"; // A list of links to my stories behind the paywall. | |
import fs from "fs"; | |
import { promisify } from "util"; | |
import { basename } from "path"; | |
import dotenv from "dotenv"; | |
// Load the .env file so the login credentials stay out of the source.
dotenv.config();

// Promise-returning wrapper around the callback-style fs.writeFile.
const writer = promisify(fs.writeFile);
/**
 * Render the given page as an A4 PDF and save it inside the `pdfs/` directory.
 *
 * The file name is the last path segment of `url` (query string and fragment
 * are ignored) with a `.pdf` extension; `index` is used when the path has no
 * final segment. The output directory is created when it does not exist.
 *
 * @param {object} args
 * @param {object} args.page - Object exposing an async `pdf(options)` method;
 *   it is called with `{ format: "A4" }`.
 * @param {string} args.url - Absolute URL of the page; only used to derive the file name.
 * @returns {Promise<*>} Whatever `page.pdf` resolved with, after it has been written to disk.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
async function printPDF({ page, url }) {
  const outputDir = "pdfs";
  // Parse the URL so "?query" / "#fragment" parts never end up in the file name.
  const fileName = basename(new URL(url).pathname) || "index";
  console.log(`Saving ${fileName}...`);
  const pdf = await page.pdf({ format: "A4" });
  // Writing fails with ENOENT when the target directory is missing.
  await fs.promises.mkdir(outputDir, { recursive: true });
  // The PDF is binary data, so no text encoding is passed.
  await fs.promises.writeFile(`${outputDir}/${fileName}.pdf`, pdf);
  return pdf;
}
/**
 * Start a headless browser through Puppeteer.
 *
 * @returns {Promise<object>} Resolves with the value produced by `puppeteer.launch`.
 */
async function initializePuppeteer() {
  const launchOptions = { headless: true };
  return puppeteer.launch(launchOptions);
}
/**
 * Sign in through the National Journal login form.
 *
 * Opens a temporary page, fills in the email and password inputs, submits the
 * form, waits for the resulting navigation, and then closes the page again.
 *
 * @param {object} args
 * @param {object} args.browser - Browser handle exposing an async `newPage()`.
 * @param {string} args.email - Account email typed into `input[type=email]`.
 * @param {string} args.password - Account password typed into `input[type=password]`.
 * @returns {Promise<void>}
 * @throws {Error} If `email` or `password` is missing or empty.
 */
async function login({ browser, email, password }) {
  // Fail fast: an unset credential would otherwise be submitted as the text "undefined".
  if (!email || !password) {
    throw new Error("login requires both an email and a password");
  }

  const page = await browser.newPage();
  try {
    await page.goto("https://www.nationaljournal.com/login", {
      waitUntil: "networkidle0",
    });
    await page.evaluate(
      (credentials) => {
        document.querySelector("input[type=email]").value = credentials.email;
        document.querySelector("input[type=password]").value =
          credentials.password;
      },
      { email, password },
    );
    // Register the navigation wait before clicking so the navigation cannot be missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle2" }),
      page.click("#submit"),
    ]);
  } finally {
    // The tab is only needed for the form submission; do not leave it open.
    // NOTE(review): assumes the session is shared with pages opened later
    // from the same browser — confirm.
    await page.close();
  }
}
/**
 * Log in, then visit every link in `stories` and save each page as a PDF.
 *
 * Credentials are read from `process.env.email` and `process.env.password`.
 * The browser is always closed, even when the login or a download fails.
 *
 * @returns {Promise<void>}
 */
async function execute() {
  const browser = await initializePuppeteer();
  try {
    const { email, password } = process.env;
    await login({ browser, password, email });

    const page = await browser.newPage();
    // Stories are fetched one at a time because they all reuse the same page.
    // NOTE(review): assumes `stories` is a plain synchronous list of URL strings — confirm.
    for (const story of stories) {
      await page.goto(story);
      // Use the final URL, since the story link may redirect.
      const url = page.url();
      await printPDF({ page, url });
    }
  } finally {
    // Await the shutdown so the browser process does not outlive the script.
    await browser.close();
  }
}
// Run the script; report a failure and exit non-zero instead of leaving an
// unhandled promise rejection.
execute().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment