Last active
January 25, 2022 22:04
-
-
Save EBendinelli/7c51e84cfae267183013a1d8bbe11c6d to your computer and use it in GitHub Desktop.
Puppeteer script to download latest edition of Le Monde Diplomatique (epub)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Requires npm, puppeteer, chromium | |
| // One line installer (debian): ```sudo apt install chromium-browser npm && npm i puppeteer``` (the script requires 'puppeteer'; installing only puppeteer-core will not satisfy that require) | |
| // launch with ```node mondeDiploGrabber.js``` | |
| // Files will be saved to the ./downloads folder (relative to the directory the script is launched from) | |
| // tested on raspbian with node v12.18^ | |
| const puppeteer = require('puppeteer'); | |
/**
 * Logs in to monde-diplomatique.fr with headless Chromium, opens the
 * downloads page and clicks the EPUB download button so the file is saved
 * under ./downloads (resolved against the current working directory).
 *
 * Credentials are taken from the MONDE_DIPLO_EMAIL / MONDE_DIPLO_PASSWORD
 * environment variables; when unset, the placeholder values below are used,
 * so editing them in place keeps working.
 *
 * On any failure the browser is still closed, the error is printed and the
 * process exit code is set to 1.
 */
(async () => {
  // Local require: only this block needs it.
  const path = require('path');

  const USER_AGENT =
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36';
  const email = process.env.MONDE_DIPLO_EMAIL || 'youremail@provider.com';
  const password = process.env.MONDE_DIPLO_PASSWORD || 'password';

  const LOGIN_FORM_ITEM = '#identification_sso > ul > li';
  const EPUB_BUTTON = '.epub > .bouton_telecharger';
  const DOWNLOAD_WAIT_MS = 10000;

  // User agent and window size are mandatory in headless mode, otherwise the
  // download links aren't populated on the page.
  // The user agent value is NOT wrapped in double quotes: launch args are not
  // passed through a shell, so quotes would end up inside the UA string.
  const browser = await puppeteer.launch({
    executablePath: 'chromium-browser',
    headless: true,
    args: ['--window-size=1920,1080', `--user-agent=${USER_AGENT}`],
  });

  try {
    const page = await browser.newPage();

    // Visit monde-diplomatique and log in.
    await page.goto('https://www.monde-diplomatique.fr/');
    await page.click('#session_connexion');
    await page.waitForSelector('input[name=mot_de_passe]');
    await page.type(`${LOGIN_FORM_ITEM} > input[name=email]`, email);
    await page.type(`${LOGIN_FORM_ITEM} > input[name=mot_de_passe]`, password);

    // Register the navigation wait BEFORE clicking: if the click is awaited
    // first, a fast navigation can finish before the wait starts and the
    // script then hangs until the navigation timeout.
    await Promise.all([
      page.waitForNavigation(),
      page.click(`${LOGIN_FORM_ITEM} > input[type=submit]`),
    ]);
    console.log("login successful");

    // Go to the download page and click the download button.
    await page.goto('https://www.monde-diplomatique.fr/telechargements/');
    await page.waitForSelector(EPUB_BUTTON);

    // Use a public CDP session instead of the private `page._client`, and hand
    // Chromium an absolute download directory.
    const cdpSession = await page.target().createCDPSession();
    await cdpSession.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: path.resolve('./downloads'),
    });
    await page.click(EPUB_BUTTON);

    // Fixed grace period to let the download complete; the script has no
    // signal for completion, so a slow connection may need a larger value.
    await new Promise((resolve) => setTimeout(resolve, DOWNLOAD_WAIT_MS));
  } finally {
    // Always release the Chromium process, even when a step above throws.
    await browser.close();
  }

  console.log('job done');
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment