Last active
June 14, 2024 06:32
-
-
Save Mirochiu/a2c247172910928c62f55b28d156a050 to your computer and use it in GitHub Desktop.
Sample code for Puppeteer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { createClient, destroyPage } from './puppeteerClient.mjs'; | |
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} msec - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
const waitMs = (msec) =>
  new Promise((resolve) => {
    // The timer invokes `resolve` without arguments, so the promise settles with `undefined`.
    setTimeout(resolve, msec);
  });
/**
 * Read the last page number from the pagination links of the current document.
 *
 * The callback handed to `page.evaluate` must be self-contained: it only
 * touches `document` and returns a plain number.
 *
 * @param {{ evaluate: Function }} page - Puppeteer page (anything exposing `evaluate`).
 * @returns {Promise<number>} First run of digits found in the href of the last
 *   `.pagination a[href]` element, or 1 when there are no pagination links or
 *   that href contains no digits.
 */
const getLastPageFromPage = async (page) =>
  page.evaluate(() => {
    const links = Array.from(document.querySelectorAll('.pagination a[href]'));
    if (links.length === 0) {
      return 1;
    }
    const lastHref = links[links.length - 1].href;
    // NOTE(review): this takes the FIRST digit run of the href; if the resolved
    // URL carries digits before the page number (port, IP, path id) the result
    // is not the page number — confirm against the target site's URL format.
    const digits = String(lastHref).match(/\d+/);
    // `match` yields null when the href has no digits; fall back to a single page
    // instead of throwing inside the browser context.
    return digits ? Number(digits[0]) : 1;
  });
/**
 * Pull the URL out of a CSS value such as `url("https://host/img.png")`.
 *
 * Accepts the double-quoted, single-quoted and unquoted `url(...)` forms and
 * returns the first one found. When no `url(...)` is present, it falls back to
 * the text between the first and last double quote of the input.
 *
 * @param {string | undefined | null} u - Raw style value (e.g. `background-image`).
 * @returns {string | undefined} The extracted URL, or `undefined` when nothing
 *   can be extracted (non-string input, empty string, `none`, a single stray quote, ...).
 */
const extractUrl = (u) => {
  if (typeof u !== 'string' || u === '') {
    return undefined;
  }

  // Group 1: "double quoted", group 2: 'single quoted', group 3: unquoted.
  const cssUrl = /url\(\s*(?:"([^"]*)"|'([^']*)'|([^"')\s][^)]*?))\s*\)/i.exec(u);
  if (cssUrl) {
    return cssUrl[1] ?? cssUrl[2] ?? cssUrl[3];
  }

  const first = u.indexOf('"');
  const last = u.lastIndexOf('"');
  // Require two distinct quotes: with a single quote `first === last`, and
  // `substring(first + 1, last)` would swap its arguments and return the quote itself.
  if (first >= 0 && last > first) {
    return u.substring(first + 1, last);
  }
  return undefined;
};
/**
 * Collect album entries from the current page.
 *
 * Titles come from the `innerText` of every `.info` element; link and thumbnail
 * come from every `a.albumbgphoto` element. The two lists are paired by index,
 * so a title without a matching anchor yields an entry with only `title`.
 *
 * @param {{ evaluate: Function }} page - Puppeteer page (anything exposing `evaluate`).
 * @returns {Promise<Array<{ title: string, url?: string, thumbnailUrl?: string }>>}
 *   One entry per title; an empty array when the page has no `.info` elements
 *   (previously an empty object, which made the return type inconsistent).
 */
const getAlbumsInfosFromPage = async (page) => {
  const titles = await page.evaluate(() =>
    Array.from(document.querySelectorAll('.info')).map((el) => el.innerText),
  );
  if (!titles?.length) {
    return [];
  }

  const anchors = await page.evaluate(() =>
    Array.from(document.querySelectorAll('a.albumbgphoto')).map((el) => ({
      url: el.href,
      thumbnailUrl: el.style['background-image'],
    })),
  );

  // The raw style value is a CSS `url(...)` expression; reduce it to the bare URL.
  const links = anchors.map((anchor) => ({
    ...anchor,
    thumbnailUrl: extractUrl(anchor.thumbnailUrl),
  }));

  return titles.map((title, idx) => ({
    title,
    ...links[idx],
  }));
};
// Script entry point: open the browser, log in when necessary, then print the
// last page number and the album list found on the landing page.
const client = await createClient({ debug: process.env.DEBUG });
const LOGIN_URL = '.../login.html';
const LOGINED_URL = '.../main.html';
try {
  const { page } = client;
  await page.goto(LOGIN_URL);
  if (page.url() === LOGINED_URL) {
    // Ending up on the post-login URL right after `goto` means no login is needed.
    console.info('has been logined');
  } else {
    await page.waitForSelector('#loginBtn', { timeout: 5000 });
    await waitMs(2000);
    // NOTE(review): `user` and `pass` are not declared anywhere in the visible
    // file; they must be defined (e.g. loaded from configuration) before this
    // script can run the login path.
    await page.type('input#account', user, { delay: 180 });
    await page.type('input#password', pass, { delay: 180 });
    // Start waiting for the navigation before the click completes, so a fast
    // navigation cannot finish before the waiter is registered.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#loginBtn', { delay: 1000 }),
    ]);
    if (page.url() !== LOGINED_URL) {
      throw new Error(`logined url not match ${page.url()} != ${LOGINED_URL}`);
    }
  }
  const lastPage = (await getLastPageFromPage(page)) || 1;
  console.log('last page', lastPage);
  const infos = await getAlbumsInfosFromPage(page);
  console.log(JSON.stringify(infos, null, 2));
} finally {
  // Errors propagate unchanged; the browser is always released, and the script
  // waits for the teardown to finish so a failing close is not left unhandled.
  // The former `porgressBar?.stop()` call referenced an undeclared identifier,
  // which threw a ReferenceError here and skipped the teardown.
  await destroyPage(client);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import puppeteer from 'puppeteer'; | |
import path from 'node:path'; | |
import { fileURLToPath } from 'url'; | |
/**
 * Launch a browser through Puppeteer and open one page sized 1024x768.
 *
 * The browser profile is stored in a `puppeteerCache` directory located one
 * level above this module's directory.
 *
 * @param {{ debug?: unknown }} [config] - `debug`: when truthy the browser is
 *   launched with a visible window; otherwise it runs with `headless: 'new'`.
 * @returns {Promise<{ browser: object, page: object, args: string[], debug: unknown }>}
 *   The launched browser, the opened page, the launch arguments and the
 *   `debug` flag as received.
 * @throws Whatever `puppeteer.launch`, `browser.newPage` or `page.setViewport`
 *   reject with; the browser is closed first if it had already been launched.
 */
export const createClient = async (config = {}) => {
  const { debug } = config ?? {};
  const args = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--mute-audio',
    '--disable-notifications',
  ];
  // __dirname not defined in mjs
  const __filename = fileURLToPath(import.meta.url);
  const __dirname = path.dirname(__filename);
  console.debug(__dirname);

  const browser = await puppeteer.launch({
    headless: debug ? false : 'new',
    userDataDir: path.join(__dirname, '..', 'puppeteerCache'),
    args,
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1024,
      height: 768,
    });
    return { browser, page, args, debug };
  } catch (error) {
    // Page setup failed after the browser was launched: close it so the process
    // is not leaked. A failure while closing is deliberately ignored so the
    // original setup error is the one the caller sees.
    await browser.close().catch(() => {});
    throw error;
  }
};
/**
 * Close the page (when present) and then the browser of a client object.
 *
 * @param {{ browser?: { close: Function }, page?: { close: Function } } | null} [client]
 *   Object holding the `browser` and `page` to release.
 * @returns {Promise<void>} Resolves once the browser has been closed, or
 *   immediately (after a warning) when the client has no browser.
 * @throws Whatever `page.close()` or `browser.close()` reject with; the
 *   browser is closed even when closing the page fails.
 */
export const destroyPage = async (client = {}) => {
  // The default parameter only covers `undefined`; also tolerate an explicit null.
  const { browser, page } = client ?? {};
  if (!browser) {
    console.warn('not found browser');
    return;
  }
  try {
    if (page) {
      await page.close();
    } else {
      console.warn('not found page');
    }
  } finally {
    // Always shut the browser down, even if closing the page threw.
    await browser.close();
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment