Skip to content

Instantly share code, notes, and snippets.

@Mirochiu
Last active June 14, 2024 06:32
Show Gist options
  • Save Mirochiu/a2c247172910928c62f55b28d156a050 to your computer and use it in GitHub Desktop.
Save Mirochiu/a2c247172910928c62f55b28d156a050 to your computer and use it in GitHub Desktop.
Sample code for Puppeteer
import { createClient, destroyPage } from './puppeteerClient.mjs';
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} msec - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Promise that fulfills (with no value) once the timer fires.
 */
const waitMs = (msec) =>
  new Promise((resolve) => {
    setTimeout(resolve, msec);
  });
/**
 * Read the last page number from the pagination links of the current page.
 *
 * Takes the final `.pagination a[href]` anchor in document order and returns
 * the first run of digits found in its href.
 * NOTE(review): this presumes the final pagination anchor points at the last
 * page and that the first number in its URL is the page index — confirm
 * against the target site's markup.
 *
 * @param {object} page - Puppeteer page (only `page.evaluate` is used).
 * @returns {Promise<number>} The extracted page number, or 1 when there are no
 *   pagination links or the last link's href contains no digits.
 */
const getLastPageFromPage = async (page) => {
  // The callback is serialized and run in the browser context, so it must not
  // reference anything from this module's scope.
  return page.evaluate(() => {
    const links = Array.from(document.querySelectorAll('.pagination a[href]'));
    if (links.length === 0) {
      return 1;
    }
    const lastHref = links[links.length - 1].href;
    // match() yields null when the href has no digits; fall back to page 1
    // instead of throwing inside the page context.
    const firstNumber = lastHref.match(/\d+/);
    return firstNumber ? Number(firstNumber[0]) : 1;
  });
};
/**
 * Extract the text between the first and the last double quote of a string,
 * e.g. `url("https://host/a.png")` -> `https://host/a.png`.
 *
 * @param {string | undefined | null} u - Raw value such as an inline
 *   `background-image` style.
 * @returns {string | undefined} The quoted content, or `undefined` when the
 *   input is empty or does not contain a pair of double quotes.
 */
const extractUrl = (u) => {
  if (!u) {
    return undefined;
  }
  const open = u.indexOf('"');
  const close = u.lastIndexOf('"');
  // A lone quote makes both indexes equal; substring() would then swap its
  // arguments and return the quote character itself, so require a real pair.
  if (open < 0 || close <= open) {
    return undefined;
  }
  return u.substring(open + 1, close);
};
/**
 * Collect album entries from the current page.
 *
 * Titles come from the `.info` elements and links from the `a.albumbgphoto`
 * anchors; the two lists are paired by position.
 * NOTE(review): pairing by index presumes both selectors yield elements in the
 * same order and count — confirm against the page markup.
 *
 * @param {object} page - Puppeteer page (only `page.evaluate` is used).
 * @returns {Promise<Array<{title: string, url?: string, thumbnailUrl?: string}>>}
 *   One entry per title; always an array (empty when the page has no titles).
 */
const getAlbumsInfosFromPage = async (page) => {
  const titles = await page.evaluate(() =>
    Array.from(document.querySelectorAll('.info')).map((x) => x.innerText)
  );
  // Return an empty array (not an object) so callers always get the same type.
  if (!titles?.length) {
    return [];
  }

  const links = await page.evaluate(() =>
    Array.from(document.querySelectorAll('a.albumbgphoto')).map((x) => ({
      url: x.href,
      thumbnailUrl: x.style['background-image'],
    }))
  );

  // The inline style looks like `url("...")`; keep only the quoted address.
  const albums = links.map((link) => ({
    ...link,
    thumbnailUrl: extractUrl(link.thumbnailUrl),
  }));

  // A title without a matching anchor yields an entry with only `title`.
  return titles.map((title, idx) => ({
    title,
    ...albums[idx],
  }));
};
// Script entry point: open the login page, sign in when not already signed in,
// then print the last page number and the album list found on the landing page.
const client = await createClient({ debug: process.env.DEBUG });
const LOGIN_URL = '.../login.html';
const LOGINED_URL = '.../main.html';
try {
  const { page } = client;
  await page.goto(LOGIN_URL);
  if (page.url() === LOGINED_URL) {
    // The site redirected straight to the landing page.
    console.info('has been logined');
  } else {
    // Credentials are taken from the environment so they are never hard-coded.
    const { LOGIN_USER: user, LOGIN_PASS: pass } = process.env;
    if (!user || !pass) {
      throw new Error('LOGIN_USER and LOGIN_PASS environment variables are required to log in');
    }
    await page.waitForSelector('#loginBtn', { timeout: 5000 });
    await waitMs(2000);
    await page.type('input#account', user, { delay: 180 });
    await page.type('input#password', pass, { delay: 180 });
    // Start waiting for the navigation before clicking; otherwise a fast
    // navigation can finish before waitForNavigation() is even registered.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#loginBtn', { delay: 1000 }),
    ]);
    if (page.url() !== LOGINED_URL) {
      throw new Error(`logined url not match ${page.url()} != ${LOGINED_URL}`);
    }
  }
  const lastPage = (await getLastPageFromPage(page)) || 1;
  console.log('last page', lastPage);
  const infos = await getAlbumsInfosFromPage(page);
  console.log(JSON.stringify(infos, null, 2));
} finally {
  // Always release the browser, and wait for it, so the process can exit
  // cleanly and close failures are not left as unhandled rejections.
  await destroyPage(client);
}
import puppeteer from 'puppeteer';
import path from 'node:path';
import { fileURLToPath } from 'url';
/**
 * Launch a Puppeteer browser and open one page with a 1024x768 viewport.
 *
 * The browser profile is stored in a `puppeteerCache` directory located one
 * level above this module's directory.
 *
 * @param {object} [config]
 * @param {*} [config.debug] - When truthy the browser is launched with
 *   `headless: false`; otherwise `headless: 'new'` is used.
 * @returns {Promise<{browser: object, page: object, args: string[], debug: *}>}
 *   The launched browser, its page, the launch arguments and the debug flag.
 * @throws Rethrows any launch or page-setup error; when page setup fails the
 *   already-launched browser is closed first.
 */
export const createClient = async (config = {}) => {
  const { debug } = config;
  const args = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--mute-audio',
    '--disable-notifications',
  ];
  // __dirname not defined in mjs
  const __filename = fileURLToPath(import.meta.url);
  const __dirname = path.dirname(__filename);
  console.debug(__dirname);
  const browser = await puppeteer.launch({
    headless: debug ? false : 'new',
    userDataDir: path.join(__dirname, '..', 'puppeteerCache'),
    args,
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1024,
      height: 768,
    });
    return { browser, page, args, debug };
  } catch (error) {
    // Do not leave an orphaned browser process behind when setup fails.
    // A failure while closing is secondary, so report it and keep the
    // original error as the one that propagates.
    await browser.close().catch((closeError) => {
      console.warn('failed to close browser after setup error', closeError);
    });
    throw error;
  }
};
/**
 * Close the page (when present) and then the browser of a client.
 *
 * @param {{browser?: object, page?: object} | null} [client] - Object holding
 *   the `browser` and `page` to close.
 * @returns {Promise<void>} Resolves once the browser is closed. Resolves
 *   immediately, after a warning, when the client has no browser.
 * @throws Rethrows an error from `page.close()` or `browser.close()`; the
 *   browser is closed even when closing the page fails.
 */
export const destroyPage = async (client = {}) => {
  // `?? {}` also tolerates an explicit null, which the default value does not.
  const { browser, page } = client ?? {};
  if (!browser) {
    console.warn('not found browser');
    return;
  }
  try {
    if (page) {
      await page.close();
    } else {
      console.warn('not found page');
    }
  } finally {
    // Runs even when page.close() throws, so the browser is never leaked.
    await browser.close();
  }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment