Skip to content

Instantly share code, notes, and snippets.

@youchan
Created January 25, 2020 08:00
Show Gist options
  • Save youchan/814c33a9a87a3b718011fe0898a94ae8 to your computer and use it in GitHub Desktop.
Save youchan/814c33a9a87a3b718011fe0898a94ae8 to your computer and use it in GitHub Desktop.
技術書典のサークルの頒布物のページから情報を抜きだす
const puppeteer = require('puppeteer');
const fs = require("fs");
const url = "https://techbookfest.org/event/tbf07/circle/xxxxxxxxxxxxxxx";

/*
 * Opens the circle page at `url` in a visible (non-headless) browser, extracts
 * one record per product card, and writes each record to
 * `distribution<index>.json` in the current working directory.
 *
 * Each record has the keys: title, description, firstAvarableEvent, pageCount,
 * price, urls, images. The key spelling `firstAvarableEvent` is kept as-is
 * because it is part of the emitted JSON.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });

    // The callback below is executed inside the page, so every helper it uses
    // has to be declared inside it; nothing from this Node scope is visible.
    const distributions = await page.evaluate(() => {
      // Text of the first match under `root`, or "" when the element is absent,
      // so a card lacking one optional node does not abort the whole scrape.
      const textOf = (root, selector) => {
        const element = root.querySelector(selector);
        return element ? element.innerText : "";
      };

      // First run of digits in `text` as a string, or null when there is none
      // (including when the segment itself is missing).
      const firstNumber = (text) => {
        const match = /\d+/.exec(text === undefined ? "" : text);
        return match ? match[0] : null;
      };

      const cards = document.querySelectorAll(".products-container mat-card.mat-card");
      return Array.from(cards, (card) => {
        // The subtitle is split on "/"; the second segment is read as the page
        // count and the third as the price.
        const subtitleParts = textOf(card, '.mat-card-subtitle')
          .split("/")
          .map((part) => part.trim());

        return {
          title: textOf(card, '.mat-card-title'),
          description: textOf(card, ".products-description"),
          firstAvarableEvent: textOf(card, ".mat-card-content p"),
          pageCount: firstNumber(subtitleParts[1]),
          price: firstNumber(subtitleParts[2]),
          urls: Array.from(card.querySelectorAll(".mat-card-content a"), (a) => a.href),
          images: Array.from(card.querySelectorAll("img.mat-card-image"), (img) => img.src),
        };
      });
    });

    distributions.forEach((distribution, i) => {
      fs.writeFileSync(`distribution${i}.json`, JSON.stringify(distribution));
    });
  } finally {
    // Always shut the browser down, even when navigation or scraping throws;
    // otherwise the launched browser process is left running.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment