Created
January 9, 2018 05:29
-
-
Save dwerdo/3d2cc53a89eaa96896aff0e32b14ce8e to your computer and use it in GitHub Desktop.
Scrapes Info off multiple tabbed pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
/**
 * CSS selector template for the link inside one vehicle card on the listing
 * page. The literal `INDEX` placeholder must be replaced with the card's
 * 1-based `:nth-child` position before the selector is used.
 */
const USED_CAR_SELECTOR =
  '#react-content > div > section > div.vehicles > div:nth-child(INDEX) > div > a';

/**
 * CSS selector for the element on a vehicle detail page whose `innerHTML`
 * is read as the VIN.
 */
const VIN_SELECTOR =
  '#page > div > div.pane-content-constrain > main > div > div > div > div > section.side > aside > p.extra-info > span:nth-child(3)';
/**
 * Scrapes VINs from Tesla's used-vehicle listing.
 *
 * Loads the listing page, clicks every `.vehicle-link` element, waits for the
 * resulting browser pages to show up, then reads the VIN element from every
 * page after the first one and logs the collected values.
 *
 * NOTE(review): the page-counting logic assumes each click opens the vehicle
 * in a new tab and that the listing page stays at index 0 of
 * `browser.pages()` — confirm against the live site.
 *
 * @returns {Promise<string[]>} The VIN strings that were found, in tab order.
 * @throws Rejects if the browser cannot be launched or the listing page cannot
 *   be loaded. The browser is always closed before the rejection propagates.
 */
async function run() {
  // Upper bound on how long to wait for the clicked links to open their tabs.
  const TAB_WAIT_TIMEOUT_MS = 30000;
  // Pause between polls of browser.pages() so the wait is not a busy loop.
  const POLL_INTERVAL_MS = 100;
  // Upper bound on how long to wait for the VIN element inside a single tab.
  const VIN_WAIT_TIMEOUT_MS = 5000;

  // Plain timer so the pauses do not depend on a Puppeteer-version-specific API.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const vins = [];
  const browser = await puppeteer.launch();

  try {
    const pages = await browser.pages();
    let page;
    if (pages.length > 0) {
      page = pages[0];
      console.log('already one page default');
    } else {
      page = await browser.newPage();
      console.log('nope');
    }

    await page.goto('https://tesla.com/used');

    const carHandles = await page.$$('.vehicle-link');
    console.log('handles: ', carHandles.length);

    // Pages that exist before any link is clicked (normally just the listing).
    const pagesBeforeClicks = (await browser.pages()).length;
    let clicked = 0;

    for (let i = 0; i < carHandles.length; i++) {
      try {
        if (i === carHandles.length - 1) {
          // Kept from the original: a fixed pause before the last click
          // instead of a selector wait (the reason is not evident from the code).
          await sleep(200);
        } else {
          await page.waitForSelector(USED_CAR_SELECTOR.replace('INDEX', String(i + 1)));
        }
        // Await the click so a failure lands in this catch block instead of
        // becoming an unhandled rejection.
        await carHandles[i].click();
        clicked += 1;
        console.log(`handle ${i} clicked.`);
      } catch (error) {
        console.log('something wrong with clicking used cars.', error);
        console.log('index wrong: ', i);
      }
    }
    console.log('Finished clicking.');

    // One extra page is expected per successful click, on top of the pages
    // that were already open. Stop polling at the deadline so a tab that never
    // opens cannot hang the script.
    const expectedPages = pagesBeforeClicks + clicked;
    const deadline = Date.now() + TAB_WAIT_TIMEOUT_MS;
    let targets = await browser.pages();
    console.log('pages in browser: ', targets.length);
    while (targets.length < expectedPages && Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      targets = await browser.pages();
      console.log('pages in browser: ', targets.length);
    }

    console.log('getting vins...', targets.length);
    // Index 0 is skipped: it is treated as the listing page, not a vehicle tab.
    for (let i = 1; i < targets.length; i++) {
      const targetPage = targets[i];
      let vin;
      try {
        // A freshly opened tab may not have rendered yet; give it a bounded
        // amount of time before reading the element.
        await targetPage.waitForSelector(VIN_SELECTOR, { timeout: VIN_WAIT_TIMEOUT_MS });
        vin = await targetPage.evaluate((sel) => {
          const element = document.querySelector(sel);
          return element ? element.innerHTML : undefined;
        }, VIN_SELECTOR);
      } catch (error) {
        // One broken tab must not abort the whole scrape.
        console.log(`could not read vin from tab ${i}:`, error);
      }

      if (vin) {
        vins.push(vin);
      } else {
        console.log('i was not created:', i);
      }
    }

    const checkPages = await browser.pages();
    console.log('num of pages: ', checkPages.length);
    vins.forEach((vin, i) => {
      console.log(`${i + 1}: ${vin}`);
    });
    console.log('total tabs: ', targets.length);

    return vins;
  } finally {
    // Always release the Chromium process, on success and on failure alike.
    await browser.close();
  }
}
// Entry point: start the scrape and surface any failure instead of leaving
// the returned promise floating as an unhandled rejection.
run().catch((error) => {
  console.error('scrape failed:', error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment