Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save arth2o/43a9f3ea6431e10eec8e45f9f34c8b4f to your computer and use it in GitHub Desktop.
Save arth2o/43a9f3ea6431e10eec8e45f9f34c8b4f to your computer and use it in GitHub Desktop.
Puppeteer (headless Chrome): handling SSL certificate errors with ignoreHTTPSErrors
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const striptags = require('striptags');
/**
 * Normalizes a scraped text fragment for output.
 *
 * Tags are stripped first and whitespace is collapsed afterwards, so that the
 * gaps left behind by removed tags are collapsed as well.
 *
 * @param {?string} text - Raw text or markup. `null`/`undefined` (e.g. a
 *   missing attribute) is treated as an empty string instead of throwing.
 * @returns {string} The text without tags, with every run of two or more
 *   whitespace characters replaced by a single space, trimmed at both ends.
 */
function cleanText(text) {
  if (text == null) {
    return '';
  }
  const withoutTags = striptags(String(text));
  return withoutTags.replace(/\s{2,}/g, ' ').trim();
}
/**
 * Loads the GitHub search results page for "chrome headless" in a headless
 * browser, extracts one record per repository list item, and prints the
 * collected records to the console.
 *
 * The browser is always closed, even when navigation or parsing fails, so no
 * Chrome process is left running.
 *
 * @returns {Promise<void>} Resolves once the rows are printed and the browser
 *   has been closed; rejects with the underlying error otherwise.
 */
async function run() {
  // ignoreHTTPSErrors is the point of this example: it lets navigation proceed
  // on pages with certificate errors. dumpio forwards the browser's own output.
  const browser = await puppeteer.launch({dumpio: true, ignoreHTTPSErrors: true});
  try {
    const page = await browser.newPage();
    await page.goto('https://github.com/search?q=chrome+headless');
    const content = await page.content();

    // Parse the rendered markup outside the browser with cheerio.
    const $ = cheerio.load(content);
    const rows = [];
    $('ul.repo-list > div.repo-list-item').each((index, element) => {
      const item = $(element);
      const link = item.find('h3>a');
      rows.push({
        'title': cleanText(link.text()),
        // attr() yields undefined when no anchor matched; fall back to ''.
        'title_link': cleanText(link.attr('href') || ''),
        'description': cleanText(item.find('.d-inline-block').text()),
        'repoLang': cleanText(item.find('div.col-2.text-gray.pt-2').text()),
        // Key spelling kept as-is so the printed output shape does not change;
        // presumably this is the star count - confirm against the page markup.
        'startWars': cleanText(item.find('a.muted-link').text()),
        'topicTags': cleanText(item.find('a.topic-tag-link').text()),
      });
    });
    console.log(rows);
  } finally {
    // Await the shutdown so the promise returned by run() covers it.
    await browser.close();
  }
}
// Entry point: report a failed scrape and exit non-zero instead of leaving
// the promise returned by run() as an unhandled rejection.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@arth2o
Copy link
Author

arth2o commented Oct 4, 2017

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment