Skip to content

Instantly share code, notes, and snippets.

@brianjhanson
Last active September 8, 2017 20:13
Scrape FAQs
// Every FAQ question element on the page.
var $questions = jQuery('.faq-question');

// Export payload: category records and entry records are collected here.
var json = {
  categories: [],
  entries: []
};

// Index into json.categories of the category currently being filled.
// Starts at -1; the question loop increments it when it meets a "1." question.
var categoryIndex = -1;

// Section id and entry-type id stamped onto every entry by buildEntry().
var sectionId = 21;
var typeId = 28;
/**
 * Tells whether a question label is numbered "1.".
 *
 * Leading whitespace is ignored so that text read straight from the DOM,
 * which may be padded with spaces or newlines, is still recognised.
 *
 * @param {string} string - Question text, e.g. "1. How do I sign up?".
 * @returns {boolean} true when the text starts with "1." ("10." or "2." do
 *     not count).
 */
function isFirst(string) {
  return /^\s*1\./.test(string);
}
/**
 * Derives a slug from a piece of text.
 *
 * Steps, in order: lowercase the text, turn every space character into a
 * hyphen, then drop every run of characters that is neither a word character
 * (letter, digit, underscore) nor a hyphen. Consecutive spaces therefore
 * become consecutive hyphens.
 *
 * @param {string} Text - Text to convert.
 * @returns {string} The slug.
 */
function convertToSlug(Text) {
  var lowered = Text.toLowerCase();
  var hyphenated = lowered.replace(/ /g,'-');
  var slug = hyphenated.replace(/[^\w-]+/g,'');
  return slug;
}
/**
 * Wraps a scraped category in a "CategoryModel" record.
 *
 * Group id, parent id, locale, timestamps and the enabled flag are fixed
 * values; only the slug and title come from the argument.
 *
 * @param {{title: string, slug: string}} category - Scraped category.
 * @returns {Object} Record with "@model", "attributes" and "content" keys.
 */
function buildCategory(category) {
  var attributes = {
    groupId: 3,
    parentId: 1,
    locale: 'en_us',
    slug: category.slug,
    dateCreated: '2016-01-13 01:25:57',
    dateUpdated: '2016-01-13 01:25:57',
    enabled: true
  };

  var content = {
    title: category.title,
    fields: []
  };

  return {
    '@model': 'CategoryModel',
    attributes: attributes,
    content: content
  };
}
/**
 * Wraps one question/answer pair in an "EntryModel" record.
 *
 * The section id and type id are read from the file-level `sectionId` and
 * `typeId` variables, and the slug is derived from the question with
 * convertToSlug(). Author id, locale, dates and the enabled flag are fixed.
 * The entry points at its category by slug inside group 3.
 *
 * @param {string} question - Question text; used as the title and slug source.
 * @param {string} answer - Answer markup; stored as the "body" field.
 * @param {string} category - Slug of the category the entry belongs to.
 * @returns {Object} Record with "@model", "attributes" and "content" keys.
 */
function buildEntry(question, answer, category) {
  var attributes = {
    sectionId: sectionId,
    typeId: typeId,
    authorId: 1,
    locale: 'en_us',
    slug: convertToSlug(question),
    postDate: '2015-02-27 16:43:52',
    expiryDate: null,
    dateCreated: '2015-02-27 16:43:52',
    dateUpdated: '2015-02-27 16:51:42',
    enabled: true
  };

  // Relation to the owning category, looked up by slug within group 3.
  var categoryRelation = {
    '@model': 'CategoryModel',
    matchBy: 'slug',
    matchValue: [category],
    matchCriteria: { groupId: 3 }
  };

  return {
    '@model': 'EntryModel',
    attributes: attributes,
    content: {
      title: question,
      fields: { body: answer },
      related: { category: categoryRelation }
    }
  };
}
// Build one category record per FAQ header found on the page.
var $categories = jQuery('.faq-header');
$categories.each(function() {
  // .text() can return the heading padded with whitespace/newlines from the
  // markup; trim it so neither the title nor the slug carries that padding
  // (untrimmed spaces would become leading/trailing hyphens in the slug).
  var title = jQuery(this).text().trim();
  json.categories.push(buildCategory({
    title: title,
    slug: convertToSlug(title)
  }));
});
// Build one entry per question. Questions are numbered within each category,
// so a question numbered "1." means the next category has started.
$questions.each(function() {
  var $question = jQuery(this);
  // .text() can be padded with whitespace from the markup; trim it so the
  // "1." check and the number-stripping regex below both see the number.
  var question = $question.text().trim();

  if (isFirst(question)) {
    categoryIndex++;
  }

  var category = json.categories[categoryIndex];
  if (!category) {
    // Happens when a question shows up before any "1." question, or when
    // there are more numbered lists than scraped categories. Skip it instead
    // of crashing on category.attributes; the length check at the end of the
    // script will show the mismatch.
    console.warn('No category for question, skipping: ', question);
    return;
  }

  // The answer element refers back to the question's grandparent via
  // aria-labelledby. The value is quoted so that ids which are not plain
  // identifiers still form a valid selector.
  var answerId = $question.parent().parent().attr('id');
  var $answer = jQuery('[aria-labelledby="' + answerId + '"]');
  var answerBody = $answer.html();

  json.entries.push(buildEntry(
    // Drop the leading "12. " style numbering from the title.
    question.replace(/^\d+\.\s*/, ''),
    answerBody,
    category.attributes.slug
  ));
});
// These numbers should be the same (make sure we got everything)
console.log('questions length: ', $questions.length);
console.log('faq length: ', json.entries.length);
if ($questions.length !== json.entries.length) {
  // Make the mismatch impossible to overlook instead of relying on the
  // reader comparing the two numbers above.
  console.warn('Entry count differs from question count; the scrape is incomplete.');
}
// copy() is the browser console's clipboard helper; pick what to export:
// copy(json.categories); // copy just the categories
// copy(json.entries); // copy just the entries
copy(json); // Copy everything
@brianjhanson
Copy link
Author

Just paste this directly into the Chrome console to use.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment