Last active
September 8, 2017 20:13
Scrape FAQs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shared state for the scrape. `var` is kept on purpose: this script is pasted
// into the browser console, where re-running `const`/`let` declarations in the
// same session can fail with a redeclaration error.

// Every FAQ question element on the page.
var $questions = jQuery('.faq-question');

// Export payload: category records plus one entry record per question.
var json = {};
json.categories = [];
json.entries = [];

// Index into json.categories of the category currently being filled;
// starts at -1 and is advanced when a question numbered "1." is seen.
var categoryIndex = -1;

// Section and entry-type ids stamped onto every generated entry.
// NOTE(review): presumably ids from the target CMS -- confirm before importing.
var sectionId = 21;
var typeId = 28;
/**
 * Tells whether a question label is numbered "1.".
 *
 * The scraping loop in this file moves on to the next category whenever this
 * returns true, so it must not match "10.", "11.", etc.
 *
 * @param {string} string - Question text as scraped from the page.
 * @returns {boolean} True when the text starts with "1." (leading whitespace
 *   is ignored, since scraped text often carries markup indentation).
 */
function isFirst(string) {
  // Anchored, non-global regex: a `g` flag adds nothing to a single anchored
  // match and would make test() stateful through lastIndex.
  return /^\s*1\./.test(string);
}
/**
 * Converts free text into a URL-style slug.
 *
 * Steps: lower-case, trim, turn each run of whitespace into one hyphen, drop
 * every character that is not a word character (A-Z, a-z, 0-9, _) or hyphen,
 * collapse repeated hyphens, and strip hyphens from both ends.
 * Non-ASCII letters are removed because `\w` is ASCII-only here.
 *
 * @param {string} Text - Source text, e.g. a question or category title.
 * @returns {string} The slug; may be empty if nothing sluggable remains.
 */
function convertToSlug(Text) {
  return Text
    .toLowerCase()
    .trim()
    // Any whitespace run (spaces, tabs, newlines) becomes one separator, so
    // words are never fused together and double spaces do not yield "--".
    .replace(/\s+/g, '-')
    .replace(/[^\w-]+/g, '')
    // Removing punctuation between separators (e.g. "a & b") can leave "--".
    .replace(/-{2,}/g, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Builds the export record for one FAQ category.
 *
 * Only `slug` and `title` come from the argument; every other attribute is a
 * fixed value.
 * NOTE(review): groupId 3 / parentId 1 and the fixed timestamps presumably
 * match the target import -- confirm before reusing elsewhere.
 *
 * @param {{title: string, slug: string}} category - Scraped category data.
 * @returns {Object} A "CategoryModel" record with `attributes` and `content`.
 */
function buildCategory(category) {
  return {
    "@model": "CategoryModel",
    "attributes": {
      "groupId": 3,
      "parentId": 1,
      "locale": "en_us",
      "slug": category.slug,
      "dateCreated": "2016-01-13 01:25:57",
      "dateUpdated": "2016-01-13 01:25:57",
      "enabled": true
    },
    "content": {
      "title": category.title,
      "fields": []
    }
  };
}
/**
 * Builds the export record for one FAQ entry (a question and its answer).
 *
 * The entry slug is derived from the question via convertToSlug(), and the
 * entry is linked to its category by slug. `sectionId` and `typeId` are read
 * from the file-level variables of the same names.
 * NOTE(review): authorId 1, groupId 3 and the fixed timestamps are hard-coded;
 * presumably they match the target import -- confirm.
 *
 * @param {string} question - Question text, used as title and slug source.
 * @param {string} answer - Answer markup, stored as the `body` field.
 * @param {string} category - Slug of the category this entry belongs to.
 * @returns {Object} An "EntryModel" record with `attributes` and `content`.
 */
function buildEntry(question, answer, category) {
  return {
    "@model": "EntryModel",
    "attributes": {
      "sectionId": sectionId,
      "typeId": typeId,
      "authorId": 1,
      "locale": "en_us",
      "slug": convertToSlug(question),
      "postDate": "2015-02-27 16:43:52",
      "expiryDate": null,
      "dateCreated": "2015-02-27 16:43:52",
      "dateUpdated": "2015-02-27 16:51:42",
      "enabled": true
    },
    "content": {
      "title": question,
      "fields": {
        "body": answer
      },
      "related": {
        "category": {
          "@model": "CategoryModel",
          "matchBy": "slug",
          // Array form: the match value is a list holding the single slug.
          "matchValue": [category],
          "matchCriteria": {
            "groupId": 3
          }
        }
      }
    }
  };
}
// Build one category record per FAQ section header, in document order.
var $categories = jQuery('.faq-header');
// NOTE(review): `categories` is not read anywhere in the visible script; kept
// in case something else in the console session relies on it.
var categories = [];
$categories.each(function () {
  // Trim so markup indentation does not leak into the title or the slug.
  var title = jQuery(this).text().trim();
  json.categories.push(buildCategory({
    title: title,
    slug: convertToSlug(title)
  }));
});
// Build one entry record per question. Questions are numbered within each
// category, so a question numbered "1." marks the start of the next category.
$questions.each(function () {
  var $question = jQuery(this);
  // Trim so leading markup whitespace cannot hide the "N." prefix.
  var question = $question.text().trim();

  // The answer panel points back at the question's grandparent element through
  // aria-labelledby. The value is quoted so ids with special characters still
  // form a valid attribute selector.
  var answerId = $question.parent().parent().attr('id');
  var $answer = jQuery('[aria-labelledby="' + answerId + '"]');
  var answerBody = $answer.html();

  if (isFirst(question)) {
    categoryIndex++;
  }
  var category = json.categories[categoryIndex];
  if (!category) {
    // Happens when the first question is not numbered "1." or when there are
    // more "1." questions than .faq-header elements.
    throw new Error(
      'No category found for question "' + question + '" (category index ' +
      categoryIndex + ' of ' + json.categories.length + ')'
    );
  }

  json.entries.push(buildEntry(
    // Drop the "N. " numbering prefix from the stored title.
    question.replace(/^\d+\.\s*/, ''),
    answerBody,
    category.attributes.slug
  ));
});
// Sanity check: these two numbers should match, otherwise a question was
// missed while building the entries.
console.log('questions length: ', $questions.length);
console.log('faq length: ', json.entries.length);

// `copy` is the browser console's clipboard helper. Swap in one of the
// alternatives below to export only part of the data.
// copy(json.categories); // copy just the categories
// copy(json.entries); // copy just the entries
copy(json); // Copy everything
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just paste this directly into the Chrome console to use.