Created
November 16, 2017 23:59
-
-
Save cirops/a389aa78f37d6ef6aa36ecc2986e8d13 to your computer and use it in GitHub Desktop.
Simple example script that scrapes data from a paginated website using the `xmlhttprequest` and `cheerio` packages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var cheerio = require('cheerio'); | |
var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest; | |
/**
 * Fetch a URL with a blocking (synchronous) GET request.
 *
 * @param {string} theUrl - Address to request.
 * @returns {string} The response body as text.
 * @throws {Error} When the response status is outside the 2xx range, so an
 *   error page or failed request is never handed to the caller as valid content.
 */
function httpGet(theUrl) {
  const xmlHttp = new XMLHttpRequest();
  xmlHttp.open("GET", theUrl, false); // false for synchronous request
  xmlHttp.send(null);
  // Because the request is synchronous, the status is already final here.
  if (xmlHttp.status < 200 || xmlHttp.status >= 300) {
    throw new Error(`GET ${theUrl} failed with status ${xmlHttp.status}`);
  }
  return xmlHttp.responseText;
}
// Walk result pages 1 through 7 of the listing and print one pipe-delimited
// record per entry: title|address|neighborhood|city|cep|materials.
// All bindings are block-scoped so the script creates no implicit globals
// (which would throw under strict mode / ES modules).
for (let page = 1; page <= 7; page++) {
  const query = `http://cempre.org.br/servico/pesquisa/lista/v3/12/v4/R/pg/${page}`;
  const result = httpGet(query);
  const $ = cheerio.load(result);

  // Select the entry containers once per page instead of once per field.
  const boxes = $('.box-marg-1');
  const titles = boxes.find('.tit');
  const addresses = boxes.find('.txt > div:nth-child(1)');
  const neighborhoods = boxes.find('.txt > div:nth-child(2)');
  const cities = boxes.find('.txt > div:nth-child(3)');
  const ceps = boxes.find('.txt > div:nth-child(4)');
  const materials = boxes.find('.txt > div:nth-child(5)');

  // The field lists are matched up by position; a missing element at index i
  // yields an empty string for that field.
  for (let i = 0; i < titles.length; i++) {
    console.log(`${$(titles[i]).text()}|${$(addresses[i]).text()}|${$(neighborhoods[i]).text()}|${$(cities[i]).text()}|${$(ceps[i]).text()}|${$(materials[i]).text()}`);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment