Last active
April 10, 2018 13:45
-
-
Save ifree/d2abee47d0fb8c73125a380fb004a378 to your computer and use it in GitHub Desktop.
Export shop detail info from dianping.com to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function(){ | |
/**
 * Resolve a dot-separated property path against an object.
 *
 * @param {*} obj Root object to read from (in this file, the result of a
 *   querySelector call, which may be null).
 * @param {string} path Dot-separated property names, e.g. 'firstChild.nodeValue'.
 * @returns {*} The value found at the end of the path, or '' when the root,
 *   any intermediate step, or the final value is null/undefined.
 */
function $$get_prop(obj, path) {
  var current = obj;
  var keys = path.split('.');
  for (var i = 0; i < keys.length; i++) {
    // Stop at the first missing link. Resolving against the global object
    // instead would pick up unrelated values (e.g. a global `name`).
    if (current === null || current === undefined) {
      return '';
    }
    current = current[keys[i]];
  }
  // Normalise a missing leaf to '' so every "not found" case looks the same.
  return current === null || current === undefined ? '' : current;
}
/**
 * Collect the shop detail fields from a shop page document.
 *
 * Each field is read by querying one element with a CSS selector and then
 * reading a property path from it via $$get_prop.
 *
 * @param {Document} doc Document to query.
 * @returns {Object} Object with the keys name, category, rank, brief,
 *   address and phone, in that order.
 */
function $$parse_page(doc) {
  // Each entry: [output key, CSS selector, property path read from the match].
  var field_specs = [
    ['name', '#basic-info .shop-name', 'firstChild.nodeValue'],
    ['category', '.breadcrumb', 'innerText'],
    ['rank', '#basic-info .brief-info .mid-rank-stars', 'title'],
    ['brief', '#basic-info .brief-info', 'innerText'],
    ['address', '#basic-info .expand-info.address', 'innerText'],
    ['phone', '#basic-info .expand-info.tel', 'innerText']
  ];
  var shop = {};
  for (var i = 0; i < field_specs.length; i++) {
    var spec = field_specs[i];
    shop[spec[0]] = $$get_prop(doc.querySelector(spec[1]), spec[2]);
  }
  return shop;
}
/**
 * Fetch a URL with an asynchronous GET request and pass the parsed document
 * to a callback.
 *
 * @param {string} url Address to request.
 * @param {function(Document)} callback Called with xhr.responseXML when the
 *   response status is 200.
 * @param {function(string, number)=} on_error Optional. Called with the URL
 *   and the XHR status when the request fails (non-200 status or network
 *   error). Failures are always logged, whether or not this is supplied.
 */
function $$request_content(url, callback, on_error) {
  var xhr = new XMLHttpRequest();

  // Report a failure instead of dropping it silently, so a stalled crawl
  // can be diagnosed from the console.
  function report_failure(reason) {
    console.error('request failed', url, reason);
    if (typeof on_error === 'function') {
      on_error(url, xhr.status);
    }
  }

  xhr.open('GET', url, true);
  // Ask the browser to parse the response body into a Document.
  xhr.responseType = 'document';
  xhr.onload = function () {
    if (xhr.status === 200) {
      callback(xhr.responseXML);
    } else {
      report_failure('status ' + xhr.status);
    }
  };
  xhr.onerror = function () {
    report_failure('network error');
  };
  xhr.send(null);
}
/**
 * Fetch a shop page and pass its parsed fields to a callback.
 *
 * @param {string} url Address of the shop page.
 * @param {function(Object)} callback Called with the result of $$parse_page
 *   once $$request_content delivers the document.
 */
function $$request_page(url, callback) {
  function on_document(page_doc) {
    var shop = $$parse_page(page_doc);
    callback(shop);
  }
  $$request_content(url, on_document);
}
/**
 * Serialise an array of flat objects to CSV text.
 *
 * The column set and order come from the keys of the first row. Rows are
 * separated by CRLF. String cells are always quoted, with embedded quotes
 * doubled; numbers and booleans are written bare; null/undefined become an
 * empty quoted cell.
 *
 * @param {Array<Object>} arr Rows to serialise.
 * @returns {string} CSV text including a header line, or '' when there are
 *   no rows.
 */
function $$to_csv(arr) {
  // Without a first row there is nothing to derive the columns from.
  if (!arr || arr.length === 0) {
    return '';
  }

  // CSV escapes a quote by doubling it; backslash escapes are not understood
  // by CSV readers.
  function quote(text) {
    return '"' + text.replace(/"/g, '""') + '"';
  }

  function format_cell(value) {
    if (value === null || value === undefined) {
      return '""';
    }
    if (typeof value === 'number' || typeof value === 'boolean') {
      return String(value);
    }
    if (typeof value === 'string') {
      return quote(value);
    }
    // Non-primitive values are embedded as JSON text inside a quoted cell.
    var text = JSON.stringify(value);
    return quote(text === undefined ? '' : text);
  }

  // Header names are left bare unless they contain a CSV-special character.
  function format_header(name) {
    return /[",\r\n]/.test(name) ? quote(name) : name;
  }

  var fields = Object.keys(arr[0]);
  var lines = [fields.map(format_header).join(',')];
  arr.forEach(function (row) {
    lines.push(fields.map(function (field_name) {
      return format_cell(row[field_name]);
    }).join(','));
  });
  return lines.join('\r\n');
}
/**
 * Convert rows to CSV with $$to_csv and trigger a browser download of the
 * result as "data.csv".
 *
 * @param {Array<Object>} arr Rows to export.
 */
function $$send_csv(arr) {
  var csv_text = $$to_csv(arr);
  // A leading BOM marks the file as UTF-8, so spreadsheet applications
  // decode non-ASCII text correctly.
  var blob = new Blob(['\ufeff', csv_text], {type: 'text/csv;charset=utf-8'});
  var url = window.URL.createObjectURL(blob);

  // A temporary hidden link is the portable way to start a named download.
  var link = document.createElement('a');
  link.style.display = 'none';
  link.href = url;
  link.download = 'data.csv';
  document.body.appendChild(link);
  link.click();
  if (link.parentNode) {
    link.parentNode.removeChild(link);
  }

  // Revoke on a later tick so the browser has started reading the blob
  // before its URL becomes invalid.
  setTimeout(function () {
    window.URL.revokeObjectURL(url);
  }, 0);
}
/**
 * Request every shop linked from a listing page and report the parsed
 * results once all of them have arrived.
 *
 * Each request is started after a random delay, so the shop pages are not
 * all requested at the same instant.
 *
 * @param {Document} doc Listing page to scan for '.shop-all-list div.tit'
 *   entries.
 * @param {function(Document, Array<Object>)} done_callback Called once with
 *   `doc` and the collected shop objects (in arrival order). Called
 *   immediately with an empty array when the page links to no shops.
 * @param {number=} max_delay_ms Optional upper bound, in milliseconds, for
 *   the random per-request delay. Defaults to 30000.
 */
function $$parse_page_shops(doc, done_callback, max_delay_ms) {
  var delay_limit = max_delay_ms === undefined ? 30000 : max_delay_ms;
  var shops = [];

  // Collect the links first so the expected count is known before any
  // request can complete. Entries without a usable link are skipped.
  var hrefs = [];
  Array.prototype.forEach.call(
    doc.querySelectorAll('.shop-all-list div.tit'),
    function (node) {
      var anchor = node.querySelector("a");
      if (anchor && anchor.href) {
        hrefs.push(anchor.href);
      }
    }
  );

  // With nothing to wait for, no response would ever trigger completion,
  // so finish right away.
  if (hrefs.length === 0) {
    console.log("current page done");
    done_callback(doc, shops);
    return;
  }

  var pending = hrefs.length;
  function on_shop_parsed(shop) {
    shops.push(shop);
    pending--;
    if (pending === 0) {
      console.log("current page done");
      done_callback(doc, shops);
    } else {
      console.log('shop parse done', shop);
    }
  }

  hrefs.forEach(function (href) {
    setTimeout(function () {
      $$request_page(href, on_shop_parsed);
    }, Math.floor(Math.random() * delay_limit));
  });
}
/**
 * Request the page linked by the '.page > .next' element of a listing page.
 *
 * @param {Document} doc Current listing page.
 * @param {function(Document)} callback Passed through to $$request_content,
 *   which calls it with the fetched document.
 * @returns {boolean} true when a request was issued, false when the page has
 *   no next-page link (no request is made and `callback` is not called).
 */
function $$fetch_next_page(doc, callback) {
  var next_link = doc.querySelector('.page > .next');
  // A page without a next link has nothing to follow; report that instead
  // of dereferencing null.
  if (!next_link || !next_link.href) {
    console.warn('no next page link found');
    return false;
  }
  $$request_content(next_link.href, callback);
  return true;
}
/**
 * Entry point: ask how many listing pages to crawl, collect the shops from
 * each page starting at the current `document`, and export everything as CSV.
 *
 * The crawl stops when the requested number of pages has been processed or
 * when a page has no '.page > .next' link, whichever comes first. A
 * cancelled or invalid answer aborts without crawling.
 */
function $$start() {
  var answer = prompt('how many pages do u want to get?');
  var max_page = parseInt(answer, 10);
  // A cancelled prompt or non-numeric text gives NaN, which would never
  // match the page counter. Refuse to start instead.
  if (isNaN(max_page) || max_page < 1) {
    console.warn('invalid page count, nothing to do:', answer);
    return;
  }

  var cur_page = 0;
  var shop_objs = [];

  function finish() {
    if (shop_objs.length === 0) {
      console.warn('no shops collected, nothing to export');
      return;
    }
    $$send_csv(shop_objs);
  }

  function start_parse(doc) {
    $$parse_page_shops(doc, on_shop_list_done);
  }

  function on_shop_list_done(doc, arr) {
    cur_page++;
    shop_objs = shop_objs.concat(arr);
    if (cur_page >= max_page) {
      finish();
      return;
    }
    // Export what has been gathered when the listing ends before the
    // requested page count is reached.
    if (!doc.querySelector('.page > .next')) {
      finish();
      return;
    }
    $$fetch_next_page(doc, start_parse);
  }

  $$parse_page_shops(document, on_shop_list_done);
}
$$start(); | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment