Skip to content

Instantly share code, notes, and snippets.

@ifree
Last active April 10, 2018 13:45
Show Gist options
  • Save ifree/d2abee47d0fb8c73125a380fb004a378 to your computer and use it in GitHub Desktop.
Save ifree/d2abee47d0fb8c73125a380fb004a378 to your computer and use it in GitHub Desktop.
Export shop details from dianping.com to CSV
(function(){
/**
 * Read a (possibly nested) property from an object using a dotted path.
 *
 * A missing root never falls back to the global object: callers pass the
 * result of querySelector(), which is null when nothing matched, and in that
 * case the lookup must not pick up unrelated properties from the window.
 *
 * @param {Object} obj - Root object to read from; may be null or undefined.
 * @param {string} path - Dot-separated property names, e.g. 'firstChild.nodeValue'.
 * @returns {*} The resolved value, or '' when the root is missing, an
 *     intermediate value is falsy, or the final value is null/undefined.
 */
function $$get_prop(obj, path) {
    if (obj === null || obj === undefined) {
        return '';
    }
    var value = path.split('.').reduce(function(prev, curr) {
        // Stop descending as soon as an intermediate step is missing.
        return prev ? prev[curr] : '';
    }, obj);
    // Normalise "not found" to '' so every caller sees one empty marker.
    return (value === null || value === undefined) ? '' : value;
}
/**
 * Build a flat shop record from a shop detail document.
 *
 * Each field is read from the first element matching a fixed selector; the
 * property order of the returned object is the column order used later on.
 *
 * @param {Document} doc - Document to query.
 * @returns {Object} Record with name, category, rank, brief, address and phone.
 */
function $$parse_page(doc){
    // Find one element and pull a (possibly nested) property out of it.
    function read(selector, path) {
        return $$get_prop(doc.querySelector(selector), path);
    }

    var shop = {};
    shop.name = read('#basic-info .shop-name', 'firstChild.nodeValue');
    shop.category = read('.breadcrumb', 'innerText');
    shop.rank = read('#basic-info .brief-info .mid-rank-stars', 'title');
    shop.brief = read('#basic-info .brief-info', 'innerText');
    shop.address = read('#basic-info .expand-info.address', 'innerText');
    shop.phone = read('#basic-info .expand-info.tel', 'innerText');
    return shop;
}
/**
 * Fetch `url` with an asynchronous GET and pass the parsed response document
 * to `callback`.
 *
 * The callback is invoked only for HTTP 200 responses. Any other status, and
 * any network-level failure, is reported through console.error instead of
 * being dropped silently; the callback is not invoked in those cases.
 *
 * @param {string} url - Address to request.
 * @param {function(Document)} callback - Receives xhr.responseXML on success.
 */
function $$request_content(url, callback)
{
    var xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    // Have the browser parse the body into a Document.
    xhr.responseType = 'document';
    xhr.onload = function () {
        // The load event is dispatched only after the request has completed,
        // so the HTTP status is the only thing left to check.
        if (xhr.status !== 200) {
            console.error('request failed', url, xhr.status);
            return;
        }
        callback(xhr.responseXML);
    };
    xhr.onerror = function () {
        console.error('request failed', url, 'network error');
    };
    xhr.send(null);
}
/**
 * Download a page and hand its parsed shop record to `callback`.
 *
 * @param {string} url - Address of the page to request.
 * @param {function(Object)} callback - Receives the result of $$parse_page().
 */
function $$request_page(url, callback)
{
    // Convert the fetched document into a record before passing it on.
    function on_document(doc) {
        var shop = $$parse_page(doc);
        callback(shop);
    }

    $$request_content(url, on_document);
}
/**
 * Serialise an array of flat records into CSV text.
 *
 * The header row is taken from the keys of the first record and every row is
 * written in that column order. Strings are always wrapped in double quotes
 * with embedded quotes doubled (CSV quoting, not JSON escaping), so commas,
 * quotes and line breaks inside a value stay within their cell.
 *
 * @param {Array<Object>} arr - Records to serialise.
 * @returns {string} CSV text with CRLF row separators; '' when `arr` is
 *     missing or empty.
 */
function $$to_csv(arr)
{
    // Render one value as a CSV cell.
    function to_cell(value) {
        if (value === null || value === undefined) {
            return '""';
        }
        if (typeof value === 'number' || typeof value === 'boolean') {
            return String(value);
        }
        var text = (typeof value === 'object') ? JSON.stringify(value) : String(value);
        return '"' + text.replace(/"/g, '""') + '"';
    }

    // Without a first record there are no column names to derive.
    if (!arr || arr.length === 0) {
        return '';
    }

    var fields = Object.keys(arr[0]);
    var lines = arr.map(function(row){
        return fields.map(function(fieldName){
            return to_cell(row[fieldName]);
        }).join(',');
    });
    lines.unshift(fields.join(','));
    return lines.join('\r\n');
}
/**
 * Convert the records to CSV and trigger a browser download named "data.csv".
 *
 * The download is started by clicking a temporary hidden link that points at
 * an object URL for the CSV data.
 *
 * @param {Array<Object>} arr - Records to export; passed to $$to_csv().
 */
function $$send_csv(arr)
{
    var str = $$to_csv(arr);
    var data = new Blob([str], {type: 'text/csv;charset=utf-8'});
    var url = window.URL.createObjectURL(data);

    var a = document.createElement("a");
    a.style.display = "none";
    a.href = url;
    a.download = "data.csv";
    document.body.appendChild(a);
    a.click();
    a.parentNode.removeChild(a);

    // Revoking right after click() can cancel the download before the browser
    // has read the data, so release the URL a little later instead.
    setTimeout(function(){
        window.URL.revokeObjectURL(url);
    }, 10000);
}
/**
 * Fetch and parse every shop linked from a listing document, then report the
 * collected records.
 *
 * Each shop request is started after a random delay of up to 30 seconds so
 * the requests are spread out rather than fired in one burst.
 * `done_callback` is always invoked asynchronously, including when the
 * listing contains no shop links at all.
 *
 * Note: a shop whose request never reports back (the request helper only
 * calls back on success) still prevents `done_callback` from firing.
 *
 * @param {Document} doc - Listing document to scan for shop links.
 * @param {function(Document, Array<Object>)} done_callback - Called once with
 *     `doc` and the parsed shop records, in completion order.
 */
function $$parse_page_shops(doc, done_callback)
{
    var _max_delay_ms = 30000;
    var _shop_objs = [];
    var _hrefs = [];

    function _req_done_callback(obj){
        _shop_objs.push(obj);
        if (_shop_objs.length === _hrefs.length) {
            console.log("current page done");
            done_callback(doc, _shop_objs);
        } else {
            console.log('shop parse done', obj);
        }
    }

    // Collect the links first so the expected total is fixed before any
    // request is scheduled; titles without a usable link are skipped.
    doc.querySelectorAll('.shop-all-list div.tit').forEach(function(node){
        var link = node.querySelector("a");
        if (link && link.href) {
            _hrefs.push(link.href);
        }
    });

    // Nothing to fetch: still report completion so the caller can move on.
    if (_hrefs.length === 0) {
        console.log("current page done");
        setTimeout(function(){ done_callback(doc, _shop_objs); }, 0);
        return;
    }

    _hrefs.forEach(function(href){
        setTimeout(function(){
            $$request_page(href, _req_done_callback);
        }, Math.floor(Math.random() * _max_delay_ms));
    });
}
/**
 * Request the page that the "next" pagination link of `doc` points to.
 *
 * @param {Document} doc - Listing document containing a '.page > .next' link.
 * @param {function(Document)} callback - Passed through to $$request_content().
 */
function $$fetch_next_page(doc, callback)
{
    var next_link = doc.querySelector('.page > .next');
    var next_url = next_link.href;
    $$request_content(next_url, callback);
}
/**
 * Entry point: ask how many listing pages to scrape, walk them starting from
 * the current document, and export every collected shop as CSV.
 *
 * Scraping stops when the requested number of pages has been processed or
 * when the current page has no "next" link, whichever comes first, so the
 * data gathered so far is always exported. A cancelled prompt or an answer
 * that is not a positive integer aborts without scraping.
 */
function $$start(){
    var max_page = parseInt(prompt('how many pages do u want to get?'), 10);
    if (isNaN(max_page) || max_page < 1) {
        console.log('no valid page count given, nothing to do');
        return;
    }

    var cur_page = 0;
    var shop_objs = [];

    // Scrape the shops of a freshly fetched listing page.
    function start_parse(doc){
        $$parse_page_shops(doc, on_shop_list_done);
    }

    // Export everything collected so far; skipped when nothing was found.
    function finish(){
        if (shop_objs.length === 0) {
            console.log('no shops found, nothing to export');
            return;
        }
        $$send_csv(shop_objs);
    }

    function on_shop_list_done(doc, arr){
        cur_page++;
        shop_objs = shop_objs.concat(arr);
        // Running out of pages must end the run just like hitting the limit.
        var has_next = !!doc.querySelector('.page > .next');
        if (cur_page >= max_page || !has_next) {
            finish();
        } else {
            $$fetch_next_page(doc, start_parse);
        }
    }

    $$parse_page_shops(document, on_shop_list_done);
}
// Run the scraper as soon as this snippet is evaluated.
$$start();
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment