Created
August 18, 2013 11:50
-
-
Save chengmu/6261269 to your computer and use it in GitHub Desktop.
用来抓取audiofic站点下载链接的snippet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Ajax
/**
 * Minimal XMLHttpRequest helper.
 *
 * @param {String} type HTTP method such as 'GET' or 'POST' (compared
 *     case-insensitively when deciding whether to send a body).
 * @param {String} url Address to request.
 * @param {Object|Function} [opts] Fields to send as form data with a POST
 *     request. May be omitted, in which case the third argument is treated
 *     as the callback.
 * @param {Function} [callback] Invoked with `xhr.response` once the request
 *     has loaded.
 */
function request(type, url, opts, callback) {
    if (typeof opts === 'function') {
        callback = opts;
        opts = null;
    }

    var xhr = new XMLHttpRequest();
    xhr.open(type, url);

    // Only a POST carries a body; any other method is sent without one.
    var payload = null;
    if (opts && String(type).toUpperCase() === 'POST') {
        payload = new FormData();
        Object.keys(opts).forEach(function (key) {
            // Every value is JSON-encoded, so the receiver has to JSON-decode
            // each form field.
            payload.append(key, JSON.stringify(opts[key]));
        });
    }

    xhr.onload = function () {
        if (typeof callback === 'function') {
            callback(xhr.response);
        }
    };
    xhr.onerror = function () {
        // Surface network failures instead of dropping them silently.
        console.error('request failed: ' + type + ' ' + url);
    };
    xhr.send(payload);
}
// Hidden container attached to the page: fetched HTML is parsed into it so
// it can be queried with querySelectorAll without being displayed.
var temp = document.createElement('div');
temp.style.display = 'none';
temp.className = 'temp-container';
var body = document.body;
body.appendChild(temp);
var content = '';

/**
 * Change this to the address of the category you want to scrape.
 * The page number is appended to it.
 * @type {String}
 */
var url = 'http://www.audiofic.jinjurly.com/category/fayjayauthor?page=';

/**
 * Number of listing pages to fetch.
 * @type {Number}
 */
var pageCounts = 4;

// Download links gathered by filterResult.
var resultList = [];

// Fetch every page, and run filterResult only after the last response has
// been added to `temp`. It was previously attached to a separate request
// that was issued before any page had been fetched, so it scanned a
// container that was still empty or only partially filled.
(function fetchAllPages() {
    var pending = pageCounts;
    for (var page = 1; page <= pageCounts; page++) {
        request('get', url + page, function (resp) {
            setResult(resp);
            pending -= 1;
            if (pending === 0) {
                filterResult();
            }
        });
    }
}());
/**
 * Appends one fetched page to the `temp` container.
 *
 * @param {String} resp HTML text of a response.
 */
function setResult(resp) {
    // A failed or empty response has nothing to parse; without this guard a
    // null response would be appended as the literal text "null".
    if (typeof resp !== 'string' || resp === '') {
        return;
    }
    // Parse only the new markup instead of re-serialising and re-parsing
    // everything already in the container on every page.
    temp.insertAdjacentHTML('beforeend', resp);
}
/**
 * Collects zip download links of mp3 entries from the `temp` container.
 *
 * A link qualifies when the text of its parent element starts with "mp3"
 * and its href ends with "zip". Qualifying hrefs are appended to the global
 * `resultList`; hrefs already present are skipped, so calling this more than
 * once does not produce duplicates.
 *
 * @returns {Array} The global `resultList`.
 */
function filterResult() {
    // Declared locally: assigning to an undeclared name leaks a global and
    // throws in strict mode.
    var links = Array.prototype.slice.call(temp.querySelectorAll('.field-item p a'));
    links.forEach(function (item) {
        var isMp3Zip = /^mp3/.test(item.parentNode.innerText) && /zip$/.test(item.href);
        if (isMp3Zip && resultList.indexOf(item.href) === -1) {
            resultList.push(item.href);
        }
    });
    return resultList;
}
// Records produced by collectInfo, one per <article> element.
var allInfo = [];

/**
 * Scans every <article> inside the `temp` container and appends one record
 * per article to the global `allInfo`.
 *
 * Each record has the keys title, fandom, relationship, dlUrl, textUrl,
 * format, author and reader. `fandom` and `relationship` are comma-joined
 * link texts; any other field that cannot be found is left undefined rather
 * than aborting the whole scan.
 */
function collectInfo () {
    // Array copy of a NodeList so forEach/map can be used on it.
    function toArray(nodeList) {
        return Array.prototype.slice.call(nodeList);
    }

    // Texts of all links found inside `container`.
    function linkTexts(container) {
        return toArray(container.querySelectorAll('a')).map(function (link) {
            return link.innerText;
        });
    }

    toArray(temp.querySelectorAll('article')).forEach(function (item) {
        var fandom = [];
        var relationship = [];
        var format, dlUrl, textUrl, author, reader;

        var titleLink = item.querySelectorAll('header .title a')[0];
        var title = titleLink ? titleLink.innerText : undefined;

        toArray(item.querySelectorAll('.content div')).forEach(function (fieldItem) {
            var label = fieldItem.querySelectorAll('.field-label')[0];

            if (label) {
                // Labelled fields: the label text decides which list the
                // link texts belong to.
                if (/^fandom/.test(label.innerText)) {
                    fandom = fandom.concat(linkTexts(label.parentNode));
                }
                if (/^relationship/.test(label.innerText)) {
                    relationship = relationship.concat(linkTexts(fieldItem));
                }
                return;
            }

            // Unlabelled fields: download/text links and author/reader.
            var paragraph = fieldItem.querySelectorAll('.field-item p')[0];
            if (paragraph) {
                var paragraphText = paragraph.innerText;
                var paragraphLinks = paragraph.querySelectorAll('a');
                var downloadLink = paragraphLinks[0];
                var textLink = paragraphLinks[1];

                // format and dlUrl are only set together, so they always
                // describe the same link.
                if (downloadLink && /^mp3/.test(paragraphText)) {
                    format = 'mp3';
                    dlUrl = downloadLink.href;
                }
                if (downloadLink && /^m4b/.test(paragraphText)) {
                    format = 'm4b';
                    dlUrl = downloadLink.href;
                }
                // The text version is taken from the second link of the paragraph.
                if (textLink && /text/.test(paragraphText)) {
                    textUrl = textLink.href;
                }
            }

            var personLink = fieldItem.querySelectorAll('.field-item a')[0];
            if (personLink) {
                var personText = personLink.innerText;
                if (/author$/.test(personText)) {
                    author = personText;
                }
                if (/reader$/.test(personText)) {
                    reader = personText;
                }
            }
        });

        allInfo.push({
            title : title,
            fandom : fandom.join(','),
            relationship: relationship.join(','),
            dlUrl : dlUrl,
            textUrl : textUrl,
            format : format,
            author : author,
            reader : reader
        });
    });
}
/**
 * Predicate that is true when the record's `format` is exactly 'mp3'.
 * The (element, index, array) signature matches an Array.prototype.filter
 * callback; only `element` is read.
 */
function filertMp3 (element, index, array) {
    var wantedFormat = 'mp3';
    return wantedFormat === element.format;
}
/**
 * Predicate that is true when the record's `fandom` contains "sherlock"
 * (case-sensitive). Only `element` is read.
 */
function filertSherlock (element, index, array) {
    var pattern = /sherlock/;
    return pattern.test(element.fandom);
}
/**
 * Predicate that is true when the record's `fandom` contains "supernatural"
 * (case-sensitive).
 */
function filterspn (element) {
    var pattern = /supernatural/;
    return pattern.test(element.fandom);
}
/**
 * Predicate that is true when the record's `fandom` contains "merlin"
 * (case-sensitive).
 */
function filterMerlin (element) {
    var pattern = /merlin/;
    return pattern.test(element.fandom);
}
/**
 * Predicate that is true when the record's `fandom` contains "wooster"
 * (case-sensitive).
 */
function filterJW (element) {
    var pattern = /wooster/;
    return pattern.test(element.fandom);
}
/**
 * Returns the record's `dlUrl` property.
 */
function mapdlURL(element) {
    var downloadUrl = element.dlUrl;
    return downloadUrl;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment