Created
July 19, 2016 19:50
-
-
Save juandjara/79a2cb36c9457aef3c04ca5336183485 to your computer and use it in GitHub Desktop.
tusubtitulo.com scraper module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party dependencies: HTTP client, HTML parser and fuzzy matcher.
const got = require("got");
const cheerio = require("cheerio");
const Fuzzy = require("fuzzy-search");

// Root URL that every request in this module is built from.
const base_url = "https://www.tusubtitulo.com";

// Shared module-level binding; deliberately left reassignable.
let links = [];

// Public API of the module (the functions are hoisted declarations below).
module.exports = { index, detail, search, episodes };
/**
 * Download the series index page and list every show linked from it.
 *
 * Requests `<base_url>/series.php` and collects the anchors found inside
 * `#showindex` table cells that carry no `colspan` attribute.
 *
 * @returns {Promise<Array<{name: string, link: string, id: string}>>}
 *   One entry per anchor: `name` is the anchor text, `link` is the href
 *   prefixed with `base_url`, and `id` is the last `/`-separated segment of
 *   the href. Rejects if the HTTP request fails.
 */
function index() {
  const indexUrl = base_url + '/series.php';
  return got(indexUrl).then(onIndexLoaded);

  function onIndexLoaded(res) {
    // Keep the parser handle and the result list local: no implicit global
    // `$`, and no module-level state shared between concurrent calls.
    const $ = cheerio.load(res.body);
    const shows = [];

    $("#showindex td:not([colspan]) a").each((position, el) => {
      const anchor = $(el);
      const link = anchor.attr("href");
      if (!link) {
        // An anchor without an href cannot yield a link or an id; skip it
        // instead of crashing on `undefined.split`.
        return;
      }
      const id = link.split("/").pop();
      shows.push({ name: anchor.text(), link: base_url + link, id });
    });

    return shows;
  }
}
/**
 * Fetch the page of a single show and extract its title and counters.
 *
 * Requests `<base_url>/show/<show_id>` and reads, inside `#contenido`, the
 * text of `p.titulo` (title) and of `td.SectionTitle` (a label from which
 * the first two runs of digits are taken).
 *
 * @param {string|number} show_id - Identifier appended to the show URL.
 * @returns {Promise<{title: string, seasons: (string|undefined), episodes: (string|undefined)}>}
 *   `seasons` is the first number found in the label and `episodes` the
 *   second, both as digit strings; either is `undefined` when the label does
 *   not contain it. Rejects if the HTTP request fails.
 */
function detail(show_id) {
  const detailUrl = base_url + "/show/" + show_id;
  return got(detailUrl).then(onDetailLoaded);

  function onDetailLoaded(res) {
    const $ = cheerio.load(res.body);
    const showEl = $("#contenido");
    const title = showEl.find("p.titulo").text();
    const numbersLabel = showEl.find("td.SectionTitle").text();

    // String#match returns null (not an empty array) when no digits are
    // present, e.g. when the expected markup is missing; fall back to []
    // so the lookups below yield undefined instead of throwing.
    const numbers = numbersLabel.match(/\d+/g) || [];
    const seasonCount = numbers[0];
    const episodeCount = numbers[1];

    return { title, seasons: seasonCount, episodes: episodeCount };
  }
}
/**
 * Find the show whose name best matches `text` and return its details.
 *
 * Loads the full list via `index()`, runs a case-insensitive, sorted fuzzy
 * search over the `name` field, and forwards the id of the first result to
 * `detail()`.
 *
 * @param {string} text - Search needle matched against show names.
 * @returns {Promise<?{title: string, seasons: (string|undefined), episodes: (string|undefined)}>}
 *   Whatever `detail()` resolves to for the first match, or `null` when
 *   nothing matches. Rejects if `index()` or `detail()` rejects.
 */
function search(text) {
  return index().then((shows) => {
    const fuzzy = new Fuzzy(shows, ["name"], {
      caseSensitive: false,
      sort: true
    });
    const matches = fuzzy.search(text);

    if (matches.length === 0) {
      return null;
    }

    // `fuzzy.search` yields an array of index entries; `detail` expects a
    // single show id, so pass the id of the first (sorted) result.
    return detail(matches[0].id);
  });
}
/**
 * List the episodes of one season together with their subtitle links.
 *
 * Requests `<base_url>/ajax_loadShow.php?show=<show_id>&season=<season>` and
 * turns every `<table>` of the response into one episode entry.
 *
 * @param {string|number} show_id - Value sent as the `show` query parameter.
 * @param {string|number} season - Value sent as the `season` query parameter.
 * @returns {Promise<Array<{title: string, links: Array<{lang: string, link: string}>}>>}
 *   For each table: `title` is the trimmed text of its first row; `links`
 *   holds one item per row from the third row on that has an href in its
 *   last cell, with `lang` taken from the third-to-last cell. Rejects if the
 *   HTTP request fails.
 */
function episodes(show_id, season) {
  // Encode both values so characters such as `&`, `=` or spaces cannot
  // alter the structure of the query string.
  const url = base_url + '/ajax_loadShow.php?show=' + encodeURIComponent(show_id) +
    '&season=' + encodeURIComponent(season);
  return got(url).then(onSeasonLoaded);

  function onSeasonLoaded(res) {
    const $ = cheerio.load(res.body);
    const eps = [];

    $("table").each((tableIdx, table) => {
      // each table is one episode
      const rows = $(table).find("tr");
      const title = $(rows[0]).text().trim();
      const subtitleLinks = [];

      rows.each((rowIdx, rowEl) => {
        // The first two rows are not subtitle rows (row 0 supplies the
        // title above), so they are skipped.
        if (rowIdx < 2) {
          return;
        }
        const row = $(rowEl);
        const lang = row.find("td:nth-last-child(3)").text().trim();
        const link = row.find("td:nth-last-child(1) a").attr("href");
        if (link) {
          subtitleLinks.push({ lang, link });
        }
      });

      eps.push({ title, links: subtitleLinks });
    });

    return eps;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "tusubtitulo-scrapper",
  "version": "0.1.0",
  "description": "Scrapper module for tusubtitulo.com",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "fuken",
  "license": "MIT",
  "dependencies": {
    "cheerio": "^0.20.0",
    "fuzzy-search": "^1.3.5",
    "got": "^6.3.0"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment