Skip to content

Instantly share code, notes, and snippets.

@juandjara
Created July 19, 2016 19:50
Show Gist options
  • Save juandjara/79a2cb36c9457aef3c04ca5336183485 to your computer and use it in GitHub Desktop.
Save juandjara/79a2cb36c9457aef3c04ca5336183485 to your computer and use it in GitHub Desktop.
tusubtitulo.com scraper module
var got = require("got");
var cheerio = require("cheerio");
var Fuzzy = require("fuzzy-search");
var base_url = "https://www.tusubtitulo.com";
var links = [];
module.exports = {
index: index,
detail: detail,
search: search,
episodes: episodes
}
/**
 * Fetches the series index page and lists every show linked from it.
 *
 * @returns {Promise<Array<{name: string, link: string, id: string}>>}
 *   Resolves with one entry per anchor matched in the index table: the anchor
 *   text, the href prefixed with the site root, and the last path segment of
 *   the href as `id`. Rejects if the HTTP request fails.
 */
function index(){
  const subs_index_url = base_url+'/series.php';
  return got(subs_index_url).then(onIndexLoaded);

  // Turns the index page HTML into a plain array of show descriptors.
  function onIndexLoaded(res){
    // Keep the cheerio handle and the matched anchors local: assigning to an
    // undeclared `$` leaks a global (and throws in strict mode / ES modules),
    // and sharing the match set at module level would let concurrent calls
    // overwrite each other.
    const $ = cheerio.load(res.body);
    const anchors = $("#showindex td:not([colspan]) a");
    return anchors.map((i, el) => {
      const anchor = $(el);
      const href = anchor.attr("href");
      const name = anchor.text();
      // The id is whatever follows the last "/" of the href.
      const id = href.split("/").pop();
      return {name, link: base_url+href, id};
    }).get();
  }
}
/**
 * Fetches a show's page and extracts its title and the two counts printed in
 * the section title.
 *
 * @param {string|number} show_id - Identifier of the show, as used in the
 *   `/show/<id>` URL. It is URL-encoded before being placed in the path.
 * @returns {Promise<{title: string, seasons: string, episodes: string}>}
 *   `seasons` is the first number found in the section title and `episodes`
 *   the second, both as the digit strings matched on the page.
 *   Rejects if the HTTP request fails or if the section title contains no
 *   numbers at all.
 */
function detail(show_id){
  const detail_url = base_url+"/show/"+encodeURIComponent(show_id);
  return got(detail_url).then(onDetailLoaded);

  // Pulls the title and the numeric counts out of the show page HTML.
  function onDetailLoaded(res){
    const $ = cheerio.load(res.body);
    const show_el = $("#contenido");
    const title = show_el.find("p.titulo").text();
    const numbers_label = show_el.find("td.SectionTitle").text();
    const numbers = numbers_label.match(/\d+/g);
    // String#match returns null when nothing matches; fail with a readable
    // message instead of a TypeError from indexing null.
    if (!numbers) {
      throw new Error("Could not read season/episode counts for show "+show_id);
    }
    return {title, seasons: numbers[0], episodes: numbers[1]};
  }
}
/**
 * Looks a show up by name and returns the detail of the best match.
 *
 * Loads the full index, fuzzy-matches `text` against each entry's `name`
 * (case-insensitive), and fetches the detail of the first result.
 *
 * @param {string} text - Show name, or part of it, to look for.
 * @returns {Promise<?{title: string, seasons: string, episodes: string}>}
 *   Resolves with the detail of the first match, or `null` when nothing in
 *   the index matches. Rejects if loading the index or the detail fails.
 */
function search(text){
  return index().then(data => {
    const fuzzy = new Fuzzy(data, ["name"], {
      caseSensitive: false,
      sort: true
    });
    const matches = fuzzy.search(text);
    if (matches.length === 0) {
      return null;
    }
    // detail() expects a show id, not the result list. With `sort: true` the
    // first entry is presumably the best match (see fuzzy-search docs).
    return detail(matches[0].id);
  });
}
/**
 * Fetches one season of a show and lists its episodes with their subtitle
 * links.
 *
 * @param {string|number} show_id - Identifier of the show.
 * @param {string|number} season - Season to load.
 *   Both values are URL-encoded before being placed in the query string.
 * @returns {Promise<Array<{title: string, links: Array<{lang: string, link: string}>}>>}
 *   One entry per `<table>` in the response. `title` is the trimmed text of
 *   the table's first row; `links` has one item per later row that contains
 *   an anchor with an href in its last cell. Rejects if the HTTP request fails.
 */
function episodes(show_id, season){
  // Encode the values so characters such as "&" or "=" cannot alter the query.
  const url = base_url+'/ajax_loadShow.php?show='+encodeURIComponent(show_id)
    +'&season='+encodeURIComponent(season);
  return got(url).then(onSeasonLoaded);

  // Converts the season HTML fragment into a list of episodes.
  function onSeasonLoaded(res){
    const $ = cheerio.load(res.body);
    const eps = [];
    // Each table is one episode.
    $("table").each((table_index, table) => {
      const rows = $(table).find("tr");
      const title = $(rows[0]).text().trim();
      const links = [];
      rows.each((row_index, row_el) => {
        // The first two rows are never scanned for links (the first one holds
        // the title; the second is presumably a header row).
        if (row_index < 2) {
          return;
        }
        const row = $(row_el);
        const lang = row.find("td:nth-last-child(3)").text().trim();
        const link = row.find("td:nth-last-child(1) a").attr("href");
        // Rows without a download anchor are ignored.
        if (link) {
          links.push({lang, link});
        }
      });
      eps.push({title, links});
    });
    return eps;
  }
}
{
"name": "tusubtitulo-scrapper",
"version": "0.1.0",
"description": "Scraper module for tusubtitulo.com",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "fuken",
"license": "MIT",
"dependencies": {
"cheerio": "^0.20.0",
"fuzzy-search": "^1.3.5",
"got": "^6.3.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment