Skip to content

Instantly share code, notes, and snippets.

@kooparse
Created July 7, 2016 10:03
Show Gist options
  • Save kooparse/b92be611e1f74eb5946626e66e9d3d71 to your computer and use it in GitHub Desktop.
Save kooparse/b92be611e1f74eb5946626e66e9d3d71 to your computer and use it in GitHub Desktop.
kat.cr scraper
import request from 'axios'
import bytes from 'bytes'
import cheerio from 'cheerio'
import trakt from './trakt'
// Configuration used for the HTTP client when the caller supplies none
// (it is the default value of the Kat constructor's `options` argument).
const defaultOptions = { timeout: 10000 }

// Base search endpoints, in order of preference. A search term is appended
// directly to one of these to form the request URL.
const urls = ['https://kat.cr/usearch/', 'https://kickassto.co/usearch/']
/**
 * Scraper for the search pages served under the configured base URLs.
 *
 * Builds a search URL from a query, downloads the HTML page and extracts the
 * result rows into plain objects.
 */
export default class Kat {
  /**
   * @param {Object} [options=defaultOptions] Configuration handed as-is to
   *   `request.create` (the axios import). Passing an object replaces the
   *   defaults entirely; it is not merged with them.
   */
  constructor (options = defaultOptions) {
    this.request = request.create(options)
    this.baseUrls = urls
    // Language code -> numeric id emitted as the `lang_id:` search filter.
    this.languageMap = {
      en: 2,
      sq: 42,
      ar: 7,
      eu: 44,
      bn: 46,
      'pt-br': 39,
      bg: 37,
      yue: 45,
      ca: 47,
      zh: 10,
      hr: 34,
      cs: 32,
      da: 26,
      nl: 8,
      tl: 11,
      fi: 31,
      fr: 5,
      de: 4,
      el: 30,
      he: 25,
      hi: 6,
      hu: 27,
      it: 3,
      ja: 15,
      kn: 49,
      ko: 16,
      lt: 43,
      ml: 21,
      cmn: 23,
      ne: 48,
      no: 19,
      fa: 33,
      pl: 9,
      pt: 17,
      pa: 35,
      ro: 18,
      ru: 12,
      sr: 28,
      sl: 36,
      es: 14,
      sv: 20,
      ta: 13,
      te: 22,
      th: 24,
      tr: 29,
      uk: 40,
      vi: 38
    }
  }

  /**
   * Build the part of the search URL that follows the base URL.
   *
   * @param {string|Object} query Either the raw search string, or an object
   *   with any of: `query`, `category`, `uploader`, `min_seeds`, `age`,
   *   `min_files`, `imdb`, `tvrage`, `isbn`, `adult_filter`, `verified`,
   *   `season`, `episode`, `language`, `page`, `sort_by`, `order`.
   *   Falsy fields are ignored.
   * @param {string} [params=''] Prefix the generated text is appended to.
   * @returns {string} The search term, followed by `/<page>` and
   *   `/?field=…&order=…` when those options are present.
   * @throws {Error} When `query` is falsy ('Query is required.') or is
   *   neither a string nor an object ('Not a valid Query.').
   */
  params (query, params = '') {
    if (!query) {
      throw new Error('Query is required.')
    }
    if (typeof query === 'string') {
      return params + query
    }
    if (typeof query !== 'object') {
      throw new Error('Not a valid Query.')
    }

    let search = params
    if (query.query) search += query.query

    // [search keyword, raw value, optional formatter], in emission order.
    const filters = [
      ['category', query.category],
      ['user', query.uploader],
      ['seeds', query.min_seeds],
      ['age', query.age],
      ['files', query.min_files],
      // Keep only the digits; String() lets numeric ids through as well.
      ['imdb', query.imdb, (id) => String(id).replace(/\D/g, '')],
      ['tv', query.tvrage],
      ['isbn', query.isbn],
      ['is_safe', query.adult_filter],
      ['verified', query.verified],
      ['season', query.season],
      ['episode', query.episode]
    ]
    for (const [keyword, value, format = (raw) => raw] of filters) {
      if (value) search += ` ${keyword}:${format(value)}`
    }

    // Own-property lookup so names such as 'constructor' are not mistaken
    // for a known language code.
    const knownLanguage = Object.prototype.hasOwnProperty.call(
      this.languageMap,
      query.language
    )
    if (knownLanguage && this.languageMap[query.language]) {
      search += ` lang_id:${this.languageMap[query.language]}`
    }

    if (query.page) search += `/${query.page}`

    // Collect the query-string pairs first so that `order` without `sort_by`
    // still yields a well-formed `/?order=…` instead of a dangling `&order=…`.
    const pairs = []
    if (query.sort_by) pairs.push(`field=${query.sort_by}`)
    if (query.order) pairs.push(`order=${query.order}`)
    if (pairs.length > 0) search += `/?${pairs.join('&')}`

    return search
  }

  /**
   * Extract the search results from a result page.
   *
   * @param {string} data HTML of the result page.
   * @param {number|string} [page=1] Page number to report back.
   * @param {number} [date] Elapsed request time to report as `response_time`.
   * @returns {{response_time: number, page: number, totalResults: number,
   *   totalPages: number, results: Object[]}} `totalResults` is 0 when the
   *   summary text cannot be recognised; `totalPages` is 1 when no numeric
   *   pagination button is found.
   */
  formatData (data, page = 1, date) {
    const $ = cheerio.load(data)
    // Captures the last number of a "<word> <n>-<m> <word> <total>" summary.
    const matcher = /\s+[a-zA-Z]+\s\d+[-]\d+\s[a-zA-Z]+\s(\d+)/
    const summaryMatch = $('table#mainSearchTable.doublecelltable')
      .find('h2')
      .find('span')
      .text()
      .match(matcher)
    const lastPageLabel = $('div.pages.botmarg5px.floatright')
      .children('a.turnoverButton.siteButton.bigButton')
      .last()
      .text()

    const formatted = {
      response_time: parseInt(date, 10),
      page: parseInt(page, 10),
      // `match` returns null when the summary is missing (e.g. no results).
      totalResults: summaryMatch ? parseInt(summaryMatch[1], 10) : 0,
      // Always a number, whether or not pagination buttons are present.
      totalPages: parseInt(lastPageLabel, 10) || 1,
      results: []
    }

    $('table.data').find('tr[id]').each((index, row) => {
      const el = $(row)
      const mainLink = el.find('a.cellMainLink[href]')
      // Centered cells, by index: 0 size, 1 files, 2 date, 3 seeds, 4 leechs.
      const cells = el.find('td.center')
      const sizeText = cells.eq(0).text()

      formatted.results.push({
        title: el.find('a.cellMainLink').text(),
        category: el
          .find('span.font11px.lightgrey.block')
          .find('a[href]')
          .last()
          .text(),
        link: mainLink.attr('href'),
        guid: mainLink.attr('href'),
        // Number of "verified" icons in the row (0 when absent).
        verified: el.find('i.ka.ka16.ka-verify.ka-green').length,
        comments: parseInt(
          el.find('a.icommentjs.kaButton.smallButton.rightButton').text(),
          10
        ),
        magnet: el.find('a.icon16[data-nop]').attr('href'),
        torrentLink: el.find('a.icon16[data-download]').attr('href'),
        fileSize: sizeText,
        files: parseInt(cells.eq(1).text(), 10),
        seeds: parseInt(cells.eq(3).text(), 10),
        leechs: parseInt(cells.eq(4).text(), 10),
        // Millisecond timestamp; NaN when the title attribute is not a date.
        pubDate: Number(new Date(cells.eq(2).attr('title'))),
        // Result of passing the size text through the `bytes` package
        // (presumably a byte count).
        size: bytes(sizeText)
      })
    })

    return formatted
  }

  /**
   * Run a search against the first base URL and return the parsed page.
   *
   * NOTE: a failed request or a parsing failure does NOT reject; the caught
   * error is returned as the resolved value, so callers must check whether
   * the result is an Error. An invalid `query` does reject, because the URL
   * is built outside the try block. Only `baseUrls[0]` is contacted; the
   * remaining base URLs are not used as a fallback.
   *
   * @param {string|Object} query See `params`.
   * @returns {Promise<Object|Error>} The `formatData` result, or the error.
   */
  search = async (query) => {
    const endpoint = `${this.baseUrls[0]}${this.params(query)}`
    const startedAt = Date.now()
    try {
      const response = await this.request.get(endpoint)
      return this.formatData(response.data, query.page, Date.now() - startedAt)
    } catch (error) {
      return error
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment